-- 先执行前一天的汇总数据
INSERT OVERWRITE TABLE target_table PARTITION( dt = '$yesterday' )
SELECT A.query, COUNT(1) AS n_clk
FROM src_table A
WHERE A.dt > DATA_SUB('$yesterday', 60)
GROUP BY A.query
;
-- 更新
SELECT
FROM (
-- 前一天的汇总数据
SELECT A.query, A.n_clk
FROM target_table A
WHERE A.dt = '$yesterday'
UNION ALL
-- 加上今天的数据
SELECT A.query, COUNT(1) AS n_clk
FROM src_table A
WHERE A.dt = '$today'
GROUP BY A.query
UNION ALL
-- 减去60天前的数据
SELECT A.query, -1*COUNT(1) AS n_clk
FROM src_table A
WHERE A.dt = DATA_SUB('$today', 60)
GROUP BY A.query
-- 或者可以把上面的两天合并在一起写
-- SELECT A.query
-- , if(A.dt = '$today', COUNT(1), -1*COUNT(1)) AS n_clk
-- FROM src_table A
-- WHERE A.dt in ('$today', DATA_SUB('$today', 60))
-- GROUP BY A.query
) A
-- 先执行前一天的汇总数据
INSERT OVERWRITE TABLE target_table PARTITION( dt = '$yesterday' )
SELECT A.query
, str_to_map(concat_ws(',', collect_list(concat_ws(':', A.dt, CAST(A.n_clk AS string))))) AS data_map
-- str_to_map 默认使用 ',' 和 ':' 分割
FROM (
SELECT A.query, A.dt, COUNT(1) AS n_clk
FROM src_table A
WHERE A.dt > DATA_SUB('$yesterday', 60)
GROUP BY A.query, A.dt
) A
GROUP BY A.query
;
-- 更新
SELECT A.query
, str_to_map(concat_ws(',', collect_list(concat_ws(':', A.dt, CAST(A.n_clk AS string))))) AS data_map
FROM (
SELECT A.query, B.dt, B.n_clk
FROM target_table A
LATERAL VIEW explode(A.data) B AS dt, n_clk
WHERE A.dt = '$yesterday'
AND B.dt > DATA_SUB('$today', 60)
UNION ALL
-- 加上今天的数据
SELECT A.query, A.dt, COUNT(1) AS n_clk
FROM src_table A
WHERE A.dt = '$today'
GROUP BY A.query, A.dt
) A
例3: 通过最新日期过滤
-- 先执行前一天的汇总数据
INSERT OVERWRITE TABLE target_table PARTITION( dt = '$yesterday' )
SELECT A.query
, max(A.dt) AS latest_dt
FROM src_table A
WHERE A.dt > DATA_SUB('$yesterday', 30)
GROUP BY A.query
;
-- 更新
SELECT A.query, A.latest_dt
FROM (
SELECT COALESCE(B.query, A.query) AS query
, COALESCE(B.dt,
if(A.latest_dt > DATA_SUB('$today', 30),
A.latest_dt, '-1')) AS latest_dt
FROM (
SELECT A.query, A.latest_dt
FROM target_table A
WHERE A.dt = '$yesterday'
) A
FULL JOIN (
SELECT A.query, A.dt
FROM src_table A
WHERE A.dt = '$today'
) B
ON A.query = B.query
) A
WHERE A.latest_dt != '-1'
;