-- 先执行前一天的汇总数据INSERT OVERWRITE TABLE target_table PARTITION( dt ='$yesterday' )SELECT A.query, COUNT(1) AS n_clkFROM src_table AWHERE A.dt > DATA_SUB('$yesterday', 60)GROUP BY A.query;-- 更新SELECTFROM (-- 前一天的汇总数据 SELECT A.query, A.n_clk FROM target_table A WHERE A.dt ='$yesterday' UNION ALL-- 加上今天的数据 SELECT A.query, COUNT(1) AS n_clk FROM src_table A WHERE A.dt ='$today' GROUP BY A.query UNION ALL-- 减去60天前的数据 SELECT A.query, -1*COUNT(1) AS n_clk FROM src_table A WHERE A.dt = DATA_SUB('$today', 60) GROUP BY A.query-- 或者可以把上面的两天合并在一起写-- SELECT A.query-- , if(A.dt = '$today', COUNT(1), -1*COUNT(1)) AS n_clk-- FROM src_table A-- WHERE A.dt in ('$today', DATA_SUB('$today', 60))-- GROUP BY A.query) A
-- 先执行前一天的汇总数据INSERT OVERWRITE TABLE target_table PARTITION( dt ='$yesterday' )SELECT A.query , str_to_map(concat_ws(',', collect_list(concat_ws(':', A.dt, CAST(A.n_clk AS string))))) AS data_map-- str_to_map 默认使用 ',' 和 ':' 分割FROM ( SELECT A.query, A.dt, COUNT(1) AS n_clk FROM src_table A WHERE A.dt > DATA_SUB('$yesterday', 60) GROUP BY A.query, A.dt) AGROUP BY A.query;-- 更新SELECT A.query , str_to_map(concat_ws(',', collect_list(concat_ws(':', A.dt, CAST(A.n_clk AS string))))) AS data_mapFROM ( SELECT A.query, B.dt, B.n_clk FROM target_table A LATERAL VIEW explode(A.data) B AS dt, n_clk WHERE A.dt ='$yesterday' AND B.dt > DATA_SUB('$today', 60) UNION ALL-- 加上今天的数据 SELECT A.query, A.dt, COUNT(1) AS n_clk FROM src_table A WHERE A.dt ='$today' GROUP BY A.query, A.dt) A
例3: 通过最新日期过滤
-- 先执行前一天的汇总数据INSERT OVERWRITE TABLE target_table PARTITION( dt ='$yesterday' )SELECT A.query , max(A.dt) AS latest_dtFROM src_table AWHERE A.dt > DATA_SUB('$yesterday', 30)GROUP BY A.query;-- 更新SELECT A.query, A.latest_dtFROM ( SELECT COALESCE(B.query, A.query) AS query , COALESCE(B.dt, if(A.latest_dt > DATA_SUB('$today', 30), A.latest_dt, '-1')) AS latest_dt FROM ( SELECT A.query, A.latest_dt FROM target_table A WHERE A.dt ='$yesterday' ) A FULL JOIN ( SELECT A.query, A.dt FROM src_table A WHERE A.dt ='$today' ) BON A.query = B.query) AWHERE A.latest_dt !='-1';