Hive/Spark SQL 常用查询记录

last modify

Keywords: Hive/Spark SQL

聚合操作

个人习惯

集合字段 map/json

  • 作为一个额外字段, 可以把新的字段统一放到这里, 防止频繁修改表;

-- CREATE
CREATE TABLE db.table (
    ...
    , `features`    map < string, double >  COMMENT 'features'
    , `info_json`   string                  COMMENT 'extra info'
) COMMENT 'xxx 特征表'

-- INSERT

INSERT OVERWRITE TABLE db.table
SELECT ...
    , map(
          'f1', f1_double
        , 'f2', f2_int
        , ...
      ) AS features
    , concat('{'
        , '"k1":', k1_int, ','  -- 注意 key 要双引号
        , '"k2":', k2
      , '}') AS info_json
FROM ...

-- SELECT
SELECT ...
    ,  features['f1'] AS f1
    ,  features['f2'] AS f2
    ,  get_json_object(info_json, '$.k1') AS k1
    ,  get_json_object(info_json, '$.k2') AS k2
FROM db.table

小词表

Last updated