HiveSelect操作

piepis發表於2020-09-23

儲存select查詢結果的幾種方式

1.將查詢結果儲存到一張新的hive表中create table t_tmp
as
select * from t_p;2將查詢結果儲存到一張已經存在的hive表中

eg:(into 增量增加)
insert into table t_tmp
select * from t_p;

#-----------------------------------------------------
eg:(overwrite 覆蓋增加)
insert overwrite table ol_oitf_interface_auc_dimension_value_business
select *
  from tmp_auc_dimension_value_business_new;

3.將查詢結果儲存到指定的檔案目錄(可以是本地,也可以是HDFS)

insert overwrite local directory '/home/hadoop/test'
select * from t_p;
#---------------------------------------------------
#插入HDFS
insert overwrite directory '/aaa/test'
select * from t_p;

Group By操作

GROUP BY子句的語法如下:

SELECT [ALL | DISTINCT] select_expr, select_expr, … FROM
table_reference [WHERE where_condition] [GROUP BY col_list] [HAVING
having_condition] [ORDER BY col_list]] [LIMIT number];eg: SELECT
pageid, age, count(1) FROM pv_users GROUP BY pageid, age;

具體實現過程如圖:

23333333333

例項:
presto:default> select *from(
             -> select one_department,
             ->   two_department,
             ->   three_department,
             ->   four_department,
             ->   five_department ,count(*) as num  from tmp_kn1_ehr_person_organization_20180817 where one_department is not Null group by one_department,
             ->   two_department,
             ->   three_department,
             ->   four_department,
             ->   five_department ) t2 where t2.num>1 limit 5;

2334444

Group By sum 計算

表資料:
    presto:default> select *from tab4 order by id1;
     id1  | size | name 
    ------+------+------
        1 |   10 | AAA  
        2 |   20 | BBB  
        3 |   30 | DDD  
        3 |   30 | CCC  
        4 |   40 | NULL 
        8 | NULL | HHH  
       99 | NULL | TTT  
     NULL | NULL | EEE  
     NULL |   76 | HHH  
    (9 rows)

33333333333333

計算 size 的總數
222333333
22222222222222

Group By 排列組合

表語句 (查詢tab4):
    presto:default> select id1,size,name from tab4 group by id1,size,name order by id1;
     id1  | size | name 
    ------+------+------
        1 |   10 | AAA  
        2 |   20 | BBB  
        3 |   30 | DDD  
        3 |   30 | CCC  
        4 |   40 | NULL 
        8 | NULL | HHH  
       99 | NULL | TTT  
     NULL | NULL | EEE  
     NULL |   76 | HHH  
    (9 rows)
tab4 分組:
    presto:default> select id1,size from tab4 group by id1,size order by id1;
     id1  | size 
    ------+------
        1 |   10 
        2 |   20 
        3 |   30 
        4 |   40 
        8 | NULL 
       99 | NULL 
     NULL | NULL 
     NULL |   76 
    (8 rows)
tab4 分組並 size列 賦值Null:
    presto:default> select id1,'null' as size from tab4 group by id1,size order by id1;
     id1  | size 
    ------+------
        1 | null 
        2 | null 
        3 | null 
        4 | null 
        8 | null 
       99 | null 
     NULL | null 
     NULL | null 
    (8 rows)
tab4 分組並 多出來一列 name 賦值Null: 
    presto:default> 
    presto:default> select id1,size,null as name from tab4 group by id1,size order by id1;
     id1  | size | name 
    ------+------+------
        1 |   10 | NULL 
        2 |   20 | NULL 
        3 |   30 | NULL 
        4 |   40 | NULL 
        8 | NULL | NULL 
       99 | NULL | NULL 
     NULL | NULL | NULL 
     NULL |   76 | NULL 
    (8 rows)
tab4 分組並 多出來一列 size 賦值Null:
presto:default> select id1,size,null as size from tab4 group by id1,size order by id1;
 id1  | size | size 
------+------+------
    1 |   10 | NULL 
    2 |   20 | NULL 
    3 |   30 | NULL 
    4 |   40 | NULL 
    8 | NULL | NULL 
   99 | NULL | NULL 
 NULL | NULL | NULL 
 NULL |   76 | NULL 
(8 rows)
tab4 分組並少一列 size :
presto:default> select id1 from tab4 group by id1,size order by id1;
 id1  
------
    1 
    2 
    3 
    4 
    8 
   99 
 NULL 
 NULL 
(8 rows)

型別轉化 decimal()

create table tab5 as select res_pay_total,price_total,order_id,hotel_id,start_date from tmp_ol_dtfet_selftour_resales_date_go limit 10 union all 1.0000000000000000000000009 as res_pay_total, 2.00000000000009 as price_total,3 as order_id,5 as hotel_id,566 as start_date from dual;


Having