Hive常用函式及自定義函式

13545163656發表於2018-06-08
前言:
    HQL常用函式語法和RDBMS語法相似
    官網參考:
    
    

一.Hive自帶函式
  1. -- 檢視系統自帶函式
    hive> show functions;
  2. least
  3. length
  4. like
  5. max
  6. min
  7. -- 檢視函式使用幫助
 
hive
> desc function extended upper;
 Example:
  > SELECT upper('Facebook') FROM src LIMIT 1;
  'FACEBOOK'

二.常用的函式語法
 舉例:
  1. hive> select * from emp;
  2. OK
  3. 7369 SMITH CLERK 7902 1980-12-17 800.0 NULL 20
  4. 7499 ALLEN SALESMAN 7698 1981-2-20 1600.0 300.0 30
  5. 7521 WARD SALESMAN 7698 1981-2-22 1250.0 500.0 30
  6. 7566 JONES MANAGER 7839 1981-4-2 2975.0 NULL 20
  7. 7654 MARTIN SALESMAN 7698 1981-9-28 1250.0 1400.0 30
  8. 7698 BLAKE MANAGER 7839 1981-5-1 2850.0 NULL 30
  9. 7782 CLARK MANAGER 7839 1981-6-9 2450.0 NULL 10
  10. 7788 SCOTT ANALYST 7566 1987-4-19 3000.0 NULL 20
  11. 7839 KING PRESIDENT NULL 1981-11-17 5000.0 NULL 10
  12. 7844 TURNER SALESMAN 7698 1981-9-8 1500.0 0.0 30
  13. 7876 ADAMS CLERK 7788 1987-5-23 1100.0 NULL 20
  14. 7900 JAMES CLERK 7698 1981-12-3 950.0 NULL 30
  15. 7902 FORD ANALYST 7566 1981-12-3 3000.0 NULL 20
  16. 7934 MILLER CLERK 7782 1982-1-23 1300.0 NULL 10
  17. 8888 HIVE PROGRAM 7839 1988-1-23 10300.0 NULL NULL

  1. -- 根據工資降序排序 取前三
  2. hive> select * from emp order by salary desc limit 3;
  3. OK
  4. 7369 SMITH CLERK 7902 1980-12-17 800.0 NULL 20
  5. 7900 JAMES CLERK 7698 1981-12-3 950.0 NULL 30
  6. 7876 ADAMS CLERK 7788 1987-5-23 1100.0 NULL 20

  1. -- 求每個部門的最大工資,最小工資,平均工資,所有工資
  2. select deptno, max(salary),min(salary),avg(salary),sum(salary) from ruozedata_emp group by deptno
  3. OK
    NULL    10300.0 10300.0 10300.0 10300.0
    10      5000.0  1300.0  2916.6666666666665      8750.0
    20      3000.0  800.0   2175.0  10875.0
    30      2850.0  950.0   1566.6666666666667      9400.0


  1. -- 求每個部門的最大工資,最小工資,平均工資,所有工資中平均工資大於2000
  2. select deptno, max(salary),min(salary),avg(salary),sum(salary) from ruozedata_emp group by deptno
  3. where avg(salary)>3000
  4. OK
  5. NULL 10300.0 10300.0 10300.0 10300.0
  6. 20 3000.0 800.0 2175.0 10875.0

  1. -- 求每個部門工資前2的姓名和工資
  2. select t.*
  3. from
  4. (select
  5. deptno,
  6. ename,
  7. salary,
  8. row_number() over(partition by deptno order by salary desc ) as number
  9. from emp)t
  10. where t.number<=2;
  11. OK
  12. NULL HIVE 10300.0 1
  13. 10 KING 5000.0 1
  14. 10 CLARK 2450.0 2
  15. 20 SCOTT 3000.0 1
  16. 20 FORD 3000.0 2
  17. 30 BLAKE 2850.0 1
  18. 30 ALLEN 1600.0 2

  1. -- 統計每個部門的人數
  2. select deptno,count(1) from emp group by deptno;
  3. OK
  4. NULL 1
  5. 10 3
  6. 20 5
  7. 30 6

  1. -- CASE WHEN統計工資情況
  2. select ename,salary,
  3. case
  4. when salary>1 and salary<=1000 then 'lower'
  5. when salary>1000 and salary<=2000 then 'middle'
  6. when salary>2000 and salary<=4000 then 'high'
  7. else 'highest'
  8. end
  9. from emp
OK
SMITH   800.0   lower
ALLEN   1600.0  middle
WARD    1250.0  middle
JONES   2975.0  high
MARTIN  1250.0  middle
BLAKE   2850.0  high


  1. -- 型別轉換
  2. hive> select cast('2018-06-17' as date) from dual;
    OK
    2018-06-17

  3. -- 擷取字串
  4. hive> select substr('abcdef',1,5) from dual;
    OK
    abcde

  5. -- 拼接
  6. hive> select concat('123','456') from dual;
    OK
    123456
  7. -- 制定分割拼接
  8. hive> select concat_ws('.','www','baidu','com') from dual;
    OK
  1. -- 獲取長度
  2. hive> select length(concat_ws('.','www','baidu','com')) from dual;
    OK
  3. -- split拆分
  4. hive> select split(concat_ws('.','www','baidu','com'),'\\.') from dual;
    OK
    ["www","baidu","com"]
    Time taken: 0.084 seconds, Fetched: 1 row(s)
  1. -- 拆分換行
 hive> select explode(split(concat_ws('.','www','baidu','com'),'\\.') ) from dual;
 OK
 www
 baidu
 com

  1. -- 大寫轉換小寫
    hive> select lower('ABC') from dual;
    OK
    abc

  2. -- 小寫轉換大寫
    hive> select upper('abc') from dual;
    OK
    ABC

  1. -- json匯入查詢
  2. hive> create table json(json string);
  3. OK
  4. Time taken: 0.059 seconds
  5. -- 載入json檔案
  6. hive> load data local inpath'/home/hadoop/data/test.json' into table json;
  7. hive> select * from json;
  8. OK
  9. {"name":"zhangsan","age":28,"addr":"hz"}
  10. {"name":"lisi","age":18,"addr":"bj"}
  11. {"name":"wangwu","age":10,"addr":"sh"}
  12. Time taken: 0.151 seconds, Fetched: 3 row(s)
  13. -- json_tuple查詢
  14. hive> select json_tuple(json,'name','age','addr') as (name,age,addr)  from json;
    OK
    zhangsan        28      hz
    lisi            18      bj
    wangwu          10      sh
    Time taken: 0.077 seconds, Fetched: 3 row(s)


三.常用時間函式
  1. -- current_date返回年 月 日
hive> select current_date  from dual;
OK
2018-06-07


-- 返回unix時間搓
hive> select unix_timestamp() from dual;
OK
1528403209

-- current_timestamp返回年月入時分秒
hive> select current_timestamp from dual;
OK
2018-06-07 16:35:12.752
Time taken: 0.217 seconds, Fetched: 1 row(s)


-- 獲取年月日
hive> select to_date('2018-06-07 16:44:44') from dual;
OK
2018-06-07

-- 獲取年
hive> select year('2018-06-07 16:44:44') from dual;
OK
2018

-- 獲取月
hive> select month('2018-06-07 16:44:44') from dual;
OK
6

-- 獲取日
hive> select day('2018-06-07 16:44:44') from dual;
OK
7

-- 獲取小時
hive> select hour('2018-06-07 16:44:44') from dual;
OK
16

-- 獲取分鐘
hive> select minute('2018-06-07 16:44:44') from dual;
OK
44


-- 獲取秒
hive> select second('2018-06-07 16:44:44') from dual;
OK


-- 時間天相加
hive> select date_add('2018-06-07',10) from dual;
OK
2018-06-17

-- 時間天相減
hive> select date_sub('2018-06-17',10) from dual;
OK
2018-06-07

四.自定義函式

.....

    

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/31441024/viewspace-2155864/,如需轉載,請註明出處,否則將追究法律責任。

相關文章