1、簡單描述
-
arrays:
ARRAY<data_type>
-
maps:
MAP<primitive_type, data_type>
-
structs:
STRUCT<col_name : data_type [COMMENT col_comment], ...>
-
union:
UNIONTYPE<data_type, data_type, ...>
Hive 中對該型別的完全支援仍然不完整。如果 JOIN、WHERE 和 GROUP BY 子句中引用的 UNIONTYPE 欄位的查詢將會失敗,Hive 沒有定義語法來提取 UNIONTYPE 的 tag 或 value 欄位。
複雜資料型別的建構函式:
建構函式 | 運算元 | 描述 |
---|---|---|
map | (key1, value1, key2, value2, ...) | Creates a map with the given key/value pairs. |
struct | (val1, val2, val3, ...) | Creates a struct with the given field values. Struct field names will be col1, col2, .... |
named_struct | (name1, val1, name2, val2, ...) | Creates a struct with the given field names and values. (As of Hive 0.8.0.) |
array | (val1, val2, ...) | Creates an array with the given elements. |
create_union | (tag, val1, val2, ...) | Creates a union type with the value that is being pointed to by the tag parameter. |
注:create_union 中的 tag 讓我們知道 union 的哪一部分正在被使用。
複雜資料型別訪問元素:
建構函式 | 運算元 | 描述 |
---|---|---|
A[n] | A is an Array and n is an int | Returns the nth element in the array A. The first element has index 0. For example, if A is an array comprising of ['foo', 'bar'] then A[0] returns 'foo' and A[1] returns 'bar'. |
M[key] | M is a Map<K, V> and key has type K | Returns the value corresponding to the key in the map. For example, if M is a map comprising of {'f' -> 'foo', 'b' -> 'bar', 'all' -> 'foobar'} then M['all'] returns 'foobar'. |
S.x | S is a struct | Returns the x field of S. For example for the struct foobar {int foo, int bar}, foobar.foo returns the integer stored in the foo field of the struct. |
2、測試
-- ------------------------------ ARRAY ------------------------------
-- ARRAY<data_type>
create table arraytest (id int,info array<string>)
row format delimited
fields terminated by '\t'
collection items terminated by ','
stored as textfile;
-- 不要忽略`collection items terminated by ','
-- 它表示陣列元素間的分隔符
-- 如果忽略了輸出是這樣的:
hive> select * from arraytest;
OK
1 ["zhangsan,male"]
2 ["lisi,male"]
-- 資料
1 zhangsan,male
2 lisi,male
-- 匯入
load data local inpath '/root/data/arraytest.txt' into table arraytest;
-- 檢視
hive> select * from arraytest;
OK
1 ["zhangsan","male"]
2 ["lisi","male"]
-- 索引檢視陣列元素
hive> select id,info[0] from arraytest;
OK
1 zhangsan
2 lisi
-- 將陣列的所有元素展開輸出
hive> select explode(info) from arraytest;
OK
zhangsan
male
lisi
male
-- ------------------------------ MAP ------------------------------
-- MAP<primitive_type, data_type>
create table maptest (id int,info map<string,string>)
row format delimited
fields terminated by '\t'
collection items terminated by ','
map keys terminated by ':'
stored as textfile;
-- 不要忽略`map keys terminated by ':'
-- 它表示鍵值間的分隔符
-- 資料
1 name:zhangsan,sex:male
2 name:lisi,sex:male
-- 匯入
load data local inpath '/root/data/maptest.txt' into table maptest;
-- 檢視
hive> select * from maptest;
OK
1 {"name":"zhangsan","sex":"male"}
2 {"name":"lisi","sex":"male"}
-- 檢視map元素
hive> select id,info["name"] from maptest;
OK
1 zhangsan
2 lisi
-- ------------------------------ STRUCT ------------------------------
-- STRUCT<col_name : data_type [COMMENT col_comment], ...>
create table structtest (id int,info struct<name:string,sex:string>)
row format delimited
fields terminated by '\t'
collection items terminated by ','
stored as textfile;
-- 資料
1 zhangsan,male
2 lisi,male
-- 匯入
load data local inpath '/root/data/structtest.txt' into table structtest;
-- 檢視
hive> select * from structtest;
OK
1 {"name":"zhangsan","sex":"male"}
2 {"name":"lisi","sex":"male"}
hive> select id,info.name from structtest;
OK
1 zhangsan
2 lisi
-- ------------------------------ 綜合array\map\struct ------------------------------
create table alltest(
id int,
name string,
salary bigint,
sub array<string>,
details map<string, int>,
address struct<city:string, state:string, pin:int>
)
row format delimited
fields terminated by ','
collection items terminated by '$'
map keys terminated by '#'
stored as textfile;
-- 資料
1,abc,40000,a$b$c,pf#500$epf#200,hyd$ap$500001
2,def,3000,d$f,pf#500,bang$kar$600038
4,abc,40000,a$b$c,pf#500$epf#200,bhopal$MP$452013
5,def,3000,d$f,pf#500,Indore$MP$452014
-- 匯入資料
load data local inpath '/root/data/alltest.txt' into table alltest;
-- 檢視
hive> select * from alltest;
OK
1 abc 40000 ["a","b","c"] {"pf":500,"epf":200} {"city":"hyd","state":"ap","pin":500001}
2 def 3000 ["d","f"] {"pf":500} {"city":"bang","state":"kar","pin":600038}
4 abc 40000 ["a","b","c"] {"pf":500,"epf":200} {"city":"bhopal","state":"MP","pin":452013}
5 def 3000 ["d","f"] {"pf":500} {"city":"Indore","state":"MP","pin":452014}
-- ------------------------------ UNIONTYPE ------------------------------
-- create_union(tag, val1, val2, ...)
-- Creates a union type with the value that is being pointed to by the tag parameter.
-- ---- 簡單示例:裡面都是基本型別 ------
create table uniontest(
id int,
info uniontype<string,string>
)
row format delimited
fields terminated by '\t'
collection items terminated by ','
stored as textfile;
-- 插入資料:insert into
-- tag 索引後面的值是從 0 開始的
insert into table uniontest
values
(1,create_union(0,"zhangsan","male")), -- 使用 "zhangsan"
(1,create_union(1,"zhangsan","male")), -- 使用 "male"
(2,create_union(0,"lisi","female")),
(2,create_union(1,"lisi","female"));
-- 檢視
hive> select * from uniontest;
OK
1 {0:"zhangsan"}
1 {1:"male"}
2 {0:"lisi"}
2 {1:"female"}
-- 資料
1 0,zhangsan
1 1,male
2 0,lisi
2 1,female
-- 插入資料:load data
load data local inpath '/root/data/uniontest.txt' into table uniontest;
-- 檢視
hive> select * from uniontest;
OK
1 {0:"zhangsan"}
1 {1:"male"}
2 {0:"lisi"}
2 {1:"female"}
-- 如果資料格式是這樣的:
-- 1 0,zhangsan,male
-- 1 1,zhangsan,male
-- 2 0,lisi,female
-- 2 1,lisi,female
-- 會把後面的字串當作一個整體,輸出:
-- 1 {0:"zhangsan,male"}
-- 1 {1:"zhangsan,male"}
-- 2 {0:"lisi,female"}
-- 2 {1:"lisi,female"}
-- ---- 複雜示例:裡面包含複雜型別 ------
create table uniontest_comp(
id int,
info uniontype<int,
string,
array<string>,
map<string,string>,
struct<sex:string,age:string>>
)
row format delimited
fields terminated by '\t'
collection items terminated by ','
stored as textfile;
-- 插入資料
-- 也可以使用 `insert into table ....select ....`
insert into table uniontest_comp
values
(1,create_union(0,1,"zhangsan",array("male","33"),map("sex","male","age","33"),named_struct("sex","male","age","33"))),
(1,create_union(1,1,"zhangsan",array("male","33"),map("sex","male","age","33"),named_struct("sex","male","age","33"))),
(1,create_union(2,1,"zhangsan",array("male","33"),map("sex","male","age","33"),named_struct("sex","male","age","33"))),
(1,create_union(3,1,"zhangsan",array("male","33"),map("sex","male","age","33"),named_struct("sex","male","age","33"))),
(1,create_union(4,1,"zhangsan",array("male","33"),map("sex","male","age","33"),named_struct("sex","male","age","33")));
-- 檢視
hive> select * from uniontest_comp;
OK
1 {0:1}
1 {1:"zhangsan"}
1 {2:["male","33"]}
1 {3:{"sex":"male","age":"33"}}
1 {4:{"sex":"male","age":"33"}}
參考:http://querydb.blogspot.com/2015/11/hive-complex-data-types.html