Hive的一個面試題

weixin_33670713發表於2018-08-26

需求：求出每個 user 截至當月（含當月）的累計流量，也就是把該 user 各月的流量逐月累加。

user date traffic
熊貓tv 2018-01-02 5
快手 2018-01-02 3
YY 2018-01-02 2
抖音 2018-01-02 15
熊貓tv 2018-01-03 5
快手 2018-01-03 3
YY 2018-01-03 2
抖音 2018-01-03 15
熊貓tv 2018-02-02 5
快手 2018-02-02 3
YY 2018-02-02 2
抖音 2018-02-02 15
熊貓tv 2018-02-03 5
快手 2018-02-03 3
YY 2018-02-03 2
抖音 2018-02-03 15
熊貓tv 2018-03-02 5
快手 2018-03-02 3
YY 2018-03-02 2
抖音 2018-03-02 15
熊貓tv 2018-03-03 5
快手 2018-03-03 3
YY 2018-03-03 2
抖音 2018-03-03 15

hive建立表:

-- Traffic records per user and day; the input file is tab-delimited.
-- `user` and `date` are reserved keywords in Hive 1.2.0 and later, so they are
-- back-quoted here; unquoted, this DDL fails to parse on those versions.
create table user_traffic (
    `user`  string,  -- user / app name
    `date`  string,  -- day as a 'yyyy-MM-dd' string, as in the sample data
    traffic bigint   -- traffic recorded for that user on that day
)
row format delimited
fields terminated by '\t';

-- Load the sample file from the local filesystem into user_traffic.
-- OVERWRITE replaces whatever data the table already holds.
load data
    local inpath '/home/hadoop/data/user_traffic.txt'
    overwrite
    into table user_traffic;

先按 user、月份分組，求出每個 user 每個月的 traffic 總和

-- Step 1: total traffic per user per month.
-- yearmonth is the year and month parts of the 'yyyy-MM-dd' date string glued
-- together, e.g. '2018-01-02' -> '201801'.
-- `user` and `date` are back-quoted because both are reserved keywords in
-- Hive 1.2.0 and later; the aggregate is named so it does not surface as _c2.
select
    temp.`user`,
    temp.yearmonth,
    sum(temp.traffic) as total
from (
    select
        `user`,
        concat(split(`date`, '-')[0], split(`date`, '-')[1]) as yearmonth,
        traffic
    from user_traffic
) temp
group by
    temp.`user`,
    temp.yearmonth;
2578384-d66220a1803eadf0.png
image.png

表自連線：把每月彙總結果與自身連線，讓每個 (user, 月份) 配對到同一 user 在當月及之前所有月份的彙總資料

-- Step 2: self-join the monthly totals so that every (user, month) row in t1 is
-- paired with the same user's t2 rows for that month and every earlier month.
--
-- The join must match on user only. Also requiring t1.yearmonth = t2.yearmonth
-- would keep nothing but same-month pairs, making the >= test meaningless and
-- leaving no earlier months to accumulate.
--
-- The equality goes in ON and the range test in WHERE because Hive versions
-- before 2.2.0 accept only equality conditions in a join's ON clause.
-- yearmonth compares correctly as a string as long as months are zero-padded
-- ('201801' < '201802'), which holds for the sample data.
with monthly as (
    -- total traffic per user per month; yearmonth looks like '201801'
    select
        temp.`user`,
        temp.yearmonth,
        sum(temp.traffic) as total
    from (
        select
            `user`,
            concat(split(`date`, '-')[0], split(`date`, '-')[1]) as yearmonth,
            traffic
        from user_traffic
    ) temp
    group by
        temp.`user`,
        temp.yearmonth
)
select
    t1.*,
    t2.*
from monthly t1
join monthly t2
    on t1.`user` = t2.`user`
where t1.yearmonth >= t2.yearmonth;
2578384-aeaa95783824a484.png
image.png

-- Final answer: for each user and month, the cumulative traffic up to and
-- including that month.
--
-- A running total is what window functions are for, so the monthly totals are
-- computed once and accumulated with SUM() OVER instead of self-joining two
-- copies of the same subquery.
--   * partition by user      -> each user accumulates independently
--   * order by yearmonth     -> 'yyyyMM' strings sort chronologically when
--                               months are zero-padded, as in the sample data
--   * rows ... current row   -> explicit frame: everything up to this month
--
-- The WHERE clause keeps the result identical to the self-join form, whose
-- inner join silently dropped rows with a NULL user or NULL yearmonth (NULL
-- never satisfies = or >=). It is applied before the window is evaluated, so
-- such rows neither appear in the output nor contribute to any running total.
with monthly as (
    -- total traffic per user per month; yearmonth looks like '201801'
    select
        temp.`user`,
        temp.yearmonth,
        sum(temp.traffic) as total
    from (
        select
            `user`,
            concat(split(`date`, '-')[0], split(`date`, '-')[1]) as yearmonth,
            traffic
        from user_traffic
    ) temp
    group by
        temp.`user`,
        temp.yearmonth
)
select
    m.`user`,
    m.yearmonth,
    sum(m.total) over (
        partition by m.`user`
        order by m.yearmonth
        rows between unbounded preceding and current row
    ) as cumulative_traffic
from monthly m
where m.`user` is not null
  and m.yearmonth is not null;

2578384-97e295c2dc2fa94a.png
image.png

相關文章