樣例資料
點選檢視程式碼
insert overwrite table test.ysj_lianxu_not
select 'a', 't1'
union all
select 'a', 't2'
union all
select 'a', 't3'
union all
select 'b', 't3'
union all
select 'a', 't4'
union all
select 'a', 't5'
union all
select 'a', 't6'
union all
select 'a', 't7'
;
連續登入超過4天的使用者
點選檢視程式碼
SELECT diff,count(DISTINCT user_id ) d_ct
from (
SELECT user_id,click_time,date_sub(click_time,rk) as diff
from (
SELECT user_id,click_time,row_number() over(PARTITION BY user_id ORDER BY click_time ) as rk
from test.ysj_lianxu_not
) aa
) temp
GROUP BY diff
having count(DISTINCT user_id ) >=4
;
連續登入使用者,且中間沒有其他使用者登入
- 第一種方式
點選檢視程式碼
select user_id,diff,count(*)
from
(select user_id,rk_1-rk_2 diff
from
(select user_id
,row_number() over(order by click_time asc ) rk_1
,row_number() over(partition by user_id order by click_time asc ) rk_2
from test.ysj_lianxu_not
) b
) c
group by user_id,diff
having count(*)=4
;
第一種方式有以下的問題,應該用第二種方法
點選檢視程式碼
--連續登陸天數的使用者,並且中間沒有其他使用者登陸,此方法有問題,如果兩個使用者a和B,a使用者和b使用者在同一天的全域性排序的順序不同,則會造成誤差
-- user_id click_time rk_1 rk_2
-- a t1 1 1
-- a t2 2 2
-- b t3 3 1
-- a t3 4 3 比如t3這一天,a排在b的後邊,則會造成這一天資料和之後的日期連續,計算出來的a的連續登陸天數就會多一天
-- a t4 5 4
-- a t5 6 5
-- a t6 7 6
-- a t7 8 7
- 第二種方式:
點選檢視程式碼
WITH temp as (
SELECt click_time
from (
SELECT click_time,count(distinct user_id) as ct
from test.ysj_lianxu_not
GROUP BY click_time
) aa
where ct = 1
)
select diff,user_id,count(*)
from (
select user_id,cast(replace(click_time,'t','') as int) - rk as diff
from (
SELECT t2.user_id,t1.click_time,row_number() over(PARTITION BY t2.user_id ORDER BY t1.click_time ASC) as rk
from temp t1
left join test.ysj_lianxu_not t2
on t1.click_time = t2.click_time
) aa
) bb
GROUP BY diff,user_id
;
連續登入天數,允許間隔3天
間隔見天都可以,只需要將sql中的space函式的引數改成對應的天數減1就能滿足
點選檢視程式碼
with temp as (
select 111 as uid,'2023-01-01' as dt
union all
select 111 as uid,'2023-01-03' as dt
union all
select 111 as uid,'2023-01-05' as dt
union all
select 111 as uid,'2023-01-06' as dt
union all
select 111 as uid,'2023-01-09' as dt
union all
select 111 as uid,'2023-01-10' as dt
union all
select 111 as uid,'2023-01-11' as dt
union all
select 111 as uid,'2023-01-13' as dt
union all
select 111 as uid,'2023-01-17' as dt
union all
select 111 as uid,'2023-01-19' as dt
union all
select 111 as uid,'2023-01-20' as dt
union all
select 222 as uid,'2023-01-01' as dt
union all
select 222 as uid,'2023-01-02' as dt
union all
select 222 as uid,'2023-01-05' as dt
union all
select 222 as uid,'2023-01-07' as dt
union all
select 222 as uid,'2023-01-08' as dt
),
temp2 as (
select *,date_add(tt.dt,sp.pos) as last_dt
from temp tt
lateral view posexplode(split(space(2),'')) sp
) ,
temp3 as (
select uid,last_dt
from temp2
group by last_dt,uid
)
select uid,diff,count(*) as ct
from (
select uid,date_sub(last_dt,rk) as diff
from (
select uid,last_dt,row_number() over(partition by uid order by last_dt ) as rk
from temp3
) result
) ed
group by uid,diff