連續登入及其變種

A1340399426發表於2024-06-20

樣例資料

點選檢視程式碼
insert overwrite table test.ysj_lianxu_not 
  select 'a',        't1'
  union all
  select 'a',        't2'
  union all
  select 'a',        't3'
  union all 
  select 'b',         't3'
  union all
  select 'a',        't4'
  union all
  select 'a',        't5'
  union all
  select 'a',        't6'
  union all
  select 'a',        't7'
;

連續登入超過4天的使用者

點選檢視程式碼
SELECT diff,count(DISTINCT user_id ) d_ct
from (
SELECT user_id,click_time,date_sub(click_time,rk) as diff 
from (
SELECT user_id,click_time,row_number() over(PARTITION BY user_id ORDER BY click_time ) as rk 
from test.ysj_lianxu_not
) aa
) temp 
GROUP BY diff 
having count(DISTINCT user_id ) >=4
;

連續登入使用者,且中間沒有其他使用者登入

  • 第一種方式
點選檢視程式碼
select  user_id,diff,count(*)
from
(select user_id,rk_1-rk_2 diff
from
(select user_id
,row_number() over(order by click_time asc ) rk_1
,row_number() over(partition by user_id order by click_time asc ) rk_2
from test.ysj_lianxu_not
) b
) c
group by user_id,diff
having count(*)=4
;

第一種方式有以下的問題,應該用第二種方法

點選檢視程式碼
--連續登陸天數的使用者,並且中間沒有其他使用者登陸,此方法有問題,如果兩個使用者a和B,a使用者和b使用者在同一天的全域性排序的順序不同,則會造成誤差
-- 	user_id	click_time	rk_1	rk_2
-- 	a	     t1	         1	     1
-- 	a	     t2	         2	     2
-- 	b	     t3	         3	     1
-- 	a	     t3	         4	     3 比如t3這一天,a排在b的後邊,則會造成這一天資料和之後的日期連續,計算出來的a的連續登陸天數就會多一天
-- 	a	     t4	         5	     4
-- 	a	     t5	         6	     5
-- 	a	     t6	         7	     6
-- 	a	     t7	         8	     7
	

  • 第二種方式:
點選檢視程式碼
WITH temp as (
SELECt click_time
from (
SELECT click_time,count(distinct user_id) as ct 
from test.ysj_lianxu_not
GROUP BY click_time
) aa
where ct = 1 
)
select  diff,user_id,count(*)
from (
select user_id,cast(replace(click_time,'t','') as int) - rk as diff 
from (
        SELECT  t2.user_id,t1.click_time,row_number() over(PARTITION BY t2.user_id ORDER BY t1.click_time ASC) as rk 
        from temp t1
        left join test.ysj_lianxu_not t2
        on t1.click_time = t2.click_time
        ) aa 
) bb 
GROUP BY diff,user_id
;

連續登入天數,允許間隔3天
間隔見天都可以,只需要將sql中的space函式的引數改成對應的天數減1就能滿足

點選檢視程式碼
with temp as (
select 111 as uid,'2023-01-01' as dt 
union all
select 111 as uid,'2023-01-03' as dt 
union all
select 111 as uid,'2023-01-05' as dt 
union all
select 111 as uid,'2023-01-06' as dt 
union all
select 111 as uid,'2023-01-09' as dt 
union all
select 111 as uid,'2023-01-10' as dt 
union all
select 111 as uid,'2023-01-11' as dt 
union all
select 111 as uid,'2023-01-13' as dt 
union all
select 111 as uid,'2023-01-17' as dt 
union all
select 111 as uid,'2023-01-19' as dt 
union all
select 111 as uid,'2023-01-20' as dt 
union all
select 222 as uid,'2023-01-01' as dt 
union all
select 222 as uid,'2023-01-02' as dt 
union all
select 222 as uid,'2023-01-05' as dt 
union all
select 222 as uid,'2023-01-07' as dt 
union all
select 222 as uid,'2023-01-08' as dt 
),
temp2 as (
        select *,date_add(tt.dt,sp.pos) as last_dt
        from temp  tt
        lateral view posexplode(split(space(2),'')) sp 
) ,
temp3 as (
        select uid,last_dt
        from temp2
        group by last_dt,uid
) 
select uid,diff,count(*) as ct 
from (
        select uid,date_sub(last_dt,rk) as diff
        from (
                select uid,last_dt,row_number() over(partition by uid order by last_dt ) as rk
                from temp3
        ) result 
) ed
group by uid,diff 

相關文章