Hive SQL必刷練習題:同時線上人數問題(*****)

萌哥-爱学习發表於2024-08-04

https://blog.csdn.net/Mikkkee/article/details/136776193

--Drop
DROP TABLE IF EXISTS test_live_events;
 
 
--DDL
CREATE TABLE IF NOT EXISTS test_live_events
(
    user_id      INT COMMENT '使用者id',
    live_id      INT COMMENT '直播id',
    in_datetime  STRING COMMENT '進入直播間時間',
    out_datetime STRING COMMENT '離開直播間時間'
) COMMENT '直播間訪問記錄' STORED AS ORC TBLPROPERTIES ( "orc.compress" = "SNAPPY", "discover.partitions" = "false" );
 
 
--Insert items
INSERT OVERWRITE TABLE test_live_events VALUES 
(1010, 1, '2023-08-12 19:00:00', '2'),
(100, 1, '2023-08-12 19:30:00', '2023-08-12 19:53:00'),
(100, 2, '2023-08-12 21:01:00', '2023-08-12 22:00:00'),
(101, 1, '2023-08-12 19:05:00', '2023-08-12 20:55:00'),
(101, 2, '2023-08-12 21:05:00', '2023-08-12 21:58:00'),
(102, 1, '2023-08-12 21:05:00', '2023-08-12 22:05:00'),
(102, 2, '2023-08-12 19:00:00', '2023-08-12 20:59:00'),
(102, 3, '2023-08-12 21:05:00', '2023-08-12 22:05:00'),
(104, 1, '2023-08-12 19:00:00', '2023-08-12 20:59:00'),
(104, 2, '2023-08-12 21:57:00', '2023-08-12 22:56:00'),
(105, 2, '2023-08-12 19:10:00', '2023-08-12 19:18:00'),
(106, 3, '2023-08-12 19:01:00', '2023-08-12 21:10:00');









WITH tmp_live_status AS (

create table  tmp as 
SELECT 
  user_id,
  live_id,
  in_datetime AS event_time,
  1 AS status
FROM
  test_live_events
UNION ALL
SELECT 
  user_id,
  live_id,
  out_datetime AS event_time,
  -1 AS status
FROM
  test_live_events
)
--建立臨時表 ,登入的時間 後面加個自動 為1  ,登出的時間 資料 加個自動 固定值為-1  兩個資料結果集union 起來 
SELECT live_id, MAX(online_cnt) AS online_cnt FROM ( SELECT user_id, live_id, event_time, status, SUM(status) OVER (PARTITION BY live_id ORDER BY event_time) AS online_cnt
-- 基於 遷入\出 時間進行排序, 基於直播間id進行分組. sum 遷入遷出的度量值 . 最大的那個就是某個時間點的 直播間
最多人數 FROM tmp ) a GROUP BY live_id ;

相關文章