SQL统计连续登陆3天的用户(连续活跃超3天用户)
目录
1. 数据准备
WITH user_active_info AS ( SELECT * FROM ( VALUES ('10001' , '2023-02-01'),('10001' , '2023-02-03') ,('10001' , '2023-02-04'),('10001' , '2023-02-05') ,('10002' , '2023-02-02'),('10002' , '2023-02-03') ,('10002' , '2023-02-04'),('10002' , '2023-02-05') ,('10002' , '2023-02-07'),('10003' , '2023-02-02') ,('10003' , '2023-02-03'),('10003' , '2023-02-04') ,('10003' , '2023-02-05'),('10003' , '2023-02-06') ,('10003' , '2023-02-07'),('10003' , '2023-02-08') ,('10004' , '2023-02-03'),('10004' , '2023-02-04') ,('10004' , '2023-02-06'),('10004' , '2023-02-07') ,('10004' , '2023-02-08'),('10004' , '2023-02-08') ,('10005' , '2023-02-02'),('10005' , '2023-02-05') ) AS user_active_info(user_id, active_date) )
2. 方法一: 差值计算
SELECT user_id , active_date , ROW_NUMBER() OVER(PARTITION BY user_id ORDER BY active_date) AS rn FROM user_active_info GROUP BY user_id , active_date ;
user_id | active_date | rn |
---|
10001 | 2023-02-01 | 1 |
10001 | 2023-02-03 | 2 |
10001 | 2023-02-04 | 3 |
10001 | 2023-02-05 | 4 |
10002 | 2023-02-02 | 1 |
10002 | 2023-02-03 | 2 |
10002 | 2023-02-04 | 3 |
10002 | 2023-02-05 | 4 |
10002 | 2023-02-07 | 5 |
… | … | … |
SELECT user_id , active_date , rn , DATE_SUB(active_date,rn) AS sub_date FROM ( SELECT user_id , active_date , ROW_NUMBER() OVER(PARTITION BY user_id ORDER BY active_date) AS rn FROM user_active_info GROUP BY user_id , active_date ) a ;
user_id | active_date | rn | sub_date |
---|
10001 | 2023-02-01 | 1 | 2023-01-31 |
10001 | 2023-02-03 | 2 | 2023-02-01 |
10001 | 2023-02-04 | 3 | 2023-02-01 |
10001 | 2023-02-05 | 4 | 2023-02-01 |
10002 | 2023-02-02 | 1 | 2023-02-01 |
10002 | 2023-02-03 | 2 | 2023-02-01 |
10002 | 2023-02-04 | 3 | 2023-02-01 |
10002 | 2023-02-05 | 4 | 2023-02-01 |
10002 | 2023-02-07 | 5 | 2023-02-02 |
… | … | … | … |
SELECT user_id , MIN(active_date) AS begin_date , MAX(active_date) AS end_date , COUNT (1) AS login_duration FROM ( SELECT user_id , active_date , rn , DATE_SUB(active_date,rn) AS sub_date FROM ( SELECT user_id , active_date , ROW_NUMBER() OVER(PARTITION BY user_id ORDER BY active_date) AS rn FROM user_active_info GROUP BY user_id , active_date ) a ) b GROUP BY user_id , sub_date HAVING login_duration >= 3 ;
user_id | begin_date | end_date | login_duration |
---|
10001 | 2023-02-03 | 2023-02-05 | 3 |
10002 | 2023-02-02 | 2023-02-05 | 4 |
10003 | 2023-02-02 | 2023-02-08 | 7 |
10004 | 2023-02-06 | 2023-02-08 | 3 |
3. 方法二: lead或lag函数
SELECT user_id , active_date , lead(active_date , 2 , 0) OVER(PARTITION BY user_id ORDER BY active_date) AS lead_active_date FROM user_active_info GROUP BY user_id , active_date
user_id | active_date | lead_active_date |
---|
10001 | 2023-02-01 | 2023-02-04 |
10001 | 2023-02-03 | 2023-02-05 |
10001 | 2023-02-04 | 0 |
10001 | 2023-02-05 | 0 |
10002 | 2023-02-02 | 2023-02-04 |
10002 | 2023-02-03 | 2023-02-05 |
10002 | 2023-02-04 | 2023-02-07 |
10002 | 2023-02-05 | 0 |
10002 | 2023-02-07 | 0 |
… | … | … |
SELECT user_id , active_date , lead_active_date FROM ( SELECT user_id , active_date , lead(active_date , 2 , 0) OVER(PARTITION BY user_id ORDER BY active_date) AS lead_active_date FROM user_active_info GROUP BY user_id , active_date ) a WHERE lead_active_date != '0' AND DATEDIFF(lead_active_date , active_date) = 2
user_id | active_date | lead_active_date |
---|
10001 | 2023-02-03 | 2023-02-05 |
10002 | 2023-02-02 | 2023-02-04 |
10002 | 2023-02-03 | 2023-02-05 |
… | … | … |
SELECT user_id FROM ( SELECT user_id , active_date , lead_active_date FROM ( SELECT user_id , active_date , lead(active_date , 2 , 0) OVER(PARTITION BY user_id ORDER BY active_date) AS lead_active_date FROM user_active_info GROUP BY user_id , active_date ) a WHERE lead_active_date != '0' AND DATEDIFF(lead_active_date , active_date) = 2 ) b GROUP BY user_id
user_id |
---|
10001 |
10002 |
10003 |
10004 |
end