近N日留存
现有用户登录表user_active_log一份,里面有两个字段:userId(用户ID),createdTime(登录日期),需要统计近1,2,3,5,7,30日留存用户数量及留存率。
思路:用每个用户的登录日期减去该用户的首次登录日期,得到的天数差即留存天数
第一步:选取2021年12月的登录记录,按用户id和登录日期(精确到天)去重
-- One row per (user, calendar day) for logins recorded in December 2021.
-- The day is the first 10 characters of createdTime (assumes the value
-- starts with 'YYYY-MM-DD' -- TODO confirm against the table definition).
-- DISTINCT collapses repeated logins by the same user on the same day.
SELECT DISTINCT
    userId,
    SUBSTR(createdTime, 1, 10) AS a_createdTime
FROM user_active_log
WHERE SUBSTR(createdTime, 1, 7) = '2021-12'
第二步:创建新列first_time,获取每个userId下的最早登录日期
-- For every distinct (user, day) login in December 2021, attach the user's
-- earliest login day within that month as first_time.
WITH daily_login AS (
    -- One row per user per day.
    SELECT DISTINCT
        userId,
        SUBSTR(createdTime, 1, 10) AS a_createdTime
    FROM user_active_log
    WHERE SUBSTR(createdTime, 1, 7) = '2021-12'
)
SELECT
    userId,
    a_createdTime,
    -- The smallest day in the user's partition is the same value as the first
    -- row when the partition is ordered by day ascending.
    MIN(a_createdTime) OVER (PARTITION BY userId) AS first_time
FROM daily_login
第三步:创建辅助列delta_time,用登录日期a_createdTime减去最早登录日期first_time,得到留存天数
-- For every distinct (user, day) login in December 2021, return the user's
-- earliest login day in that month (first_time) and the number of days
-- between that day and the current login day (delta_time).
WITH daily_login AS (
    -- One row per user per day.
    SELECT DISTINCT
        userId,
        SUBSTR(createdTime, 1, 10) AS a_createdTime
    FROM user_active_log
    WHERE SUBSTR(createdTime, 1, 7) = '2021-12'
),
login_with_first AS (
    -- Compute the per-user earliest day once so it can be reused below.
    SELECT
        userId,
        a_createdTime,
        MIN(a_createdTime) OVER (PARTITION BY userId) AS first_time
    FROM daily_login
)
SELECT
    userId,
    a_createdTime,
    first_time,
    -- 0 on the first login day itself, N on a login N days later.
    DATEDIFF(a_createdTime, first_time) AS delta_time
FROM login_with_first
第四步:按首次登录日期(first_time)分组,统计各留存天数(delta_time)对应的用户数,即该日首次登录用户的第N日留存数
-- Retained-user counts per cohort, where a cohort is the set of users whose
-- first login on or after 2021-12-01 falls on a given December 2021 day.
-- day_N = number of users in the cohort who logged in again exactly N days
-- after their first login.
--
-- Fix: the scan now extends through 2022-01-30 (2021-12-31 + 30 days).
-- Restricting the scan to December discarded every January login, so cohorts
-- close to month end could never show a return visit and day_30 could only be
-- non-zero for the 2021-12-01 cohort.
WITH daily_login AS (
    -- One row per user per day inside the observation window.
    -- Assumes createdTime starts with 'YYYY-MM-DD' -- TODO confirm.
    SELECT DISTINCT
        userId,
        SUBSTR(createdTime, 1, 10) AS a_createdTime
    FROM user_active_log
    WHERE SUBSTR(createdTime, 1, 10) >= '2021-12-01'
      AND SUBSTR(createdTime, 1, 10) <= '2022-01-30'
),
login_with_first AS (
    -- Earliest login day per user inside the window.
    SELECT
        userId,
        a_createdTime,
        MIN(a_createdTime) OVER (PARTITION BY userId) AS first_time
    FROM daily_login
),
login_delta AS (
    -- Days elapsed between the first login and each later login.
    SELECT
        userId,
        first_time,
        DATEDIFF(a_createdTime, first_time) AS delta_time
    FROM login_with_first
)
SELECT
    first_time,
    SUM(CASE WHEN delta_time = 1 THEN 1 ELSE 0 END) AS day_1,
    SUM(CASE WHEN delta_time = 2 THEN 1 ELSE 0 END) AS day_2,
    SUM(CASE WHEN delta_time = 3 THEN 1 ELSE 0 END) AS day_3,
    SUM(CASE WHEN delta_time = 5 THEN 1 ELSE 0 END) AS day_5,
    SUM(CASE WHEN delta_time = 7 THEN 1 ELSE 0 END) AS day_7,
    SUM(CASE WHEN delta_time = 30 THEN 1 ELSE 0 END) AS day_30
FROM login_delta
-- Keep December cohorts only; users first seen in January were scanned solely
-- because the window had to be widened for the return visits.
WHERE first_time <= '2021-12-31'
GROUP BY first_time
ORDER BY first_time
第五步:用各首次登录日期的第N日留存数除以该日首次登录的用户数,即得到留存率
-- Retention rate per cohort, where a cohort is the set of users whose first
-- login on or after 2021-12-01 falls on a given December 2021 day.
-- day_N = (users who logged in again exactly N days after their first login)
--         / (distinct users in the cohort).
--
-- Fix: the scan now extends through 2022-01-30 (2021-12-31 + 30 days).
-- Restricting the scan to December discarded every January login, so cohorts
-- close to month end always reported a rate of 0 and day_30 could only be
-- non-zero for the 2021-12-01 cohort.
WITH daily_login AS (
    -- One row per user per day inside the observation window.
    -- Assumes createdTime starts with 'YYYY-MM-DD' -- TODO confirm.
    SELECT DISTINCT
        userId,
        SUBSTR(createdTime, 1, 10) AS a_createdTime
    FROM user_active_log
    WHERE SUBSTR(createdTime, 1, 10) >= '2021-12-01'
      AND SUBSTR(createdTime, 1, 10) <= '2022-01-30'
),
login_with_first AS (
    -- Earliest login day per user inside the window.
    SELECT
        userId,
        a_createdTime,
        MIN(a_createdTime) OVER (PARTITION BY userId) AS first_time
    FROM daily_login
),
login_delta AS (
    -- Days elapsed between the first login and each later login.
    SELECT
        userId,
        first_time,
        DATEDIFF(a_createdTime, first_time) AS delta_time
    FROM login_with_first
)
SELECT
    first_time,
    -- Every user has a delta_time = 0 row for the first login itself, so the
    -- distinct user count of the group is the cohort size and is never zero.
    SUM(CASE WHEN delta_time = 1 THEN 1 ELSE 0 END) / COUNT(DISTINCT userId) AS day_1,
    SUM(CASE WHEN delta_time = 2 THEN 1 ELSE 0 END) / COUNT(DISTINCT userId) AS day_2,
    SUM(CASE WHEN delta_time = 3 THEN 1 ELSE 0 END) / COUNT(DISTINCT userId) AS day_3,
    SUM(CASE WHEN delta_time = 5 THEN 1 ELSE 0 END) / COUNT(DISTINCT userId) AS day_5,
    SUM(CASE WHEN delta_time = 7 THEN 1 ELSE 0 END) / COUNT(DISTINCT userId) AS day_7,
    SUM(CASE WHEN delta_time = 30 THEN 1 ELSE 0 END) / COUNT(DISTINCT userId) AS day_30
FROM login_delta
-- Keep December cohorts only; users first seen in January were scanned solely
-- because the window had to be widened for the return visits.
WHERE first_time <= '2021-12-31'
GROUP BY first_time
ORDER BY first_time