0
点赞
收藏
分享

微信扫一扫

SQL分析——近N日留存

西特张 2022-04-13 阅读 36
sql

近N日留存

现有用户登录表user_active_log一份,里面有两个字段:userId(用户ID),createdTime(登录日期),需要统计近1,2,3,5,7,30日留存用户数量及留存率。
在这里插入图片描述

思路:用每条登录记录的登录日期减去该用户的最早登录日期,差值即为留存天数

第一步:选取12月份的记录,根据用户id和登录日期去重

-- Step 1: one row per (userId, login day) for December 2021.
-- A half-open range on the bare column replaces SUBSTR(createdTime, 1, 7) = '2021-12',
-- so an index on createdTime can be used.
-- NOTE(review): assumes createdTime is 'YYYY-MM-DD ...' text or a date/time type -- confirm against the table DDL.
SELECT
    userId,
    SUBSTR(createdTime, 1, 10) AS a_createdTime  -- keep only the 'YYYY-MM-DD' part
FROM user_active_log
WHERE createdTime >= '2021-12-01'
  AND createdTime < '2022-01-01'
GROUP BY
    userId,
    SUBSTR(createdTime, 1, 10)  -- collapses multiple logins on the same day

在这里插入图片描述

第二步:创建新列first_time,获取每个userId下的最早登录日期

-- Step 2: attach each user's earliest login day (first_time) to every login row.
-- first_time is the earliest login inside the filtered month, not necessarily
-- the user's first-ever login, because the month filter is applied first.
WITH daily_login AS (
    -- One row per (userId, login day) for December 2021.
    -- The half-open range keeps createdTime bare so an index on it can be used.
    -- NOTE(review): assumes createdTime is 'YYYY-MM-DD ...' text or a date/time type -- confirm.
    SELECT
        userId,
        SUBSTR(createdTime, 1, 10) AS a_createdTime
    FROM user_active_log
    WHERE createdTime >= '2021-12-01'
      AND createdTime < '2022-01-01'
    GROUP BY
        userId,
        SUBSTR(createdTime, 1, 10)
)
SELECT
    userId,
    a_createdTime,
    FIRST_VALUE(a_createdTime) OVER (
        PARTITION BY userId
        ORDER BY a_createdTime
    ) AS first_time
FROM daily_login

在这里插入图片描述

第三步:创建辅助列delta_time,用登录日期列减去最早登录日期first_time,得到留存天数

-- Step 3: add delta_time, the number of days between a login day and the
-- user's earliest login day in the filtered month (0 on the first day itself).
WITH daily_login AS (
    -- One row per (userId, login day) for December 2021.
    -- The half-open range keeps createdTime bare so an index on it can be used.
    -- NOTE(review): assumes createdTime is 'YYYY-MM-DD ...' text or a date/time type -- confirm.
    SELECT
        userId,
        SUBSTR(createdTime, 1, 10) AS a_createdTime
    FROM user_active_log
    WHERE createdTime >= '2021-12-01'
      AND createdTime < '2022-01-01'
    GROUP BY
        userId,
        SUBSTR(createdTime, 1, 10)
),
login_with_first AS (
    -- Compute first_time once so the window expression is not repeated in DATEDIFF.
    SELECT
        userId,
        a_createdTime,
        FIRST_VALUE(a_createdTime) OVER (
            PARTITION BY userId
            ORDER BY a_createdTime
        ) AS first_time
    FROM daily_login
)
SELECT
    userId,
    a_createdTime,
    first_time,
    DATEDIFF(a_createdTime, first_time) AS delta_time  -- two-argument form: first argument minus second, in days
FROM login_with_first

在这里插入图片描述

第四步:按首次登录日期(first_time)分组,统计各留存天数对应的用户数,即该日首次登录用户的第N日留存数

-- Step 4: for each first-login day, count users who logged in again exactly
-- N days later (N = 1, 2, 3, 5, 7, 30).
-- daily_login holds one row per user per day, so each user adds at most 1 to any day_N.
WITH daily_login AS (
    -- One row per (userId, login day) for December 2021.
    -- The half-open range keeps createdTime bare so an index on it can be used.
    -- NOTE(review): assumes createdTime is 'YYYY-MM-DD ...' text or a date/time type -- confirm.
    SELECT
        userId,
        SUBSTR(createdTime, 1, 10) AS a_createdTime
    FROM user_active_log
    WHERE createdTime >= '2021-12-01'
      AND createdTime < '2022-01-01'
    GROUP BY
        userId,
        SUBSTR(createdTime, 1, 10)
),
login_with_first AS (
    -- first_time is the earliest login inside the filtered month for each user.
    SELECT
        userId,
        a_createdTime,
        FIRST_VALUE(a_createdTime) OVER (
            PARTITION BY userId
            ORDER BY a_createdTime
        ) AS first_time
    FROM daily_login
),
login_delta AS (
    -- delta_time: days elapsed since the user's first_time (0 on the first day).
    SELECT
        userId,
        first_time,
        DATEDIFF(a_createdTime, first_time) AS delta_time
    FROM login_with_first
)
SELECT
    first_time,
    SUM(CASE WHEN delta_time = 1 THEN 1 ELSE 0 END) AS day_1,
    SUM(CASE WHEN delta_time = 2 THEN 1 ELSE 0 END) AS day_2,
    SUM(CASE WHEN delta_time = 3 THEN 1 ELSE 0 END) AS day_3,
    SUM(CASE WHEN delta_time = 5 THEN 1 ELSE 0 END) AS day_5,
    SUM(CASE WHEN delta_time = 7 THEN 1 ELSE 0 END) AS day_7,
    SUM(CASE WHEN delta_time = 30 THEN 1 ELSE 0 END) AS day_30
FROM login_delta
GROUP BY first_time
ORDER BY first_time

在这里插入图片描述

第五步:用各首次登录日期的第N日留存数除以该日首次登录的用户数(count(distinct userId)),即得第N日留存率

-- Step 5: retention rate per first-login day = users active exactly N days
-- later divided by the number of users whose first_time is that day.
-- Cohorts whose day N falls after the filtered month report 0, because those
-- logins are outside the selected data.
WITH daily_login AS (
    -- One row per (userId, login day) for December 2021.
    -- The half-open range keeps createdTime bare so an index on it can be used.
    -- NOTE(review): assumes createdTime is 'YYYY-MM-DD ...' text or a date/time type -- confirm.
    SELECT
        userId,
        SUBSTR(createdTime, 1, 10) AS a_createdTime
    FROM user_active_log
    WHERE createdTime >= '2021-12-01'
      AND createdTime < '2022-01-01'
    GROUP BY
        userId,
        SUBSTR(createdTime, 1, 10)
),
login_with_first AS (
    -- first_time is the earliest login inside the filtered month for each user.
    SELECT
        userId,
        a_createdTime,
        FIRST_VALUE(a_createdTime) OVER (
            PARTITION BY userId
            ORDER BY a_createdTime
        ) AS first_time
    FROM daily_login
),
login_delta AS (
    -- delta_time: days elapsed since the user's first_time (0 on the first day).
    SELECT
        userId,
        first_time,
        DATEDIFF(a_createdTime, first_time) AS delta_time
    FROM login_with_first
)
SELECT
    first_time,
    -- COUNT(DISTINCT userId) is the cohort size: every user has a delta_time = 0 row in their first_time group.
    SUM(CASE WHEN delta_time = 1 THEN 1 ELSE 0 END) / COUNT(DISTINCT userId) AS day_1,
    SUM(CASE WHEN delta_time = 2 THEN 1 ELSE 0 END) / COUNT(DISTINCT userId) AS day_2,
    SUM(CASE WHEN delta_time = 3 THEN 1 ELSE 0 END) / COUNT(DISTINCT userId) AS day_3,
    SUM(CASE WHEN delta_time = 5 THEN 1 ELSE 0 END) / COUNT(DISTINCT userId) AS day_5,
    SUM(CASE WHEN delta_time = 7 THEN 1 ELSE 0 END) / COUNT(DISTINCT userId) AS day_7,
    SUM(CASE WHEN delta_time = 30 THEN 1 ELSE 0 END) / COUNT(DISTINCT userId) AS day_30
FROM login_delta
GROUP BY first_time
ORDER BY first_time

在这里插入图片描述

举报

相关推荐

0 条评论