Session重叠问题学习(二)
u_room_log.zip
附件是实验的初始化表.另外需要先创建数字辅助表
-
create table nums(id int not null primary key);
-
-
delimiter $$
-
create procedure pCreateNums(cnt int)
-
begin
-
declare s int default 1;
-
truncate table nums;
-
while s<=cnt do
-
insert into nums select s;
-
set s=s+1;
-
end while;
-
end $$
-
delimiter ;
-
-
call pCreateNums(1000);
-
commit;
数字辅助表基本上是复杂SQL的标配.
以roomid为1 的数据了解需求
求每天每个房间同时两个用户在线的时长和最高在线人数
需要解决的几个问题
1.用户数据跨天。
比如 logid=634715的记录,该用户登录房间是
2018-01-15 15:29:55 至 2018-01-16 15:30:00
那么,这条数据需要拆分为两个记录
2018-01-15 15:29:55 至 2018-01-15 23:59:59
和
2018-01-16 00:00:00 至 2018-01-16 15:30:00
2.同一天同一房间同一用户的时间重叠
比如某个用户有两个记录
一个是 3点-5点登录;另外一个记录是4点-7点登录.
这两个记录需要合并为一个记录 就是该用户 3点-7点在线.
3.每天每个房间同时有两个用户在线,才算房间的活跃时间.
(后续可能扩展,比如四个在线用户才算活跃时间,这就是产品随口一说的事儿。不能因为她拍脑袋就得改程序改死自己。)
最后统计每天每个房间的活跃时间和最大在线人数.
我处理这个问题分了两个步骤.
第一个步骤,先合并同一房间同一用户登录时间的重叠部分.然后拆分跨天的记录.最后输出到一个结果表中.(表名 t1)
第二个步骤,通过一个自定义函数,处理第一步的结果表(t1).
首先,把某天某个房间,任意两个用户时间范围存在交叉的记录找出来.
(因为需求说明,两个用户同时在线才算活跃时间,所以时间范围有交叉是必要条件.如果一个用户在线就算房间的在线时间的话,就更容易一点,按照步骤一的算法,再次合并相同房间的时间范围即是)
然后把这些记录的时间点生成一个排序的列(固定行数行转列),
生成每个时间点到下个时间点的范围
这个范围是算法的最核心部分,他是某天某个房间所有记录的最小范围间隔.
也就是说,在某天某个房间,任何用户的时间范围不会和最小范围有重叠的部分.
最后用最小范围关联 t1的结果表,如果用户的在线时间和最小范围重合,就将重叠的最小范围和用户ID,房间ID写入最终结果表t2.
t2这个结果表,最终包含了某个房间某个用户一个或者多个的最小范围.
最终的查询就是聚合房间和最小范围,找到超过两个用户的记录,然后对用户求max得到最大在线人数.对最小范围的间隔时间求和得到房间的活跃时间.
步骤一:
-
drop table t1;
-
create table t1
-
select distinct
-
roomid,
-
userid,
-
if(date(s)!=date(e) and id>1,date(s+interval id-1 day),s) s,
-
if(date(s+interval id-1 day)=date(e) ,e,date_format(s+interval id-1 day,'%Y-%m-%d 23:59:59')) e
-
from (
-
SELECT DISTINCT s.roomid, s.userid, s.s, (
-
SELECT MIN(e)
-
FROM (SELECT DISTINCT roomid, userid, roomend AS e
-
FROM u_room_log a
-
WHERE NOT EXISTS (SELECT *
-
FROM u_room_log b
-
WHERE a.roomid = b.roomid
-
AND a.userid = b.userid
-
AND a.roomend >= b.roomstart
-
AND a.roomend < b.roomend)
-
) s2
-
WHERE s2.e > s.s
-
AND s.roomid = s2.roomid
-
AND s.userid = s2.userid
-
) AS e
-
FROM (SELECT DISTINCT roomid, userid, roomstart AS s
-
FROM u_room_log a
-
WHERE NOT EXISTS (SELECT *
-
FROM u_room_log b
-
WHERE a.roomid = b.roomid
-
AND a.userid = b.userid
-
AND a.roomstart > b.roomstart
-
AND a.roomstart <= b.roomend)
-
) s, (SELECT DISTINCT roomid, userid, roomend AS e
-
FROM u_room_log a
-
WHERE NOT EXISTS (SELECT *
-
FROM u_room_log b
-
WHERE a.roomid = b.roomid
-
AND a.userid = b.userid
-
AND a.roomend >= b.roomstart
-
AND a.roomend < b.roomend)
-
) e
-
WHERE s.roomid = e.roomid
-
AND s.userid = e.userid
-
-
) t1 ,
-
nums
-
where nums.id<=datediff(e,s)+1
-
-
;
-
-
alter table t1 add key(roomid,s,e),add primary key(roomid,userid,s,e);
步骤二:
-
DELIMITER $$
-
-
CREATE DEFINER=`root`@`localhost` FUNCTION `f`(pRoomId bigint,pTime timestamp) RETURNS int(11)
-
BEGIN
-
declare pResult bigint;
-
insert into t2
-
select v6.roomid,v6.userid,greatest(s,starttime) s,least(e,endtime) e
-
from (
-
select roomid,starttime,endtime from (
-
select @d as starttime,@d:=d,v3.roomid,v3.d endtime from (
-
select distinct roomid,
-
case
-
when nums.id=1 then v1s
-
when nums.id=2 then v1e
-
when nums.id=3 then v2s
-
when nums.id=4 then v2e
-
end d from (
-
select v1.roomid, v1.s v1s,v1.e v1e,v2.s v2s,v2.e v2e
-
from t1 v1
-
inner join t1 v2 on ((v1.s between v2.s and v2.e or v1.e between v2.s and v2.e ) and v1.roomid=v2.roomid)
-
where v2.roomid=pRoomId and v2.s>=pTime and v2.s<(pTime+interval '1' day) and (v2.roomid,v2.userid,v2.s,v2.e)!= (v1.roomid,v1.userid,v1.s,v1.e)
-
) a,nums where nums.id<=4
-
order by roomid,d
-
) v3,(select @d:='') vars
-
) v4 where starttime!=''
-
) v5 inner join t1 v6 on(v5.starttime between v6.s and v6.e and v5.endtime between v6.s and v6.e and v5.roomid=v6.roomid)
-
;
-
-
select row_count() into pResult;
-
RETURN pResult;
-
END $$
执行函数
select f(roomid,s) from (
select distinct roomid,date(s) s from t1
) a;
最终查询得到结果
-
select roomid,date(s) dt,round(sum(timestampdiff(second,s,e))/60) ts,max(c) c from (
-
select roomid,s,e ,count(distinct userid) c from t2 where roomid=660026 group by roomid,s,e having count(distinct userid)>1
-
) a group by roomid,date(s);
正文到此结束