Session重叠问题学习(八)--该问题第六次优化和Oracle版本
已经说好不玩了.还是不能罢手.
之前极致优化的方法,是先计算最小区间,再计算最小区间上的用户数量.
其实可以一边计算最小区间,同时计算最小区间上的用户数量.
-
DELIMITER $$
-
-
CREATE DEFINER=`root`@`localhost` PROCEDURE `p`()
-
BEGIN
-
drop table if exists t1;
-
drop table if exists tmp_time_point;
-
drop table if exists tmp_s;
-
CREATE temporary TABLE `t1` (
-
`roomid` int(11) NOT NULL DEFAULT '0',
-
`userid` bigint(20) NOT NULL DEFAULT '0',
-
`s` timestamp,
-
`e` timestamp,
-
primary key(roomid,userid,s,e)
-
) ENGINE=memory;
-
-
-
create temporary table tmp_time_point(
-
roomid bigint,
-
timepoint timestamp,
-
type smallint,
-
key(roomid,timepoint)
-
) engine=memory;
-
-
create temporary table tmp_s(
-
roomid bigint,
-
userid bigint,
-
s timestamp,
-
e timestamp,
-
i int
-
) engine=memory;
-
-
SET @A=0;
-
SET @B=0;
-
-
insert into tmp_s
-
SELECT x.roomid,x.userid,s,e,datediff(e,s)+1 i
-
FROM
-
(
-
(
-
SELECT @B:=@B+1 AS id,roomid,userid,s
-
FROM (
-
SELECT DISTINCT roomid, userid, roomstart AS s
-
FROM u_room_log a
-
WHERE NOT EXISTS (SELECT *
-
FROM u_room_log b
-
WHERE a.roomid = b.roomid
-
AND a.userid = b.userid
-
AND a.roomstart > b.roomstart
-
AND a.roomstart <= b.roomend)
-
) AS p
-
) AS x,
-
(
-
SELECT @A:=@A+1 AS id,roomid,userid,e
-
FROM
-
(
-
SELECT DISTINCT roomid, userid, roomend AS e
-
FROM u_room_log a
-
WHERE NOT EXISTS (SELECT *
-
FROM u_room_log b
-
WHERE a.roomid = b.roomid
-
AND a.userid = b.userid
-
AND a.roomend >= b.roomstart
-
AND a.roomend < b.roomend)
-
) AS o
-
) AS y
-
)
-
WHERE x.id = y.id AND x.roomid = y.roomid AND x.userid = y.userid ;
-
-
select max(i) into @c from tmp_s;
-
-
insert ignore into t1(roomid,userid,s,e)
-
select
-
roomid, userid,
-
if(date(s)!=date(e) and id>1,date(s+interval id-1 day),s) s,
-
if(date(s+interval id-1 day)=date(e) ,e,date_format(s+interval id-1 day,'%Y-%m-%d 23:59:59')) e
-
from tmp_s t1 STRAIGHT_JOIN
-
nums on(nums.id<=t1.i)
-
where nums.id<=@c
-
-
;
-
-
-- 开始点+1,结束点-1
-
insert into tmp_time_point(roomid,timepoint,type) select roomid,s,1 from t1;
-
insert into tmp_time_point(roomid,timepoint,type) select roomid,e,-1 from t1;
-
-
select roomid,date(s) dt,round(sum(timestampdiff(second,date_format(s,'%Y-%m-%d %H:%i:%s'),date_format(e,'%Y-%m-%d %H:%i:%s')))/60) ts,max(rn) c from (
-
select
-
if(@roomid=roomid,@d,'') as s,@d:=str_to_date(timepoint,'%Y-%m-%d %H:%i:%s.%f'),@roomid:=roomid,p.roomid,str_to_date(timepoint,'%Y-%m-%d %H:%i:%s.%f') e ,rn
-
from
-
(
-
select round(case when @roomid=roomid then @rn:=@rn+prevType when @roomid:=roomid then @rn:=prevType end) rn,b.prevType,roomid,timepoint,type from (
-
select if(@roomid=roomid,@type,0) prevType ,case when @roomid=roomid then @type:=type when @roomid:=roomid then @type:=1 end,a.roomid,timepoint,type from (
-
select * from ( select roomid,timepoint,sum(type) type from tmp_time_point group by roomid,timepoint) tmp_time_point,(select @roomid:=-1,@rn:=0,@type:=0) vars order by roomid ,timepoint
-
) a
-
) b order by roomid ,timepoint
-
)p,(select @d:='',@roomid:=-1) vars
-
order by roomid,timepoint
-
) v4 where s!='' and date(s)=date(e) and rn>=2
-
group by roomid,date(s);
-
-
END
到了这个MySQL的版本,确实已经非常的晦涩难懂了.
由于MySQL没有提供开窗函数,这种复杂计算非常的吃亏.
这个最后的版本,执行时间在 889毫秒到921毫秒之间.
MySQL 版本 最后还能优化小100毫秒,非常的不容易.
这个版本过于晦涩,我用同样的思路,写了一个Oracle版本的.
性能好,可读性还强.
-
with c1 as -- 合并同一房间同一用户的重叠时间段,用于统计峰值人数
-
(
-
select distinct roomid,userid,min(s) s,max(e) e
-
from (select roomid,userid,s,e,
-
sum(broken) over (partition by roomid, userid order by s,e) flag
-
from (select t.*,
-
(case when s <= max(e) over (partition by roomid, userid order by s,e rows between unbounded preceding and 1 preceding) then 0
-
else 1
-
end) as broken
-
from (select roomid,userid,roomstart s,roomend e from u_room_log ) t
-
) t
-
) t
-
group by roomid,userid,flag
-
),
-
c2 as -- 拆分跨天的时间段
-
(
-
select *
-
from (select roomid,userid,s,e
-
from c1
-
where trunc(s) = trunc(e) -- 不跨天
-
union all
-
select roomid,userid,
-
case when id = 1 then s else trunc(s)+id-1 end s,
-
case when id = m2 then e else (trunc(s)+id) -(1/24/60/60) end e
-
from (select roomid,userid,s,e,id,
-
max(id) over (partition by roomid,userid,s) m2
-
from c1,nums
-
where trunc(s) <> trunc(e) -- 跨天
-
and id <= trunc(e)-trunc(s)+1) t1) t1
-
),
-
c3 as -- 在计算最小范围的同时,计算区间用户数
-
(
-
-
select roomid,ts endtime,sum(prevType) over(partition by roomid order by ts) rn,
-
lag(ts) over (partition by roomid order by ts) starttime
-
from (
-
select a.*,nvl(lag(type) over (partition by roomid order by ts),0) prevType
-
from (
-
select
-
roomid,ts,sum(type) type
-
from (
-
select roomid,e ts, -1 type
-
from c2
-
union all
-
select roomid,s ts, 1 type
-
from c2
-
) group by roomid,ts
-
) a
-
) c
-
)
-
select roomid,to_char(dt,'yyyy-mm-dd HH24:mi:ss') dt,round(sum(dur)/60) ts,max(rn) c from (
-
select roomid,trunc(starttime) dt,(endtime-starttime)*3600*24 dur,rn
-
from c3 where rn>=2 and trunc(endtime)=trunc(starttime) and starttime is not null
-
order by roomid,endtime
-
)
-
group by roomid,dt
-
order by roomid,dt;
不像MySQL,需要考虑所有的细节.这个同样算法的Oracle版本,随随便便就能跑到213毫秒.
正文到此结束