Session重叠问题学习(六)--极致优化
接前文
Session重叠问题学习(二),这是问题和需求的描述,执行时间90秒
http://blog.itpub.net/29254281/viewspace-2150229/
Session重叠问题学习(三)--优化,一次优化后,执行时间25秒
http://blog.itpub.net/29254281/viewspace-2150259/
Session重叠问题学习(四)--再优化,二次优化后,执行时间10秒
http://blog.itpub.net/29254281/viewspace-2150297/
Session重叠问题学习(五)--最优化,三次优化后,执行时间1.6秒
http://blog.itpub.net/29254281/viewspace-2150339/
周五晚上终于把这个算法初步实现了.
连续加班忙碌了一个星期,终于有点曙光了.
从这个问题的缘起,到目前应该已经优化了快100倍了
但是周末的时候,想想还是不对.
小花狸Session合并算法(对,以后这个算法就叫这个名称了) 实现的合并速度应该是非常快的.代价仅仅是扫描一遍记录.
这1.6秒到底用在哪里了?
后来经过反复调试.发现还有两块可以优化改进的地方.
改进后的过程如下:
drop procedure p;
DELIMITER $$
CREATE DEFINER=`root`@`localhost` PROCEDURE `p`()
BEGIN
declare done int default 0;
declare v_roomid bigint ;
declare v_time timestamp (6);
declare v_cur_type smallint ;
declare v_before_roomid bigint default -1;
declare v_before_type smallint default -1;
declare v_before_time timestamp (6) ;
declare v_num bigint default 0;
declare cur_test CURSOR for select roomid,type,timepoint from tmp_time_point order by roomid,timepoint,type ;
DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
drop table if exists t1;
drop table if exists t2;
drop table if exists tmp_time_point;
drop table if exists tmp_result;
drop table if exists tmp_min_range;
drop table if exists tmp_s;
CREATE temporary TABLE `t1` (
`roomid` int (11) NOT NULL DEFAULT '0' ,
`userid` bigint (20) NOT NULL DEFAULT '0' ,
`s` timestamp (6),
`e` timestamp (6),
primary key (roomid,userid,s,e)
) ENGINE=memory;
CREATE temporary TABLE `t2` (
`roomid` int (11) NOT NULL DEFAULT '0' ,
`s` timestamp (6),
`e` timestamp (6)
) ENGINE=memory;
CREATE temporary TABLE `tmp_min_range` (
`roomid` int (11) NOT NULL DEFAULT '0' ,
`s` timestamp (6),
`e` timestamp (6),
primary key (roomid,s,e),
key (roomid,e)
) ENGINE=memory;
create temporary table tmp_time_point(
roomid bigint ,
timepoint timestamp (6),
type smallint ,
key (roomid,timepoint)
) engine=memory;
create temporary table tmp_result(
roomid bigint ,
timepoint timestamp (6),
c int
) engine=memory;
create temporary table tmp_s(
roomid bigint ,
userid bigint ,
s timestamp ,
e timestamp ,
i int
) engine=memory;
SET @A=0;
SET @B=0;
insert into tmp_s
SELECT x.roomid,x.userid,s,e,datediff(e,s)+1 i
FROM
(
(
SELECT @B:=@B+1 AS id,roomid,userid,s
FROM (
SELECT DISTINCT roomid, userid, roomstart AS s
FROM u_room_log a
WHERE NOT EXISTS (SELECT *
FROM u_room_log b
WHERE a.roomid = b.roomid
AND a.userid = b.userid
AND a.roomstart > b.roomstart
AND a.roomstart <= b.roomend)
) AS p
) AS x,
(
SELECT @A:=@A+1 AS id,roomid,userid,e
FROM
(
SELECT DISTINCT roomid, userid, roomend AS e
FROM u_room_log a
WHERE NOT EXISTS (SELECT *
FROM u_room_log b
WHERE a.roomid = b.roomid
AND a.userid = b.userid
AND a.roomend >= b.roomstart
AND a.roomend < b.roomend)
) AS o
) AS y
)
WHERE x.id = y.id AND x.roomid = y.roomid AND x.userid = y.userid ;
select max (i) into @c from tmp_s;
insert ignore into t1(roomid,userid,s,e)
select
roomid, userid,
if(date (s)!=date (e) and id>1,date (s+interval id-1 day ),s) s,
if(date (s+interval id-1 day )=date (e) ,e,date_format(s+interval id-1 day ,'%Y-%m-%d 23:59:59' )) e
from tmp_s t1 STRAIGHT_JOIN
nums on (nums.id<=t1.i)
where nums.id<=@c
;
insert into t2 (roomid,s,e)
select roomid,
s+interval startnum/1000000 second s,
e-interval endnum/1000000 second e
from (
select
roomid,
s,e,
startnum,
case when @eflag=eflag then @rn:=@rn+1 when @eflag:=eflag then @rn else @rn end endnum
from (
select * from (
select case when @sflag=sflag then @rn:=@rn+1 when @sflag:=sflag then @rn else @rn end startnum,roomid,s,e,sflag,eflag from
(
select * from
(
select t1.*,concat('[' ,roomid,'],' ,s) sflag,concat('[' ,roomid,'],' ,e) eflag from t1 order by roomid ,sflag
)a,(select @sflag:='' ,@rn:=0,@eflag:='' ) vars
) b
) bb order by roomid,eflag
) c
) d ;
insert into tmp_time_point(roomid,timepoint,type) select roomid,s,1 from t2;
insert into tmp_time_point(roomid,timepoint,type) select roomid,e,0 from t2;
insert ignore into tmp_min_range(roomid,s,e)
select roomid,starttime starttime, endtime endtime from (
select
if(@roomid=roomid,@d,'' ) as starttime,@d:=str_to_date(timepoint,'%Y-%m-%d %H:%i:%s.%f' ),@roomid:=roomid,p.roomid,str_to_date(timepoint,'%Y-%m-%d %H:%i:%s.%f' ) endtime
from tmp_time_point p,(select @d:='' ,@roomid:=-1) vars
order by roomid,timepoint
) v4 where starttime!='' and date (starttime)=date (endtime);
open cur_test;
repeat
fetch cur_test into v_roomid,v_cur_type,v_time;
if done !=1 then
-- 第一行或者每个房间的第一行
if v_before_roomid=-1 or v_roomid!=v_before_roomid then
set v_before_roomid:=v_roomid;
set v_before_type:=1;
set v_before_time:='0000-00-00 00:00:00' ;
set v_num:=0;
end if;
if v_before_type=1 then
set v_num:=v_num+1;
insert into tmp_result(roomid,timepoint,c) values (v_roomid,v_time,v_num);
end if;
if v_before_type=0 then
set v_num:=v_num-1;
insert into tmp_result(roomid,timepoint,c) values (v_roomid,v_time,v_num);
end if;
set v_before_roomid:=v_roomid;
set v_before_type:=v_cur_type;
set v_before_time:=v_time;
end if;
until done end repeat;
close cur_test;
select roomid,date (s) dt,round(sum (timestampdiff(second ,date_format(s,'%Y-%m-%d %H:%i:%s' ),date_format(e,'%Y-%m-%d %H:%i:%s' )))/60) ts,max (c)-1 c from (
select a.roomid,a.s,a.e,r.c,r.timepoint from tmp_result r
inner join
tmp_min_range a on ( r.timepoint=a.e and r.roomid=a.roomid)
where c>2
) a group by roomid,date (s);
END
第一处改进
原来同一房间同一用户重叠时间合并,然后再拆分跨天数据,用的是一条SQL
现在改进如下
create temporary table tmp_s(
roomid bigint ,
userid bigint ,
s timestamp ,
e timestamp ,
i int
) engine=memory;
SET @A=0;
SET @B=0;
insert into tmp_s
SELECT x.roomid,x.userid,s,e,datediff(e,s)+1 i
FROM
(
(
SELECT @B:=@B+1 AS id,roomid,userid,s
FROM (
SELECT DISTINCT roomid, userid, roomstart AS s
FROM u_room_log a
WHERE NOT EXISTS (SELECT *
FROM u_room_log b
WHERE a.roomid = b.roomid
AND a.userid = b.userid
AND a.roomstart > b.roomstart
AND a.roomstart <= b.roomend)
) AS p
) AS x,
(
SELECT @A:=@A+1 AS id,roomid,userid,e
FROM
(
SELECT DISTINCT roomid, userid, roomend AS e
FROM u_room_log a
WHERE NOT EXISTS (SELECT *
FROM u_room_log b
WHERE a.roomid = b.roomid
AND a.userid = b.userid
AND a.roomend >= b.roomstart
AND a.roomend < b.roomend)
) AS o
) AS y
)
WHERE x.id = y.id AND x.roomid = y.roomid AND x.userid = y.userid ;
select max (i) into @c from tmp_s;
insert ignore into t1(roomid,userid,s,e)
select
roomid, userid,
if(date (s)!=date (e) and id>1,date (s+interval id-1 day ),s) s,
if(date (s+interval id-1 day )=date (e) ,e,date_format(s+interval id-1 day ,'%Y-%m-%d 23:59:59' )) e
from tmp_s t1 STRAIGHT_JOIN
nums on (nums.id<=t1.i)
where nums.id<=@c
;
先把同一房间同一用户的重叠部分合并,然后暂存临时表
记录最大的间隔时间,然后再拆分数据
拆分数据的时候 使用STRAIGHT_JOIN 强制连接顺序.
这样避免因为数字辅助表过大,而导致性能陡然变差.
第二处改进
原来使用distinct的查询, 都改为在临时表上增加主键.
然后使用insert ignore into 代替 insert into
这样大概优化了300毫秒
经过反复优化之后,执行时间大致稳定在1250毫秒 至 1300 毫秒
各个部分耗时分析如下
填充tmp_s,合并同一房间同一用户的重叠部分,耗时655毫秒
填充t1,拆分跨天的用户数据,耗时62毫秒
填充t2,用户时间段首尾相交或者首尾 全部重合的数据拆分,耗时140毫秒
填充tmp_min_range,计算最小间隔范围,耗时156毫秒
小花狸Session合并算法, 耗时219毫秒
结果统计展示,耗时47毫秒
正文到此结束