- 1. 初始化数据
- 1. 初始化数据
- 2. 每组会话开始时间
- 3. 每组会话结束时间
- 4. 合并,最终sql如下
- 5. 优化sql,视图处理
- 6. 优化效率,自增长列
- 7. 优化效率,临时表,没怎么看
1. 初始化数据
# 建立session表 DROP TABLE IF EXISTS session; CREATE TABLE sessions ( id INT NOT NULL AUTO_INCREMENT, app VARCHAR(10) NOT NULL, usr VARCHAR(10) NOT NULL, starttime TIME NOT NULL, endtime TIME NOT NULL, PRIMARY KEY(id) ); DELETE FROM sessions; # 插入数据 INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app1', 'usr1', '08:30', '10:30'); INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app1', 'usr2', '08:30', '08:35'); INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app1', 'usr1', '09:00', '09:30'); INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app1', 'usr2', '09:15', '10:30'); INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app1', 'usr1', '09:15', '09:30'); INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app1', 'usr2', '10:30', '14:30'); INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app1', 'usr1', '10:45', '11:30'); INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app1', 'usr2', '11:00', '12:30'); INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app2', 'usr1', '08:30', '08:45'); INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app2', 'usr2', '09:00', '09:30'); INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app2', 'usr1', '11:45', '12:00'); INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app2', 'usr2', '12:30', '14:00'); INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app2', 'usr1', '12:45', '13:30'); INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app2', 'usr2', '13:00', '14:00'); INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app2', 'usr1', '14:00', '16:30'); INSERT INTO sessions(app, usr, starttime, endtime) VALUES('app2', 'usr2', '15:30', '17:00'); # 创建索引 CREATE UNIQUE INDEX idx_app_usr_s_e_key ON sessions(app, usr, starttime, endtime, id); CREATE INDEX idx_app_s_e ON sessions(app, starttime, endtime)
2. 每组会话开始时间
服务提供商可能允许多个session的连接,并把其计费统计为1次,这就是所谓的分组重叠。对于上面的例子,应该把app1、user1在08:30~10:30间的3次会话算为一次会话。
我们分步骤来讨论这个问题,先求出每个会话组开始时间,并用DISTINCT返回不重复的开始时间,具体过程如下:
SELECT DISTINCT app, usr, starttime AS a FROM sessions AS a WHERE NOT EXISTS( SELECT * FROM sessions AS b WHERE a.app = b.app AND a.usr = b.usr AND a.starttime > b.starttime AND a.starttime <= b.endtime )
3. 每组会话结束时间
SELECT DISTINCT app, usr, endtime AS a FROM sessions AS a WHERE NOT EXISTS( SELECT * FROM sessions AS b WHERE a.app=b.app AND a.usr=b.usr AND a.endtime>=b.starttime AND a.endtime<b.endtime )
4. 合并,最终sql如下:
SELECT DISTINCT s.app, s.usr, s.s, ( SELECT MIN(e) FROM ( SELECT DISTINCT app, usr, endtime AS e FROM sessions AS a WHERE NOT EXISTS( SELECT * FROM sessions AS b WHERE a.app=b.app AND a.usr=b.usr AND a.endtime>=b.starttime AND a.endtime<b.endtime ) ) AS s2 WHERE s2.e>s.s AND s.app=s2.app AND s.usr=s2.usr ) AS e FROM ( SELECT DISTINCT app, usr, starttime AS s FROM sessions AS a WHERE NOT EXISTS( SELECT * FROM sessions AS b WHERE a.app = b.app AND a.usr = b.usr AND a.starttime > b.starttime AND a.starttime <= b.endtime ) ) AS s, ( SELECT DISTINCT app, usr, endtime AS e FROM sessions AS a WHERE NOT EXISTS( SELECT * FROM sessions AS b WHERE a.app=b.app AND a.usr=b.usr AND a.endtime>=b.starttime AND a.endtime<b.endtime ) ) AS e WHERE s.app = e.app AND s.usr=e.usr
5. 优化sql,视图处理
以上代码看起来非常复杂,因为需要使用多个子查询,实际上可以通过创建视图来行简化上述的SQL查询。可以进行这样的操作,将上述两个会话组开始和结束的临时表定义为视图,然后再进一步操作,如下:
# 开始 CREATE VIEW v_s AS SELECT DISTINCT app, usr, starttime AS s FROM sessions AS a WHERE NOT EXISTS( SELECT * FROM sessions AS b WHERE a.app = b.app AND a.usr = b.usr AND a.starttime > b.starttime AND a.starttime <= b.endtime ); # 结束 CREATE VIEW v_e AS SELECT DISTINCT app, usr, endtime AS e FROM sessions AS a WHERE NOT EXISTS( SELECT * FROM sessions AS b WHERE a.app=b.app AND a.usr=b.usr AND a.endtime>=b.starttime AND a.endtime<b.endtime ); # 查询 SELECT DISTINCT s.app, s.usr, s.s, ( SELECT MIN(e) FROM v_e AS s2 WHERE s2.e>s.s AND s.app=s2.app AND s.usr=s2.usr ) AS e FROM v_s AS s, v_e AS e WHERE s.app = e.app AND s.usr=e.usr;
6. 优化效率,自增长列
用视图作为派生表有一个缺点,那就是如果在一个查询中需要反复查询一个视图,那么这张视图需要被计算多次,如图,有这些东西应该是引起重视:
可以看到v_e这张视图被计算了两次,如果表中的记录非常大,那么需要提高视图的执行效率。怎么才能避免一个视图被查询多次呢?有一种方法是使用临时表,将v_s、v_e两张视图作为临时表。如果使用了临时表,那么将不再需要MIN函数的这个子查询,因为我们可以对临时表增加一个自增长的列,然后进行匹配即可。存储过程如下所示:
SET @A=0; SET @B=0; SELECT DISTINCT xx.app, xx.usr, s, e FROM ( SELECT @B:=@B+1 AS id, app, usr, s FROM ( SELECT DISTINCT app, usr, starttime AS s FROM sessions AS a WHERE NOT EXISTS( SELECT * FROM sessions AS b WHERE a.app = b.app AND a.usr = b.usr AND a.starttime > b.starttime AND a.starttime <= b.endtime ) ) AS p ) AS xx , ( SELECT @A:=@A+1 AS id, app, usr, e FROM ( SELECT DISTINCT app, usr, endtime AS e FROM sessions AS a WHERE NOT EXISTS( SELECT * FROM sessions AS b WHERE a.app=b.app AND a.usr=b.usr AND a.endtime>=b.starttime AND a.endtime<b.endtime ) ) AS o ) yy WHERE xx.id=xx.id AND xx.app = yy.app AND xx.usr=xx.usr
看图,3个减少到2个了,
7. 优化效率,临时表,没怎么看
CREATE PROCEDURE pGroupOverlap() BEGIN DROP TABLE IF EXISTS $s; DROP TABLE IF EXISTS $e; CREATE TEMPORARY TABLE $s( id INT AUTO_INCREMENT, app VARCHAR(10), usr VARCHAR(10), starttime TIME, PRIMARY KEY(id), KEY(usr, app) )ENGINE=MEMORY; CREATE TEMPORARY TABLE $e( id INT AUTO_INCREMENT, app VARCHAR(10), usr VARCHAR(10), endtime TIME, PRIMARY KEY(id), KEY(usr, app) )ENGINE=MEMORY; INSERT INTO $s(app, usr, starttime)( SELECT DISTINCT app, usr, starttime AS s FROM sessions AS a WHERE NOT EXISTS( SELECT * FROM sessions AS b WHERE a.app = b.app AND a.usr = b.usr AND a.starttime > b.starttime AND a.starttime <= b.endtime ) ); INSERT INTO $e(app, usr, endtime)( SELECT DISTINCT app, usr, endtime AS e FROM sessions AS a WHERE NOT EXISTS( SELECT * FROM sessions AS b WHERE a.app=b.app AND a.usr=b.usr AND a.endtime>=b.starttime AND a.endtime<b.endtime ) ); END -- 查询 SELECT s.app, s.usr, starttime, endtime FROM $s AS s, $e AS e WHERE s.app=e.app AND s.usr=e.usr AND s.id=e.id;
7. 优化效率,临时表,没怎么看
CREATE PROCEDURE pGroupOverlap() BEGIN DROP TABLE IF EXISTS $s; DROP TABLE IF EXISTS $e; CREATE TEMPORARY TABLE $s( id INT AUTO_INCREMENT, app VARCHAR(10), usr VARCHAR(10), starttime TIME, PRIMARY KEY(id), KEY(usr, app) )ENGINE=MEMORY; CREATE TEMPORARY TABLE $e( id INT AUTO_INCREMENT, app VARCHAR(10), usr VARCHAR(10), endtime TIME, PRIMARY KEY(id), KEY(usr, app) )ENGINE=MEMORY; INSERT INTO $s(app, usr, starttime)( SELECT DISTINCT app, usr, starttime AS s FROM sessions AS a WHERE NOT EXISTS( SELECT * FROM sessions AS b WHERE a.app = b.app AND a.usr = b.usr AND a.starttime > b.starttime AND a.starttime <= b.endtime ) ); INSERT INTO $e(app, usr, endtime)( SELECT DISTINCT app, usr, endtime AS e FROM sessions AS a WHERE NOT EXISTS( SELECT * FROM sessions AS b WHERE a.app=b.app AND a.usr=b.usr AND a.endtime>=b.starttime AND a.endtime<b.endtime ) ); END -- 查询 SELECT s.app, s.usr, starttime, endtime FROM $s AS s, $e AS e WHERE s.app=e.app AND s.usr=e.usr AND s.id=e.id;