mysql实现开窗函数
开窗分析函数简单来说就是类似partition by aaa, bbb order by ccc这样的形式
在使用mysql的过程中遇到过这样的需求,需要求出一段时间内每个item的最大值及对应的那一条记录。最大值很容易使用group by item求出来,但是求出那一条对应的所有字段及值却不好实现。经过查资料及探索发现mysql也可以实现,思路是人为构造一个字段来标记顺序,这样不仅能实现最大,还能实现TOP N。记录下简单示例:
准备数据
# 建表
CREATE TABLE `policy_summary` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`policy_name` varchar(18) COLLATE utf8_bin DEFAULT NULL,
`publish_time` datetime DEFAULT NULL,
`platform_id` int(11) DEFAULT NULL,
PRIMARY KEY (`id`)
)
# 插入数据
INSERT INTO `policy_summary` VALUES ('1', 'test0', '2019-04-01 15:11:00', '2');
INSERT INTO `policy_summary` VALUES ('2', 'test9', '2019-04-01 19:11:00', '2');
INSERT INTO `policy_summary` VALUES ('3', 'test8', '2019-04-01 19:11:00', '2');
INSERT INTO `policy_summary` VALUES ('4', 'test7', '2019-04-01 18:11:00', '2');
INSERT INTO `policy_summary` VALUES ('5', 'test1', '2019-04-01 15:11:00', '2');
INSERT INTO `policy_summary` VALUES ('6', 'test2', '2019-04-01 15:11:00', '2');
INSERT INTO `policy_summary` VALUES ('7', 'test3', '2019-04-01 16:11:00', '2');
INSERT INTO `policy_summary` VALUES ('8', 'test4', '2019-04-01 16:11:00', '2');
INSERT INTO `policy_summary` VALUES ('9', 'test5', '2019-04-01 16:11:00', '2');
INSERT INTO `policy_summary` VALUES ('10', 'test6', '2019-04-01 17:11:00', '2');
INSERT INTO `policy_summary` VALUES ('11', 'fengyu', '2019-04-02 19:17:00', '8');
INSERT INTO `policy_summary` VALUES ('12', '新建政策1111111', '2019-04-02 19:17:00', '8');
INSERT INTO `policy_summary` VALUES ('13', '1218测试', '2019-04-02 19:17:00', '8');
INSERT INTO `policy_summary` VALUES ('14', 'xxx', '2019-04-02 19:17:00', '8');
INSERT INTO `policy_summary` VALUES ('15', 'ccc', '2019-04-02 19:17:00', '8');
INSERT INTO `policy_summary` VALUES ('16', '测试114', '2019-04-02 19:17:00', '8');
INSERT INTO `policy_summary` VALUES ('17', '测试mmm', '2019-04-02 19:17:00', '8');
INSERT INTO `policy_summary` VALUES ('18', 'kkkkkkk', '2019-04-02 19:17:00', '8');
INSERT INTO `policy_summary` VALUES ('19', 'kkkkkkk明明', '2019-04-02 19:17:00', '8');
INSERT INTO `policy_summary` VALUES ('20', 'ceshi111', '2019-04-02 19:17:00', '8');
# 查看
mysql> select * from policy_summary;
+----+-----------------+---------------------+-------------+
| id | policy_name | publish_time | platform_id |
+----+-----------------+---------------------+-------------+
| 1 | test0 | 2019-04-01 15:11:00 | 2 |
| 2 | test9 | 2019-04-01 19:11:00 | 2 |
| 3 | test8 | 2019-04-01 19:11:00 | 2 |
| 4 | test7 | 2019-04-01 18:11:00 | 2 |
| 5 | test1 | 2019-04-01 15:11:00 | 2 |
| 6 | test2 | 2019-04-01 15:11:00 | 2 |
| 7 | test3 | 2019-04-01 16:11:00 | 2 |
| 8 | test4 | 2019-04-01 16:11:00 | 2 |
| 9 | test5 | 2019-04-01 16:11:00 | 2 |
| 10 | test6 | 2019-04-01 17:11:00 | 2 |
| 11 | fengyu | 2019-04-02 19:17:00 | 8 |
| 12 | 新建政策1111111 | 2019-04-02 19:17:00 | 8 |
| 13 | 1218测试 | 2019-04-02 19:17:00 | 8 |
| 14 | xxx | 2019-04-02 19:17:00 | 8 |
| 15 | ccc | 2019-04-02 19:17:00 | 8 |
| 16 | 测试114 | 2019-04-02 19:17:00 | 8 |
| 17 | 测试mmm | 2019-04-02 19:17:00 | 8 |
| 18 | kkkkkkk | 2019-04-02 19:17:00 | 8 |
| 19 | kkkkkkk明明 | 2019-04-02 19:17:00 | 8 |
| 20 | ceshi111 | 2019-04-02 19:17:00 | 8 |
+----+-----------------+---------------------+-------------+
根据platform_id,publish_time分组并根据policy_name排序
SELECT
a.platform_id,
a.publish_time,
a.policy_name,
a.id,
IF (
@str1 = a.platform_id
AND @str2 = a.publish_time,
@rank := @rank + 1,
@rank := 1
) AS rank_no,
@str1 := a.platform_id,
@str2 := a.publish_time
FROM
(
SELECT
platform_id,
publish_time,
policy_name,
id
FROM
policy_summary
ORDER BY
platform_id,
publish_time,
policy_name ASC
) a,
(
SELECT
@str1 := 0,
@str2 := NULL,
@rank := 0
) tmp
# 查询结果
+-------------+---------------------+-----------------+----+---------+------------------------+-------------------------+
| platform_id | publish_time | policy_name | id | rank_no | @str1 := a.platform_id | @str2 := a.publish_time |
+-------------+---------------------+-----------------+----+---------+------------------------+-------------------------+
| 2 | 2019-04-01 15:11:00 | test0 | 1 | 1 | 2 | 2019-04-01 15:11:00 |
| 2 | 2019-04-01 15:11:00 | test1 | 5 | 2 | 2 | 2019-04-01 15:11:00 |
| 2 | 2019-04-01 15:11:00 | test2 | 6 | 3 | 2 | 2019-04-01 15:11:00 |
| 2 | 2019-04-01 16:11:00 | test3 | 7 | 1 | 2 | 2019-04-01 16:11:00 |
| 2 | 2019-04-01 16:11:00 | test4 | 8 | 2 | 2 | 2019-04-01 16:11:00 |
| 2 | 2019-04-01 16:11:00 | test5 | 9 | 3 | 2 | 2019-04-01 16:11:00 |
| 2 | 2019-04-01 17:11:00 | test6 | 10 | 1 | 2 | 2019-04-01 17:11:00 |
| 2 | 2019-04-01 18:11:00 | test7 | 4 | 1 | 2 | 2019-04-01 18:11:00 |
| 2 | 2019-04-01 19:11:00 | test8 | 3 | 1 | 2 | 2019-04-01 19:11:00 |
| 2 | 2019-04-01 19:11:00 | test9 | 2 | 2 | 2 | 2019-04-01 19:11:00 |
| 8 | 2019-04-02 19:17:00 | 1218测试 | 13 | 1 | 8 | 2019-04-02 19:17:00 |
| 8 | 2019-04-02 19:17:00 | ccc | 15 | 2 | 8 | 2019-04-02 19:17:00 |
| 8 | 2019-04-02 19:17:00 | ceshi111 | 20 | 3 | 8 | 2019-04-02 19:17:00 |
| 8 | 2019-04-02 19:17:00 | fengyu | 11 | 4 | 8 | 2019-04-02 19:17:00 |
| 8 | 2019-04-02 19:17:00 | kkkkkkk | 18 | 5 | 8 | 2019-04-02 19:17:00 |
| 8 | 2019-04-02 19:17:00 | kkkkkkk明明 | 19 | 6 | 8 | 2019-04-02 19:17:00 |
| 8 | 2019-04-02 19:17:00 | xxx | 14 | 7 | 8 | 2019-04-02 19:17:00 |
| 8 | 2019-04-02 19:17:00 | 新建政策1111111 | 12 | 8 | 8 | 2019-04-02 19:17:00 |
| 8 | 2019-04-02 19:17:00 | 测试114 | 16 | 9 | 8 | 2019-04-02 19:17:00 |
| 8 | 2019-04-02 19:17:00 | 测试mmm | 17 | 10 | 8 | 2019-04-02 19:17:00 |
+-------------+---------------------+-----------------+----+---------+------------------------+-------------------------+
这样rank_no就是那个标志排序的字段,如果要求最大policy_name的记录就只需要加一个过滤条件rank_no <= 1即可
SELECT
t.platform_id,
t.publish_time,
t.policy_name,
t.rank_no,
t.id
FROM
(
SELECT
a.id,
a.platform_id,
a.publish_time,
a.policy_name,
IF (
@str1 = a.platform_id
AND @str2 = a.publish_time,
@rank := @rank + 1,
@rank := 1
) AS rank_no,
@str1 := a.platform_id,
@str2 := a.publish_time
FROM
(
SELECT
id,
platform_id,
publish_time,
policy_name
FROM
policy_summary
ORDER BY
platform_id,
publish_time,
policy_name ASC
) a,
(
SELECT
@str1 := 0,
@str2 := NULL,
@rank := 0
) tmp
) t
WHERE
t.rank_no <= 1