• MySQL->处理重复数据[20180517]


    限制数据重复的方式:表上增加主键(Primary Key)或增加唯一性索引(Unique)
     
     
    主键对重复资料进行限制,这样资料在导入时就无法重复插入
     
    -- Composite primary key: each (last_name, first_name) pair may appear only once.
    create table primary_t (
        last_name  varchar(20),
        first_name varchar(20),
        sex        varchar(8),
        primary key (last_name, first_name)
    ) engine = innodb default character set = utf8;
     
     
     
    -- \G is a mysql client statement terminator that prints each row vertically.
    -- The backslash is required; a bare trailing G is a syntax error.
    show table status like 'primary_t'\G
     
    show index from primary_t\G
     
     
    模拟插入两笔有主键约束的资料
    -- The first two rows share the key ('Frank', 'Li'), so this fails with ERROR 1062.
    insert into primary_t (last_name, first_name, sex)
    values
        ('Frank', 'Li', 'Man'),
        ('Frank', 'Li', 'Man'),
        ('Coco', 'Li', 'Woman');
     
    ERROR 1062 (23000): Duplicate entry 'Frank-Li' for key 'PRIMARY'
     
     
    -- All three (last_name, first_name) pairs are distinct, so the insert succeeds.
    insert into primary_t (last_name, first_name, sex)
    values
        ('Frank', 'Li', 'Man'),
        ('Frank', 'Cai', 'Man'),
        ('Coco', 'Li', 'Woman');
     
    mysql> select * from primary_t;
    +-----------+------------+-------+
    | last_name | first_name | sex   |
    +-----------+------------+-------+
    | Coco      | Li         | Woman |
    | Frank     | Cai        | Man   |
    | Frank     | Li         | Man   |
    +-----------+------------+-------+
    3 rows in set (0.00 sec)
     
     
    唯一性索引对重复资料进行限制
     
    -- The unique key unique_name forbids duplicate (last_name, first_name) pairs.
    create table unique_t (
        last_name  varchar(20),
        first_name varchar(20),
        sex        varchar(8),
        unique key unique_name (last_name, first_name)
    ) engine = innodb default character set = utf8;
     
    -- \G is a mysql client statement terminator that prints each row vertically.
    -- The backslash is required; a bare trailing G is a syntax error.
    show table status like 'unique_t'\G
     
    show index from unique_t\G
     
     
    模拟插入两笔有唯一性约束的资料
    -- The first two rows repeat ('Frank', 'Li'), so this fails with ERROR 1062.
    insert into unique_t (last_name, first_name, sex)
    values
        ('Frank', 'Li', 'Man'),
        ('Frank', 'Li', 'Man'),
        ('Coco', 'Li', 'Woman');
     
    ERROR 1062 (23000): Duplicate entry 'Frank-Li' for key 'unique_name'
     
     
    -- All three (last_name, first_name) pairs are distinct, so the insert succeeds.
    insert into unique_t (last_name, first_name, sex)
    values
        ('Frank', 'Li', 'Man'),
        ('Frank', 'Cai', 'Man'),
        ('Coco', 'Li', 'Woman');
    mysql> select * from unique_t;
    +-----------+------------+-------+
    | last_name | first_name | sex   |
    +-----------+------------+-------+
    | Frank     | Li         | Man   |
    | Frank     | Cai        | Man   |
    | Coco      | Li         | Woman |
    +-----------+------------+-------+
    3 rows in set (0.00 sec)
     
     
     
    对资料的统计
     
    -- No key or unique index here, so duplicate rows can be inserted freely.
    create table count_t (
        last_name  varchar(20),
        first_name varchar(20),
        sex        varchar(8)
    ) engine = innodb default character set = utf8;
     
    -- Seed rows for the grouping examples: 9 Frank, 8 Coco and 7 Alex rows.
    -- The column list is explicit so the insert does not depend on column order.
    -- 'Alex' is spelled with one casing throughout, so the per-name counts do not
    -- rely on the column using a case-insensitive collation.
    insert into count_t (last_name, first_name, sex) values
    ('Frank','A','Man'),
    ('Frank','B','Man'),
    ('Frank','C','Woman'),
    ('Frank','D','Man'),
    ('Frank','E','Man'),
    ('Frank','F','Woman'),
    ('Frank','G','Man'),
    ('Frank','H','Man'),
    ('Frank','I','Woman'),
    ('Coco','A','Woman'),
    ('Coco','B','Man'),
    ('Coco','C','Man'),
    ('Coco','D','Man'),
    ('Coco','E','Man'),
    ('Coco','F','Man'),
    ('Coco','G','Woman'),
    ('Coco','H','Woman'),
    ('Alex','B','Man'),
    ('Alex','C','Man'),
    ('Alex','D','Man'),
    ('Alex','E','Man'),
    ('Alex','F','Man'),
    ('Alex','G','Woman'),
    ('Alex','H','Woman')
    ;
     
    commit;
     
    按各个列进行分组统计
    -- Row count per last_name.
    select last_name, count(*)
    from count_t
    group by last_name;
     
    -- Row count per first_name.
    select first_name, count(*)
    from count_t
    group by first_name;
     
    -- Row count per sex.
    select sex, count(*)
    from count_t
    group by sex;
     
     
    mysql> select last_name,count(*) from count_t group by last_name;
    +-----------+----------+
    | last_name | count(*) |
    +-----------+----------+
    | Alex      |        7 |
    | Coco      |        8 |
    | Frank     |        9 |
    +-----------+----------+
    3 rows in set (0.00 sec)
     
     
    mysql> select first_name,count(*) from count_t group by first_name;
    +------------+----------+
    | first_name | count(*) |
    +------------+----------+
    | A          |        2 |
    | B          |        3 |
    | C          |        3 |
    | D          |        3 |
    | E          |        3 |
    | F          |        3 |
    | G          |        3 |
    | H          |        3 |
    | I          |        1 |
    +------------+----------+
    9 rows in set (0.00 sec)
     
     
    mysql> select sex,count(*) from count_t group by sex;
    +-------+----------+
    | sex   | count(*) |
    +-------+----------+
    | Man   |       16 |
    | Woman |        8 |
    +-------+----------+
    2 rows in set (0.00 sec)
     
     
    对分组的统计再进行限定:使用 having 只保留出现次数大于等于 8 的分组
    -- HAVING filters the groups after aggregation: keep last_names with 8 or more rows.
    select last_name, count(*)
    from count_t
    group by last_name
    having count(*) >= 8;
     
    mysql> select last_name ,count(*) from count_t group by last_name having count(*) >=8;
    +-----------+----------+
    | last_name | count(*) |
    +-----------+----------+
    | Coco      |        8 |
    | Frank     |        9 |
    +-----------+----------+
    2 rows in set (0.00 sec)
     
     
     
     
    对资料进行去重操作,主要使用distinct关键字
     
    -- Distinct values of last_name.
    select distinct last_name
    from count_t;
     
    -- Distinct values of first_name.
    select distinct first_name
    from count_t;
     
    -- Distinct values of sex.
    select distinct sex
    from count_t;
     
     
    对已有资料的表进行去重操作
     
    -- Two identical rows, inserted on purpose to create a duplicate in count_t.
    -- The column list is explicit so the insert does not depend on column order.
    insert into count_t (last_name, first_name, sex)
    values
        ('Xi', 'Xi', 'Man'),
        ('Xi', 'Xi', 'Man');
     
     
    增加主键约束,加入ignore,忽略重复的资料(注意:alter ignore table 语法已在 MySQL 5.7.4 中移除,仅适用于 5.6 及更早版本)
     
    -- IGNORE drops rows that would violate the new primary key instead of failing.
    -- ALTER IGNORE TABLE was removed in MySQL 5.7.4; this needs 5.6 or earlier.
    alter ignore table count_t add primary key (last_name, first_name);
     
    -- Only one of the two identical 'Xi' rows should remain.
    select *
    from count_t
    where last_name = 'Xi';
     
     
     
    mysql> alter ignore table count_t
        -> add primary key(last_name,first_name);
    Query OK, 26 rows affected (0.01 sec)
    Records: 26  Duplicates: 1  Warnings: 0
     
    mysql> select * from count_t where last_name='Xi';
    +-----------+------------+------+
    | last_name | first_name | sex  |
    +-----------+------------+------+
    | Xi        | Xi         | Man  |
    +-----------+------------+------+
    1 row in set (0.00 sec)
     
    使用group by方式也可去除重复资料
    
    
    -- Grouping by every column collapses identical rows into one in the copy.
    create table count_t0 as
    select last_name, first_name, sex
    from count_t
    group by last_name, first_name, sex;
     
     
  • 相关阅读:
    定制博客园CSS
    后记:Cookie安全大辩论总结
    硬造的轮子趟过的坑--浮点型转字符串函数
    支付宝Cookie高危漏洞引发的思考
    博客园人氣提升密籍
    写个PHP框架吧
    开发一个程序员专用的搜索引擎
    三种常见网站工程师招聘条件总结
    golang之vscode环境配置
    golang环境安装
  • 原文地址:https://www.cnblogs.com/also-brook/p/9050609.html
Copyright © 2020-2023  润新知