表结构:
create table hy_product( id number(9,0) primary key, name nvarchar2(20) not null, price integer not null);
数据:
insert into hy_product(id,name,price) values('1','AA','10'); insert into hy_product(id,name,price) values('2','AA款','10'); insert into hy_product(id,name,price) values('3','AA屏风','10'); insert into hy_product(id,name,price) values('4','PDST','20'); insert into hy_product(id,name,price) values('5','PDST款','20'); insert into hy_product(id,name,price) values('6','PDST-TJ','20'); insert into hy_product(id,name,price) values('7','ASDF','20'); insert into hy_product(id,name,price) values('8','ASDF款','20'); insert into hy_product(id,name,price) values('9','ASDF-TJ','20'); insert into hy_product(id,name,price) values('10','PDF','20'); insert into hy_product(id,name,price) values('11','PDF款','20'); insert into hy_product(id,name,price) values('12','PDF-TJ','20');
需求:上表的name字段里,有AA款,AA,AA屏风等产品,实际上它们都是同一产品的不同表述。现在需要把前缀相同的归到最一类,举例来说AA款,AA,AA屏风等应该被归纳到AA类里。
期待值:按照需求,归类完的产品应该是AA,3;PDST,3;ASDF,3;PDF,3;四种。
昨天的文章 https://www.cnblogs.com/xiandedanteng/p/12709690.html 是边讲解边运行,SQL显得有点多,今天只把必要的列出。
创建辅助序列表:
create table tb_seq( id number(9,0) primary key); insert into tb_seq select rownum from dual connect by level<16 order by dbms_random.random;
整理数据放到临时表里:
create table tb_tmp1 as select d.sery,d.cnt from (select c.sery,count(*) as cnt from (select b.sery from (select (case when a.namelen<a.cutlen then 'extra' else to_char(substr(a.name,1,a.cutlen)) end) as sery from (select p.name,length(p.name) as namelen,seq.id as cutlen from hy_product p,(select id from tb_seq where id<=(select max(len) as maxlen from (select name,length(name) as len from hy_product)) order by id) seq) a) b where b.sery<>'extra') c group by c.sery) d where length(d.sery)>1 and d.cnt>1 order by d.cnt desc,d.sery
最后从临时表里查询的SQL:
select * from tb_tmp1 where LENGTH(REGEXP_REPLACE(REPLACE((select listagg(sery,',') within group(order by sery) from tb_tmp1 ), sery, '@'), '[^@]+', ''))=1
查询结果:
可见结果与预期是完全相符的,昨晚今晨披星戴月写的SQL程序通过了考验。
以上使用的全部SQL:
create table hy_product( id number(9,0) primary key, name nvarchar2(20) not null, price integer not null); insert into hy_product(id,name,price) values('1','AA','10'); insert into hy_product(id,name,price) values('2','AA款','10'); insert into hy_product(id,name,price) values('3','AA屏风','10'); insert into hy_product(id,name,price) values('4','PDST','20'); insert into hy_product(id,name,price) values('5','PDST款','20'); insert into hy_product(id,name,price) values('6','PDST-TJ','20'); insert into hy_product(id,name,price) values('7','ASDF','20'); insert into hy_product(id,name,price) values('8','ASDF款','20'); insert into hy_product(id,name,price) values('9','ASDF-TJ','20'); insert into hy_product(id,name,price) values('10','PDF','20'); insert into hy_product(id,name,price) values('11','PDF款','20'); insert into hy_product(id,name,price) values('12','PDF-TJ','20'); commit; create table tb_seq( id number(9,0) primary key); insert into tb_seq select rownum from dual connect by level<16 order by dbms_random.random; create table tb_tmp1 as select d.sery,d.cnt from (select c.sery,count(*) as cnt from (select b.sery from (select (case when a.namelen<a.cutlen then 'extra' else to_char(substr(a.name,1,a.cutlen)) end) as sery from (select p.name,length(p.name) as namelen,seq.id as cutlen from hy_product p,(select id from tb_seq where id<=(select max(len) as maxlen from (select name,length(name) as len from hy_product)) order by id) seq) a) b where b.sery<>'extra') c group by c.sery) d where length(d.sery)>1 and d.cnt>1 order by d.cnt desc,d.sery select * from tb_tmp1 where LENGTH(REGEXP_REPLACE(REPLACE((select listagg(sery,',') within group(order by sery) from tb_tmp1 ), sery, '@'), '[^@]+', ''))=1 drop table hy_product drop table tb_seq drop table tb_tmp1
--2020-04-16--