PostgreSQL数组类型应用

在使用 awk 脚本；数组是一大利器；在很多场景是用数组能处理。

在 python 中，数据类型list；相当于array类型。

在 Oracle 中，对 array 不够友好，感觉像是鸡肋。但是在 PostgreSQL 中，对array有很多支持，很多场景可以应用到。下面慢慢说

1、any(array) 替换 in（table）

-- 案例1
-- 创建表A；插入1000条记录；并每条记录重复4次
postgres=# create table A (id int, info text);
CREATE TABLE
postgres=# 
postgres=# insert into A select generate_series(1,1000), 'lottu';
INSERT 0 1000
postgres=# 
postgres=# insert into A select generate_series(1,1000), 'lottu';
INSERT 0 1000
postgres=# insert into A select * from A;
INSERT 0 2000
-- 用in的方式去处理重复数据
postgres=# begin;
BEGIN
postgres=# explain (analyze, costs, timing) delete from A where ctid not in (select min(ctid) from A group by id, info);
                                                        QUERY PLAN                                                         
---------------------------------------------------------------------------------------------------------------------------
 Delete on a  (cost=74.38..131.31 rows=1397 width=6) (actual time=12.619..12.619 rows=0 loops=1)
   ->  Seq Scan on a  (cost=74.38..131.31 rows=1397 width=6) (actual time=5.146..7.129 rows=3000 loops=1)
         Filter: (NOT (hashed SubPlan 1))
         Rows Removed by Filter: 1000
         SubPlan 1
           ->  HashAggregate  (cost=70.89..73.69 rows=279 width=42) (actual time=3.762..4.155 rows=1000 loops=1)
                 Group Key: a_1.id, a_1.info
                 ->  Seq Scan on a a_1  (cost=0.00..49.94 rows=2794 width=42) (actual time=0.017..1.158 rows=4000 loops=1)
 Planning Time: 1.923 ms
 Execution Time: 44.130 ms
(10 rows)
-- 用any(array)的方式处理
postgres=# explain (analyze, costs, timing) delete from A
postgres-#  where ctid = any(array (select ctid
postgres(#                      from (select "row_number"() over(partition by id, info) as rn,
postgres(#                                   ctid
postgres(#                              from A) as ad
postgres(#                     where ad.rn > 1));
                                                           QUERY PLAN                                                            
---------------------------------------------------------------------------------------------------------------------------------
 Delete on a  (cost=300.69..340.79 rows=10 width=6) (actual time=17.686..17.686 rows=0 loops=1)
   InitPlan 1 (returns $0)
     ->  Subquery Scan on ad  (cost=209.87..300.68 rows=931 width=6) (actual time=3.995..9.503 rows=3000 loops=1)
           Filter: (ad.rn > 1)
           Rows Removed by Filter: 1000
           ->  WindowAgg  (cost=209.87..265.75 rows=2794 width=50) (actual time=3.986..8.570 rows=4000 loops=1)
                 ->  Sort  (cost=209.87..216.86 rows=2794 width=42) (actual time=3.974..4.577 rows=4000 loops=1)
                       Sort Key: a_1.id, a_1.info
                       Sort Method: quicksort  Memory: 284kB
                       ->  Seq Scan on a a_1  (cost=0.00..49.94 rows=2794 width=42) (actual time=0.015..1.486 rows=4000 loops=1)
   ->  Tid Scan on a  (cost=0.01..40.11 rows=10 width=6) (actual time=11.130..12.945 rows=3000 loops=1)
         TID Cond: (ctid = ANY ($0))
 Planning Time: 0.619 ms
 Execution Time: 17.808 ms
(14 rows)
结论：
1、效率大大提升；数据量越大提升效果越好；any（array） 的效果 >= in
2、判断 array 所含元素的方法,有 any / some (any) 还有 all两种方法

2、array 相关函数

-- string 转换 array
-- 函数 string_to_array
select array_to_string(array[1, 2, 3], '~^~');
 array_to_string 
-----------------
 1~^~2~^~3
-- 函数 string_to_array
select string_to_array('1~^~2~^~3','~^~');
 string_to_array 
-----------------
 {1,2,3}
-- 函数 regexp_split_to_array；跟string_to_array有点类似
select regexp_split_to_array('1~^~2~^~3','~^~');
 regexp_split_to_array 
-----------------------
 {1,2,3}
-- 函数 unnest
select  unnest(array['a', 'b', 'c']);
 unnest 
--------
 a
 b
 c
-- 还可以结合with ordinality；添加行号
select * from unnest(array['a', 'b', 'c']) with ordinality;
 unnest | ordinality 
--------+------------
 a      |          1
 b      |          2
 c      |          3

相关阅读:
ASP.net2.0中的特殊文件App_global.asax.compiled
Enterprise Library 2.0中log文件大小设置
 尝试读取或写入受保护的内存,这通常指示其它内存已损坏
 没有钱的生活
 如何把java项目打包成war包
 常用的正则表达式
 最简单的oracle10g手工建库步骤
 rman复制数据库ORA01547ORA01194ORA01110，强制打开并修改日志文件
 oracle 10g 的max函数的bug
PL/SQL Developer 使用错误的tnsnames.ora,如何修改？
原文地址：https://www.cnblogs.com/lottu/p/13730272.html