appendonly
-- drop table if exists test_appendonly;
-- create table test_appendonly with(appendonly=true, compresslevel=5) as
-- select generate_series(0, 1000) a, 'helloworld'::varchar(50) b
-- distributed by(a);
select oid from pg_class where relname='test_appendonly';
select oid, oid::regclass from pg_class where relname='test_appendonly' or relname like '%212266%';
-- d pg_aoseg.pg_aoseg_212266
select * from get_ao_compression_ratio('test_appendonly');
select sum(eofuncompressed)/sum(eof) as compression_ratio from gp_dist_random('pg_aoseg.pg_aoseg_212266');
select * from get_ao_distribution('test_appendonly') order by segmentid;
select gp_segment_id, tupcount from gp_dist_random('pg_aoseg.pg_aoseg_212266') order by gp_segment_id;
drop table if exists public.table_info cascade;
create table public.table_info (
tablename text, -- 表名
subparname text, -- 分区各
tablecount bigint, -- 表的行数
tablesize bigint, -- 表大小
prettysize text, -- 格式化大小输出
max_div_avg float, -- 斜率,最大节点数据量/平均节点数据量
compression_ratio text -- 压缩率
);
-- 获取表信息
create or replace function public.get_table_info(tablename text) returns setof table_info as $$
def one_table_info(plpy, tablename, subparname, aosegname, privilege):
aosegsql = ""
# plpy.info(privilege)
if privilege == '1':
aosegsql = '''
select '%s' tablename, '%s' subparname,
coalesce(sum(tupcount)::bigint, 0) tablecount,
coalesce(sum(eof)::bigint, 0) tablesize,
pg_size_pretty(coalesce(sum(tupcount)::bigint, 0)) prettysize,
coalesce(max(tupcount)::bigint, 1)/(case when coalesce(avg(tupcount), 1.0) = 0 then 1
else coalesce(avg(tupcount), 1.0) end) max_div_avg,
coalesce(sum(eofuncompressed), 1)/(case when coalesce(sum(eof), 1.0) = 0 then 1
else coalesce(sum(eof), 1.0) end) compression_ratio
from gp_dist_random('%s');
'''%(tablename, subparname, aosegname)
else:
aosegsql = '''
select '%s' tablename, '%s' subparname,
0 tablecount, 0 tablesize, 'permission denied' prettysize,
0 max_div_avg, 0 compression_ratio;
'''%(tablename, subparname)
plpy.info(aosegsql)
result_rv=plpy.execute(aosegsql)
# plpy.info(result_rv[0]);
return result_rv[0]
try:
table_name = tablename.lower().split('.')[1]
table_schema = tablename.lower().split('.')[0]
except(IndexError):
plpy.error('Please in put "tableschema.table_name"')
# check version of database
check_version_sql = """
select substring(version(), 'Database (.*) build') as version;
"""
rv = plpy.execute(check_version_sql)
version = rv[0]['version']
plpy.execute("set enable_seqscan=off")
# get table oid
get_table_oid = ''
if version > '3.4.0':
get_table_oid = """
select a.oid, reloptions, b.segrelid, regclass2text(b.segrelid::regclass) aosegname, relstorage,
case has_table_privilege(user, b.segrelid, 'select') when 't' then '1' else '0' end privilege
from pg_class a left join pg_appendonly b on a.oid=b.relid where a.oid='%s'::regclass;
"""%(tablename)
else:
get_table_oid = """
select oid, reloptions, relaosegrelid, regclass2text(relaosegrelid::regclass) aosegname, relstorage,
case has_table_privilege(user, relaosegrelid, 'select') when 't' then '1' else '0' end privilege
from pg_class where oid='%s'::regclass;
"""%(tablename)
try:
rv_oid = plpy.execute(get_table_oid, 5)
if not rv_oid:
plpy.error('Did not find any relation named "' + tablename + '".')
except (Error):
plpy.error('Did not find any relation named "' + tablename + '".')
#
table_oid = rv_oid[0]['oid']
if rv_oid[0]['relstorage'] != 'a':
plpy.error(tablename + ' is not appendonly table, this function only support appendonly talbe')
# plpy.info('table_oid')
# check if table is partitin table
check_par_table="select count(*) from pg_partition where parrelid=%s"%(table_oid)
if version > '3.4.0':
tablecount_sql = """
select regclass2text(pp.parrelid::regclass) tabname, prl.parname, parruleord, pa.segrelid,
regclass2text(pa.segrelid::regclass) aosegname,
case has_table_privilege(user, pa.segrelid, 'select') when 't' then '1' else '0' end privilege
from pg_partition pp, pg_partition_rule prl, pg_appendonly pa
where pp.paristemplate=false and pp.parrelid=%s and prl.paroid=pp.oid and pa.relid=prl.parchildrelid
order by prl.parruleord;
"""%(table_oid)
else:
tablecount_sql = """
select regclass2text(pp.parrelid::regclass) tabname, prl.parname, parruleord, pc.relaosegrelid,
regclass2text(pc.relaosegrelid::regclass) aosegname,
case has_table_privilege(user, pc.relaosegrelid, 'select') when 't' then '1' else '0' end privilege
from pg_partition pp, pg_partition_rule prl, pg_class pc
where pp.paristemplate=false and pp.parrelid=%s and prl.paroid=pp.oid and pc.oid=prl.parchildrelid
and relaosegrelid <> 0 order by prl.parruleord;
"""%(table_oid)
rv = plpy.execute(check_par_table)
if rv[0]['count'] == 1:
al = plpy.execute(tablecount_sql)
result_rv = []
rv_tmp = []
totalcount = 0
totalsize = 0
unzipsize = 0
compression_ratio = 1
for i in al:
rv_ao = one_table_info(plpy, tablename, i['parname'], i['aosegname'], str(i['privilege']))
rv_tmp.append(rv_ao)
totalsize = totalsize + rv_ao['tablesize']
totalcount = totalcount + rv_ao['tablecount']
unzipsize = unzipsize + rv_ao['tablesize'] * rv_ao['compression_ratio']
if totalsize == 0:
compression_total = 1
else:
compression_ratio = unzipsize/totalsize
total_count_sql = """
select '%s' as tablename, '###ALL###' as subparname, %d as tablecount, %d as tablesize,
pg_size_pretty(%d::bigint) prettysize, null as max_div_avg, %f as compression_ratio;
"""%(tablename, totalcount, totalsize, totalsize, compression_ratio)
a2 = plpy.execute(total_count_sql)
result_rv.append(a2[0])
plpy.info('===' + total_count_sql)
for i in rv_tmp:
result_rv.append(i)
return result_rv;
else:
result_rv = []
rv_ao = one_table_info(plpy, tablename, '', rv_oid[0]['aosegname'], str(rv_oid[0]['privilege']));
result_rv.append(rv_ao)
return result_rv
$$ language plpythonu;
select * from get_table_info('public.test_appendonly');
select get_table_info('public.test_appendonly');
hostname
-- create language plpythonu ;
create or replace function public.hostname() returns text as $$
import socket;
return socket.gethostname();
$$ language plpythonu;
create or replace function public.reverse(str text) returns text as $$
if str != None:
return str[::-1]
else:
return None
$$ language plpythonu;
create or replace function public.json_parse(data text) returns text as $$
import json
try:
mydata = json.loads(data)
except:
return ['Parse json error']
returndata = []
try:
for people in mydata['people']:
returndata.append(people['firstName'] + ' ' + people['lastName'])
except:
return ['Parse json error']
return returndata
$$ language plpythonu;
select hostname();
select hostname() from gp_dist_random('gp_id');
select gp_segment_id, count(1) from gp_dist_random('pg_class') group by 1 order by 1;
select * from gp_configuration;
select * from gp_segment_configuration;
select * from pg_filespace_entry;
select * from gp_configuration_history;
-- 判断某个表是否是分区表
select count(1) from pg_partition where parrelid='public.tb_partition_list_yyyymmdd'::regclass;
select * from pg_partition_rule;
drop view public.v_pg_partitions;
create view public.v_pg_partitions as
select pp.parrelid tableoid, prl.parchildrelid, prl.parname as partitionname,
case
when pp.parkind='h'::"char" then 'hash'::text
when pp.parkind='r'::"char" then 'range'::text
when pp.parkind='l'::"char" then 'list'::text
else NULL::text
end as partitiontype,
case
when pg_get_expr(prl.parrangeend, prl.parchildrelid) = ''
then pg_get_expr(prl.parlistvalues, prl.parchildrelid)
else pg_get_expr(prl.parrangeend, prl.parchildrelid)
end as HIGH_VALUE,
pg_get_partition_rule_def(prl.oid, true) as partitionboundary,
prl.parruleord as partitionposition
from pg_partition pp, pg_partition_rule prl
where pp.paristemplate = false and prl.paroid=pp.oid;
select * from public.v_pg_partitions where tableoid='tb_partition_list_yyyymmdd'::regclass order by partitionposition;
partition
drop table if exists public.tb_partition_range_yyyymmdd cascade;
create table public.tb_partition_range_yyyymmdd (
id numeric,
yyyymmdd date
) with(appendonly=true, compresslevel=5)
distributed by(id)
partition by range(yyyymmdd)
(
partition p20120811 start ('2012-08-11'::date) end ('2012-08-12'::date)
-- partition p20120812 start ('2012-08-12'::date) end ('2012-08-13'::date)
);
drop table if exists public.tb_partition_list_yyyymmdd cascade;
create table public.tb_partition_list_yyyymmdd (
id numeric,
yyyymmdd varchar(128)
) with(appendonly=true, compresslevel=5)
distributed by(id)
partition by list(yyyymmdd)
(
partition p20120811 values('20120811'),
partition p20120812 values('20120812')
);
drop view if exists public.v_pg_add_partitions cascade;
create view public.v_pg_add_partitions as
select pp.parrelid tableoid, prl.parchildrelid, prl.parname as partitionname,
case
when pp.parkind='h'::"char" then 'hash'::text
when pp.parkind='r'::"char" then 'range'::text
when pp.parkind='l'::"char" then 'list'::text
else NULL::text
end as partitiontype,
translate(pg_get_expr(prl.parlistvalues, prl.parchildrelid), '-''::date
character varying bpchar numeric double precision timestamp without time zone', '') as partitionlistvalue,
substring(translate(pg_get_expr(prl.parrangestart, prl.parchildrelid), '-''::date
character varying bpchar numeric double precision timestamp without time zone', ''), 1, 8) as partitionrangestart,
substring(translate(pg_get_expr(prl.parrangeend, prl.parchildrelid), '-''::date
character varying bpchar numeric double precision timestamp without time zone', ''), 1, 8) as partitionrangeend,
prl.parruleord as partitionposition,
substring(parlistvalues, 'consttype ([0-9]+)')::integer::regtype listtype,
substring(parrangeend, 'consttype ([0-9]+)')::integer::regtype rangetype
from pg_partition pp, pg_partition_rule prl where pp.paristemplate=false and prl.paroid=pp.oid;
create or replace function public.add_partition_info(tableoid oid, days_from_now integer) returns setof text as $$
import datetime
def now():
d = datetime.datetime.now()
format = '%Y%m%d'
return datetime.datetime.strftime(d, format)
def add_day(d, n):
format = '%Y%m%d'
d2 = datetime.datetime.strptime(d, format)
d3 = d2 + datetime.timedelta(days = n)
return datetime.datetime.strftime(d3, format)
def add_month(d, n):
format = '%Y%m%d'
formatymd = '%Y%m01'
if d.__len__() == 6:
format = '%Y%m'
formatymd = '%Y%m'
d2 = datetime.datetime.strptime(d, format)
d3 = d2 + datetime.timedelta(days = 31 * n)
return datetime.datetime.strftime(d3, formatymd)
relist = []
# pre_value 是上一个分区的值,主要是 list 分区时使用
sql = """select *, tableoid::regclass tablename, lead(case when partitionrangeend <> '' then partitionrangeend
else partitionlistvalue end) over(partition by tableoid order by partitionposition desc) as pre_value,
row_number() over(partition by tableoid order by partitionposition desc) rn
from v_pg_add_partitions where substr(partitionname, 1, 3) = 'p20' and tableoid=%s;"""%(tableoid)
rv = plpy.execute(sql);
sql_relation = "select array_to_string(reloptions, ',') reloptions from pg_class where oid=%s"%(tableoid)
rv_relation = plpy.execute(sql_relation)
if rv.nrows() == -1:
return []
else:
reloptions = rv_relation[0]['reloptions']
tablename = rv[0]['tablename']
partitiontype = rv[0]['partitiontype']
partitionname = rv[0]['partitionname']
pre_value = rv[0]['pre_value']
now_add_7days = add_day(now(), days_from_now)
# 处理 range 分区
if partitiontype == 'range':
rangetype = rv[0]['rangetype']
partitionrangestart = rv[0]['partitionrangestart']
partitionrangeend = rv[0]['partitionrangeend']
interval = int(partitionrangeend) - int(partitionrangestart)
# 按月分区
if partitionname.__len__() == 7:
func_add = add_month
interval = int(partitionrangeend[0:6]) - int(partitionrangestart[0:6])
# 按天分区
elif partitionname.__len__() == 9:
func_add = add_day
# 分区名不规范,不处理
else:
return []
partitionrangestart = now()
while partitionrangestart < now_add_7days:
partitionrangeend = func_add(partitionrangestart, )
partitionname = 'p' + partitionrangestart
add_sql = "alter table %s add partition %s start ('%s'::%s) end ('%s'::%s)"%(tablename, partitionname, partitionrangestart, rangetype, partitionrangeend, rangetype)
if reloptions != None and reloptions != '':
add_sql += 'with(%s);'%(reloptions)
else:
add_sql += ';'
plpy.execute(add_sql);
relist.append(add_sql)
partitionrangestart = func_add(partitionrangestart, interval)
# 处理 list 分区
if partitiontype == 'list':
listtype = rv[0]['listtype']
partitionlistvalue = rv[0]['partitionlistvalue']
interval = int(partitionlistvalue) - int(pre_value)
# 按月分区
if partitionname.__len__() == 7:
func_add = add_month
# 按天分区
elif partitionname.__len__() == 9:
func_add = add_day
# 分区名不规范,不处理
else:
return []
partitionlistvalue = now()
while partitionlistvalue < now_add_7days:
partitionname = 'p' + partitionlistvalue
add_sql = "alter table %s add partition %s values('%s'::%s)"%(tablename, partitionname, partitionlistvalue, listtype)
if reloptions != None and reloptions != '':
add_sql += 'with(%s);'%(reloptions)
else:
add_sql += ';'
plpy.execute(add_sql);
relist.append(add_sql)
partitionlistvalue = func_add(partitionlistvalue, interval)
return relist
$$ language plpythonu;
select add_partition_info('tb_partition_list_yyyymmdd'::regclass, 1);
select 'grant select on ' || nspname || '.' || relname || ' to gpadmin;' from pg_class a, pg_namespace b
where relname not like '%_1_prt%' and relkind='r' and has_table_privilege('gpadmin', a.oid, 'select')='f'
and a.relnamespace=b.oid and nspname not in ('pg_catalog', 'information_schema') and nspname not like '%pg_tmp%';
view
-- 1.获取表的字段信息,表名pg_class,schema在pg_namespace,字段信息pg_attribute
select a.attname, pg_catalog.format_type(a.atttypid, a.atttypmod) as data_type from pg_catalog.pg_attribute a,
(
select c.oid from pg_catalog.pg_class c left join pg_catalog.pg_namespace n on n.oid=c.relnamespace
where c.relname='pg_class' and n.nspname='pg_catalog'
) b
where a.attrelid=b.oid and a.attnum > 0 and not a.attisdropped order by a.attnum;
-- 1.1 获取表的字段信息
select a.attname, pg_catalog.format_type(a.atttypid, a.atttypmod) as data_type from pg_catalog.pg_attribute a
where a.attrelid='pg_catalog.pg_class'::regclass and a.attnum > 0 and not a.attisdropped order by a.attnum;
-- 2. 获取表的分布键gp_distribution_policy中
-- 2.1 localoid与pg_class的oid关联,attrnums是一个数组,记录字段attnum,与pg_attribute中的attnum关联
drop table if exists public.cxfa2 cascade;
create table public.cxfa2(a int, b int, c int, d int) distributed by(c, a);
select * from gp_distribution_policy where localoid='cxfa2'::regclass;
select * from gp_distribution_policy a, (select generate_series(1, 10)) i (i), pg_attribute b
where a.attrnums[i.i] is not null and a.localoid=b.attrelid and a.attrnums[i.i]=b.attnum
and a.localoid='public.cxfa2'::regclass order by i.i;
-- 3. 获取建表的时间
select * from pg_stat_file('pg_hba.conf');
drop type if exists public.stat_file cascade;
create type public.stat_file as (
size bigint,
access timestamp(0),
modification timestamp(0),
change timestamp(0)
);
create or replace function public.get_file_stat(filename text) returns stat_file as $$
import os, time
size = None
access = None
modification = None
change = None
try:
a = os.stat(filename)
size = int(a.st_size)
access = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(a.st_atime))
modification = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(a.st_mtime))
change = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(a.st_ctime))
except Exception, e:
pass
return [size, access, modification, change]
$$ language plpythonu;
select * from get_file_stat('pg_hba.conf');
drop view if exists public.v_table_modify_time cascade;
create view public.v_table_modify_time as
select tab_oid, schemaname, tablename, (filestat).access, (filestat).modification, (filestat).change
from
(
select a.oid tab_oid, e.nspname as schemaname, a.relname as tablename,
get_file_stat(fselocation || '/' ||
case when reltablespace=1664 then 'global'
when reltablespace=0 then 'base' || '/' || d.oid
else reltablespace || '/' || d.oid
end
|| '/' || relfilenode) as filestat
from pg_class a, pg_tablespace b, pg_filespace_entry c, pg_namespace e, pg_database d
where d.datname=current_database()
and (case when a.reltablespace = 0 then 1663 else a.reltablespace end)=b.oid
and b.spcfsoid=c.fsefsoid
and e.oid=a.relnamespace
and c.fsedbid=1
and a.relstorage in ('a', 'h')
and a.relkind='r'
) t;
select 'drop table ' || schemaname || '.' || tablename || ' cascade;'
from v_table_modify_time where access < now() - '1 days'::interval and tablename not like '%_1_prt_p%'
and schemaname='public' order by access;
-- 自定义类型转换
select castfunc::regprocedure from pg_cast where castsource='text'::regtype and casttarget='date'::regtype;
-- select '20180526'::date;
-- select date('20180526');
-- select date('2018-05-26');
-- select cast('2018-05-26' as date);
create or replace function public.regclass2text(a regclass) returns text as $$
return a;
$$ language plpythonu;
drop cast if exists (regclass as text) cascade;
create cast(regclass as text) with function regclass2text(a regclass);
select 57377::regclass::text;
create view v_gp_configuration as
select content
from gp_segment_configuration a, pg_filespace_entry b, pg_filespace create
where a.dbid=b.fsedbid and b.fsefsoid=c.oid and c.fsname='pg_system';
https://www.cnblogs.com/someblue/p/4225694.html
https://blog.csdn.net/menggudaoke/article/details/78843749