• gp sql


    appendonly

    -- drop table if exists test_appendonly;
    -- create table test_appendonly with(appendonly=true, compresslevel=5) as
    -- select generate_series(0, 1000) a, 'helloworld'::varchar(50) b
    -- distributed by(a);

    select oid from pg_class where relname='test_appendonly';
    select oid, oid::regclass from pg_class where relname='test_appendonly' or relname like '%212266%';
    -- d pg_aoseg.pg_aoseg_212266
    select * from get_ao_compression_ratio('test_appendonly');
    select sum(eofuncompressed)/sum(eof) as compression_ratio from gp_dist_random('pg_aoseg.pg_aoseg_212266');

    select * from get_ao_distribution('test_appendonly') order by segmentid;
    select gp_segment_id, tupcount from gp_dist_random('pg_aoseg.pg_aoseg_212266') order by gp_segment_id;

    drop table if exists public.table_info cascade;
    create table public.table_info (
    tablename text, -- 表名
    subparname text, -- 分区各
    tablecount bigint, -- 表的行数
    tablesize bigint, -- 表大小
    prettysize text, -- 格式化大小输出
    max_div_avg float, -- 斜率,最大节点数据量/平均节点数据量
    compression_ratio text -- 压缩率
    );

    -- 获取表信息
    create or replace function public.get_table_info(tablename text) returns setof table_info as $$

    def one_table_info(plpy, tablename, subparname, aosegname, privilege):
        aosegsql = ""
        # plpy.info(privilege)
        if privilege == '1':
            aosegsql = '''
                select '%s' tablename, '%s' subparname, 
                    coalesce(sum(tupcount)::bigint, 0) tablecount,
                    coalesce(sum(eof)::bigint, 0) tablesize,
                    pg_size_pretty(coalesce(sum(tupcount)::bigint, 0)) prettysize,
                    coalesce(max(tupcount)::bigint, 1)/(case when coalesce(avg(tupcount), 1.0) = 0 then 1 
                        else coalesce(avg(tupcount), 1.0) end) max_div_avg,
                    coalesce(sum(eofuncompressed), 1)/(case when coalesce(sum(eof), 1.0) = 0 then 1 
                        else coalesce(sum(eof), 1.0) end) compression_ratio
                from gp_dist_random('%s');
            '''%(tablename, subparname, aosegname)
        else:
            aosegsql = '''
                select '%s' tablename, '%s' subparname, 
                    0 tablecount, 0 tablesize, 'permission denied' prettysize,
                    0 max_div_avg, 0 compression_ratio;
            '''%(tablename, subparname)
        plpy.info(aosegsql)
        result_rv=plpy.execute(aosegsql)
        # plpy.info(result_rv[0]);
        return result_rv[0]
    
    try:
        table_name = tablename.lower().split('.')[1]
        table_schema = tablename.lower().split('.')[0]
    except(IndexError):
        plpy.error('Please in put "tableschema.table_name"')
    
    # check version of database
    check_version_sql = """
        select substring(version(), 'Database (.*) build') as version;
    """
    rv = plpy.execute(check_version_sql)
    version = rv[0]['version']
    plpy.execute("set enable_seqscan=off")
    
    # get table oid
    get_table_oid = ''
    if version > '3.4.0':
        get_table_oid = """
            select a.oid, reloptions, b.segrelid, regclass2text(b.segrelid::regclass) aosegname, relstorage,
                case has_table_privilege(user, b.segrelid, 'select') when 't' then '1' else '0' end privilege
            from pg_class a left join pg_appendonly b on a.oid=b.relid where a.oid='%s'::regclass;
        """%(tablename)
    else:
        get_table_oid = """
            select oid, reloptions, relaosegrelid, regclass2text(relaosegrelid::regclass) aosegname, relstorage,
                case has_table_privilege(user, relaosegrelid, 'select') when 't' then '1' else '0' end privilege
            from pg_class where oid='%s'::regclass;
        """%(tablename)
    
    try:
        rv_oid = plpy.execute(get_table_oid, 5)
        if not rv_oid:
            plpy.error('Did not find any relation named "' + tablename + '".')
    except (Error):
        plpy.error('Did not find any relation named "' + tablename + '".')
    
    # 
    table_oid = rv_oid[0]['oid']
    if rv_oid[0]['relstorage'] != 'a':
        plpy.error(tablename + ' is not appendonly table, this function only support appendonly talbe')
    # plpy.info('table_oid')
    
    # check if table is partitin table
    check_par_table="select count(*) from pg_partition where parrelid=%s"%(table_oid)
    if version > '3.4.0':
        tablecount_sql = """
            select regclass2text(pp.parrelid::regclass) tabname, prl.parname, parruleord, pa.segrelid, 
                regclass2text(pa.segrelid::regclass) aosegname,
                case has_table_privilege(user, pa.segrelid, 'select') when 't' then '1' else '0' end privilege
            from pg_partition pp, pg_partition_rule prl, pg_appendonly pa 
            where pp.paristemplate=false and pp.parrelid=%s and prl.paroid=pp.oid and pa.relid=prl.parchildrelid 
            order by prl.parruleord;
        """%(table_oid)
    else:
        tablecount_sql = """
            select regclass2text(pp.parrelid::regclass) tabname, prl.parname, parruleord, pc.relaosegrelid, 
                regclass2text(pc.relaosegrelid::regclass) aosegname,
                case has_table_privilege(user, pc.relaosegrelid, 'select') when 't' then '1' else '0' end privilege
            from pg_partition pp, pg_partition_rule prl, pg_class pc 
            where pp.paristemplate=false and pp.parrelid=%s and prl.paroid=pp.oid and pc.oid=prl.parchildrelid 
            and relaosegrelid <> 0 order by prl.parruleord;
        """%(table_oid)
    
    rv = plpy.execute(check_par_table)
    
    if rv[0]['count'] == 1:
        al = plpy.execute(tablecount_sql)
        result_rv = []
        rv_tmp = []
    
        totalcount = 0
        totalsize = 0
        unzipsize = 0
        compression_ratio = 1
        for i in al:
            rv_ao = one_table_info(plpy, tablename, i['parname'], i['aosegname'], str(i['privilege']))
            rv_tmp.append(rv_ao)
            totalsize = totalsize + rv_ao['tablesize']
            totalcount = totalcount + rv_ao['tablecount']
            unzipsize = unzipsize + rv_ao['tablesize'] * rv_ao['compression_ratio']
        if totalsize == 0:
            compression_total = 1
        else:
            compression_ratio = unzipsize/totalsize
        total_count_sql = """
            select '%s' as tablename, '###ALL###' as subparname, %d as tablecount, %d as tablesize, 
                pg_size_pretty(%d::bigint) prettysize, null as max_div_avg, %f as compression_ratio;
        """%(tablename, totalcount, totalsize, totalsize, compression_ratio)
        a2 = plpy.execute(total_count_sql)
        result_rv.append(a2[0])
    
        plpy.info('===' + total_count_sql)
    
        for i in rv_tmp:
            result_rv.append(i)
        return result_rv;
    else:
        result_rv = []
        rv_ao = one_table_info(plpy, tablename, '', rv_oid[0]['aosegname'], str(rv_oid[0]['privilege']));
        result_rv.append(rv_ao)
        return result_rv
    

    $$ language plpythonu;

    select * from get_table_info('public.test_appendonly');
    select get_table_info('public.test_appendonly');

    hostname

    -- create language plpythonu ;
    create or replace function public.hostname() returns text as $$
    import socket;
    return socket.gethostname();
    $$ language plpythonu;

    create or replace function public.reverse(str text) returns text as $$
    if str != None:
    return str[::-1]
    else:
    return None
    $$ language plpythonu;

    create or replace function public.json_parse(data text) returns text as $$
    import json
    try:
    mydata = json.loads(data)
    except:
    return ['Parse json error']
    returndata = []
    try:
    for people in mydata['people']:
    returndata.append(people['firstName'] + ' ' + people['lastName'])
    except:
    return ['Parse json error']
    return returndata
    $$ language plpythonu;

    select hostname();
    select hostname() from gp_dist_random('gp_id');
    select gp_segment_id, count(1) from gp_dist_random('pg_class') group by 1 order by 1;

    select * from gp_configuration;
    select * from gp_segment_configuration;
    select * from pg_filespace_entry;
    select * from gp_configuration_history;

    -- 判断某个表是否是分区表
    select count(1) from pg_partition where parrelid='public.tb_partition_list_yyyymmdd'::regclass;
    select * from pg_partition_rule;

    drop view public.v_pg_partitions;
    create view public.v_pg_partitions as
    select pp.parrelid tableoid, prl.parchildrelid, prl.parname as partitionname,
    case
    when pp.parkind='h'::"char" then 'hash'::text
    when pp.parkind='r'::"char" then 'range'::text
    when pp.parkind='l'::"char" then 'list'::text
    else NULL::text
    end as partitiontype,
    case
    when pg_get_expr(prl.parrangeend, prl.parchildrelid) = ''
    then pg_get_expr(prl.parlistvalues, prl.parchildrelid)
    else pg_get_expr(prl.parrangeend, prl.parchildrelid)
    end as HIGH_VALUE,
    pg_get_partition_rule_def(prl.oid, true) as partitionboundary,
    prl.parruleord as partitionposition
    from pg_partition pp, pg_partition_rule prl
    where pp.paristemplate = false and prl.paroid=pp.oid;

    select * from public.v_pg_partitions where tableoid='tb_partition_list_yyyymmdd'::regclass order by partitionposition;

    partition

    drop table if exists public.tb_partition_range_yyyymmdd cascade;
    create table public.tb_partition_range_yyyymmdd (
    id numeric,
    yyyymmdd date
    ) with(appendonly=true, compresslevel=5)
    distributed by(id)
    partition by range(yyyymmdd)
    (
    partition p20120811 start ('2012-08-11'::date) end ('2012-08-12'::date)
    -- partition p20120812 start ('2012-08-12'::date) end ('2012-08-13'::date)
    );

    drop table if exists public.tb_partition_list_yyyymmdd cascade;
    create table public.tb_partition_list_yyyymmdd (
    id numeric,
    yyyymmdd varchar(128)
    ) with(appendonly=true, compresslevel=5)
    distributed by(id)
    partition by list(yyyymmdd)
    (
    partition p20120811 values('20120811'),
    partition p20120812 values('20120812')
    );

    drop view if exists public.v_pg_add_partitions cascade;
    create view public.v_pg_add_partitions as
    select pp.parrelid tableoid, prl.parchildrelid, prl.parname as partitionname,
    case
    when pp.parkind='h'::"char" then 'hash'::text
    when pp.parkind='r'::"char" then 'range'::text
    when pp.parkind='l'::"char" then 'list'::text
    else NULL::text
    end as partitiontype,
    translate(pg_get_expr(prl.parlistvalues, prl.parchildrelid), '-''::date
    character varying bpchar numeric double precision timestamp without time zone', '') as partitionlistvalue,
    substring(translate(pg_get_expr(prl.parrangestart, prl.parchildrelid), '-''::date
    character varying bpchar numeric double precision timestamp without time zone', ''), 1, 8) as partitionrangestart,
    substring(translate(pg_get_expr(prl.parrangeend, prl.parchildrelid), '-''::date
    character varying bpchar numeric double precision timestamp without time zone', ''), 1, 8) as partitionrangeend,
    prl.parruleord as partitionposition,
    substring(parlistvalues, 'consttype ([0-9]+)')::integer::regtype listtype,
    substring(parrangeend, 'consttype ([0-9]+)')::integer::regtype rangetype
    from pg_partition pp, pg_partition_rule prl where pp.paristemplate=false and prl.paroid=pp.oid;

    create or replace function public.add_partition_info(tableoid oid, days_from_now integer) returns setof text as $$

    import datetime
    
    def now():
        d = datetime.datetime.now()
        format = '%Y%m%d'
        return datetime.datetime.strftime(d, format)
    
    def add_day(d, n):
        format = '%Y%m%d'
        d2 = datetime.datetime.strptime(d, format)
        d3 = d2 + datetime.timedelta(days = n)
        return datetime.datetime.strftime(d3, format)
    
    def add_month(d, n):
        format = '%Y%m%d'
        formatymd = '%Y%m01'
        if d.__len__() == 6:
            format = '%Y%m'
            formatymd = '%Y%m'
        d2 = datetime.datetime.strptime(d, format)
        d3 = d2 + datetime.timedelta(days = 31 * n)
        return datetime.datetime.strftime(d3, formatymd)
    
    relist = []
    
    # pre_value 是上一个分区的值,主要是 list 分区时使用
    sql = """select *, tableoid::regclass tablename, lead(case when partitionrangeend <>  '' then partitionrangeend 
     else partitionlistvalue end) over(partition by tableoid order by partitionposition desc) as pre_value,
     row_number() over(partition by tableoid order by partitionposition desc) rn
     from v_pg_add_partitions where substr(partitionname, 1, 3) = 'p20' and tableoid=%s;"""%(tableoid)
    rv = plpy.execute(sql);
    sql_relation = "select array_to_string(reloptions, ',') reloptions from pg_class where oid=%s"%(tableoid)
    rv_relation = plpy.execute(sql_relation)
    
    if rv.nrows() == -1:
        return []
    else:
        reloptions = rv_relation[0]['reloptions']
        tablename = rv[0]['tablename']
        partitiontype = rv[0]['partitiontype']
        partitionname = rv[0]['partitionname']
        pre_value = rv[0]['pre_value']
        now_add_7days = add_day(now(), days_from_now)
    
        # 处理 range 分区
        if partitiontype == 'range':
            rangetype = rv[0]['rangetype']
            partitionrangestart = rv[0]['partitionrangestart']
            partitionrangeend = rv[0]['partitionrangeend']
            interval = int(partitionrangeend) - int(partitionrangestart)
    
            # 按月分区
            if partitionname.__len__() == 7:
                func_add = add_month
                interval = int(partitionrangeend[0:6]) - int(partitionrangestart[0:6])
    
            # 按天分区
            elif partitionname.__len__() == 9:
                func_add = add_day
    
            # 分区名不规范,不处理
            else:
                return []
            
            partitionrangestart = now()
            while partitionrangestart < now_add_7days:
                partitionrangeend = func_add(partitionrangestart,  )
                partitionname = 'p' + partitionrangestart
                add_sql = "alter table %s add partition %s start ('%s'::%s) end ('%s'::%s)"%(tablename, partitionname, partitionrangestart, rangetype, partitionrangeend, rangetype)
                    
                if reloptions != None and reloptions != '':
                    add_sql += 'with(%s);'%(reloptions)
                else:
                    add_sql += ';'
                plpy.execute(add_sql);
                relist.append(add_sql)
                partitionrangestart = func_add(partitionrangestart, interval)
    
        # 处理 list 分区
        if partitiontype == 'list':
            listtype = rv[0]['listtype']
            partitionlistvalue = rv[0]['partitionlistvalue']
            interval = int(partitionlistvalue) - int(pre_value)
    
            # 按月分区
            if partitionname.__len__() == 7:
                func_add = add_month
    
            # 按天分区
            elif partitionname.__len__() == 9:
                func_add = add_day
    
            # 分区名不规范,不处理
            else:
                return []
            
            partitionlistvalue = now()
            while partitionlistvalue < now_add_7days:
                partitionname = 'p' + partitionlistvalue
                add_sql = "alter table %s add partition %s values('%s'::%s)"%(tablename, partitionname, partitionlistvalue, listtype)
                    
                if reloptions != None and reloptions != '':
                    add_sql += 'with(%s);'%(reloptions)
                else:
                    add_sql += ';'
                plpy.execute(add_sql);
                relist.append(add_sql)
                partitionlistvalue = func_add(partitionlistvalue, interval)
    return relist
    

    $$ language plpythonu;

    select add_partition_info('tb_partition_list_yyyymmdd'::regclass, 1);

    select 'grant select on ' || nspname || '.' || relname || ' to gpadmin;' from pg_class a, pg_namespace b
    where relname not like '%_1_prt%' and relkind='r' and has_table_privilege('gpadmin', a.oid, 'select')='f'
    and a.relnamespace=b.oid and nspname not in ('pg_catalog', 'information_schema') and nspname not like '%pg_tmp%';

    view

    -- 1.获取表的字段信息,表名pg_class,schema在pg_namespace,字段信息pg_attribute
    select a.attname, pg_catalog.format_type(a.atttypid, a.atttypmod) as data_type from pg_catalog.pg_attribute a,
    (
    select c.oid from pg_catalog.pg_class c left join pg_catalog.pg_namespace n on n.oid=c.relnamespace
    where c.relname='pg_class' and n.nspname='pg_catalog'
    ) b
    where a.attrelid=b.oid and a.attnum > 0 and not a.attisdropped order by a.attnum;

    -- 1.1 获取表的字段信息
    select a.attname, pg_catalog.format_type(a.atttypid, a.atttypmod) as data_type from pg_catalog.pg_attribute a
    where a.attrelid='pg_catalog.pg_class'::regclass and a.attnum > 0 and not a.attisdropped order by a.attnum;

    -- 2. 获取表的分布键gp_distribution_policy中
    -- 2.1 localoid与pg_class的oid关联,attrnums是一个数组,记录字段attnum,与pg_attribute中的attnum关联
    drop table if exists public.cxfa2 cascade;
    create table public.cxfa2(a int, b int, c int, d int) distributed by(c, a);

    select * from gp_distribution_policy where localoid='cxfa2'::regclass;

    select * from gp_distribution_policy a, (select generate_series(1, 10)) i (i), pg_attribute b
    where a.attrnums[i.i] is not null and a.localoid=b.attrelid and a.attrnums[i.i]=b.attnum
    and a.localoid='public.cxfa2'::regclass order by i.i;

    -- 3. 获取建表的时间
    select * from pg_stat_file('pg_hba.conf');

    drop type if exists public.stat_file cascade;
    create type public.stat_file as (
    size bigint,
    access timestamp(0),
    modification timestamp(0),
    change timestamp(0)
    );
    create or replace function public.get_file_stat(filename text) returns stat_file as $$
    import os, time
    size = None
    access = None
    modification = None
    change = None

    try:
        a = os.stat(filename)
        size = int(a.st_size)
        access = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(a.st_atime))
        modification = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(a.st_mtime))
        change = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(a.st_ctime))
    except Exception, e:
        pass
    return [size, access, modification, change]
    

    $$ language plpythonu;

    select * from get_file_stat('pg_hba.conf');

    drop view if exists public.v_table_modify_time cascade;
    create view public.v_table_modify_time as
    select tab_oid, schemaname, tablename, (filestat).access, (filestat).modification, (filestat).change
    from
    (
    select a.oid tab_oid, e.nspname as schemaname, a.relname as tablename,
    get_file_stat(fselocation || '/' ||
    case when reltablespace=1664 then 'global'
    when reltablespace=0 then 'base' || '/' || d.oid
    else reltablespace || '/' || d.oid
    end
    || '/' || relfilenode) as filestat
    from pg_class a, pg_tablespace b, pg_filespace_entry c, pg_namespace e, pg_database d
    where d.datname=current_database()
    and (case when a.reltablespace = 0 then 1663 else a.reltablespace end)=b.oid
    and b.spcfsoid=c.fsefsoid
    and e.oid=a.relnamespace
    and c.fsedbid=1
    and a.relstorage in ('a', 'h')
    and a.relkind='r'
    ) t;

    select 'drop table ' || schemaname || '.' || tablename || ' cascade;'
    from v_table_modify_time where access < now() - '1 days'::interval and tablename not like '%_1_prt_p%'
    and schemaname='public' order by access;

    -- 自定义类型转换
    select castfunc::regprocedure from pg_cast where castsource='text'::regtype and casttarget='date'::regtype;

    -- select '20180526'::date;
    -- select date('20180526');
    -- select date('2018-05-26');
    -- select cast('2018-05-26' as date);

    create or replace function public.regclass2text(a regclass) returns text as $$
    return a;
    $$ language plpythonu;
    drop cast if exists (regclass as text) cascade;
    create cast(regclass as text) with function regclass2text(a regclass);

    select 57377::regclass::text;

    create view v_gp_configuration as
    select content
    from gp_segment_configuration a, pg_filespace_entry b, pg_filespace create
    where a.dbid=b.fsedbid and b.fsefsoid=c.oid and c.fsname='pg_system';

    https://www.cnblogs.com/someblue/p/4225694.html
    https://blog.csdn.net/menggudaoke/article/details/78843749

  • 相关阅读:
    算术运算
    数据分析
    科学计算
    面向对象
    文件操作-py
    pillow图像处理
    模块
    固定翼飞行过程产生的阻力
    修改行间距等基本操作
    文件操作
  • 原文地址:https://www.cnblogs.com/binarylei/p/9137824.html
Copyright © 2020-2023  润新知