• postgresql插入大量模拟数据总结


    1.准备数据:

    创建两张表count_group和person_face_flow

    -- DROP TABLE public.count_group;
    
    CREATE TABLE public.count_group
    (
        id character varying COLLATE pg_catalog."default" NOT NULL,
        name character varying(64) COLLATE pg_catalog."default" NOT NULL,
        type integer NOT NULL,
        is_delete integer NOT NULL DEFAULT 0,
        area integer,
        create_time timestamp with time zone,
        update_time timestamp with time zone,
        region_id character varying(48) COLLATE pg_catalog."default" NOT NULL,
        CONSTRAINT count_group_pkey PRIMARY KEY (id)
    )
    
    -- DROP TABLE public.person_face_flow;
    
    CREATE TABLE public.person_face_flow
    (
        id character varying(36) COLLATE pg_catalog."default" NOT NULL,
        group_id character varying(36) COLLATE pg_catalog."default" NOT NULL,
        stat_time timestamp without time zone,
        create_time timestamp without time zone NOT NULL,
        update_time timestamp without time zone NOT NULL,
        in_number integer,
        out_number integer,
        gender character varying(20) COLLATE pg_catalog."default",
        age_type character varying(20) COLLATE pg_catalog."default",
        CONSTRAINT person_face_flow_pkey PRIMARY KEY (id)
    )
    

    2.生成模拟数据的脚本:

    1. count_group表插入400条模拟数据
    create extension IF NOT EXISTS "uuid-ossp";
    DO $$
    
    DECLARE formatStr text := 'INSERT INTO count_group (id, name, type, is_delete, 
    area,region_id,create_time,update_time)values
    ((SELECT uuid_generate_v4()), ''统计组_''||''%s'',''%s'',0,''%s'',uuid_generate_v4(),now(),now())';
      DECLARE exe_sql text;
    
      DECLARE random_base INTEGER := 100;
      DECLARE i int :=1;
      DECLARE gender_array text[] := '{0,1,2,3}';
      DECLARE gender text;
    
    BEGIN
      WHILE i<101 LOOP
    
        FOREACH gender IN Array gender_array LOOP
            exe_sql = format(formatStr,i,gender,random_base);
            execute exe_sql;
          END LOOP;
    
        i:=i+1;
      END LOOP;
    END
    $$;
    
    1. person_face_flow表插入模拟数据
    
    create extension IF NOT EXISTS "uuid-ossp"; 
    DO $$
    
    DECLARE formatStr text := 'INSERT INTO person_face_flow (id, group_id, stat_time, create_time,
    update_time,in_number,out_number,gender,age_type) 
    SELECT uuid_generate_v4(),''%s'',generate_series(''%s''::TIMESTAMPtz,''%s''::TIMESTAMPtz,''1 day'')
    ,now(),now(),''%s'',''%s'',''%s'',''%s'';';
    DECLARE exe_sql text;
    
    DECLARE start_time TIMESTAMPTZ := '2019-01-01' ;
    DECLARE end_time TIMESTAMPTZ := '2019-02-01';
    DECLARE temp_end TIMESTAMPTZ;
    DECLARE is_random BOOLEAN := true;
    DECLARE random_base INTEGER := 100;
    DECLARE month_time TIMESTAMPTZ;
    DECLARE month_time_end TIMESTAMPTZ;
    DECLARE record_item RECORD;
    
    DECLARE gender_array text[] := '{male,female,unknow}';
    DECLARE gender text;
    DECLARE age_array text[] := '{child,youth,old}';
    DECLARE age text;
    
    BEGIN
    	FOR record_item IN (SELECT id FROM count_group where is_delete=0) LOOP
            FOREACH gender IN Array gender_array LOOP
                FOREACH age IN Array age_array LOOP
                    month_time = start_time;
                    month_time_end = end_time;
                    while month_time < month_time_end LOOP
                        temp_end = month_time + interval '1 week' - interval '1 day';
                        if(temp_end > end_time) then 
                            temp_end = end_time;
                        end if;
                        exe_sql = format(formatStr,record_item.id,month_time,temp_end, 
                        (random()*100)::int , (random()*100)::int,gender,age);
                        execute exe_sql;
                        month_time = month_time + interval '1 week';
                    END LOOP;
                END LOOP;
            END LOOP;
    	END LOOP;
    END 
    $$;
    

    3.总结说明

    1. 生成序列

    	SELECT generate_series(1,10)
    

    generate_series函数

    函数 参数类型 返回类型 描述
    generate_series(start, stop) int 或 bigint setof int 或 setof bigint(与参数类型相同) 生成一个数值序列,从start 到 stop,步进为一
    generate_series(start, stop, step) int 或 bigint setof int 或 setof bigint(与参数类型相同) 生成一个数值序列,从start 到 stop,步进为step
    generate_series(start, stop, step_interval) timestamp or timestamp with time zone timestamp 或 timestamp with time 生成一个数值序列,从start 到 stop,步进为step

    2. 生成字符串

    生成36位的UUID字符串

    	SELECT uuid_generate_v4()
    

    生成32位随机字符串

    	select md5(random()::text)
    

    生成重复字符串

    	-- abc重复多次的字符串
    	SELECT repeat('abc',(random()*4)::integer);
    	-- 重复2次的MD5字符串
    	select repeat(md5(random()::text),2);
    

    连接符

    	select concat('a', ' ', 'b');
    

    3. 生成随机数

    生成100以内的随机数字

    	select (random()*100)::int
    

    生成100以内,总位数为4,小数点后2位的小数

    	SELECT (random()*100.)::numeric(4,2);
    

    4. 生成日期

    SELECT date(generate_series(now(), now() + '1 week', '1 day'));
    
    select generate_series(to_date('20130403','yyyymmdd'), t
    	o_date('20130404','yyyymmdd'), '3 hours');      
    ------------------------
     2013-04-03 00:00:00+08
     2013-04-03 03:00:00+08
     2013-04-03 06:00:00+08
     2013-04-03 09:00:00+08
     2013-04-03 12:00:00+08
     2013-04-03 15:00:00+08
     2013-04-03 18:00:00+08
     2013-04-03 21:00:00+08
     2013-04-04 00:00:00+08
    (9 rows)
    

    5. 自定义函数

    生成随机汉字

    	create or replace function gen_hanzi(int) returns text as $$    
    	declare    
    	  res text;    
    	begin    
    	  if $1 >=1 then    
    	    select string_agg(chr(19968+(random()*20901)::int),'') into res from generate_series(1,$1);    
    	    return res;    
    	  end if;    
    	  return null;    
    	end;    
    	$$ language plpgsql strict;   
    	--输出汉字
    	select gen_hanzi(10) from generate_series(1,10); 
    
    

    生成随机身份证号

    create or replace function gen_id(a date, b date)     
    returns text as $$    
    select lpad((random()*99)::int::text, 2, '0') ||     
           lpad((random()*99)::int::text, 2, '0') ||     
           lpad((random()*99)::int::text, 2, '0') ||     
           to_char(a + (random()*(b-a))::int, 'yyyymmdd') ||     
           lpad((random()*99)::int::text, 2, '0') ||     
           random()::int ||     
           (case when random()*10 >9 then 'X' else (random()*9)::int::text end ) ;    
    $$ language sql strict;    
    
    --输出身份证号
     select gen_id('1900-01-01', '2017-10-16') from generate_series(1,10);     
    --------------------  
     25614020061108330X  
     49507919010403271X  
     96764619970119860X  
     915005193407306113  
     551360192005045415  
     430005192611170108  
     299138191310237806  
     95149919670723980X  
     542053198501097403  
     482334198309182411  
    (10 rows)  
    

    生成随机数组

    create or replace function gen_rand_arr(int,int) returns int[] as $$    
      select array_agg((random()*$1)::int) from generate_series(1,$2);    
    $$ language sql strict;    
    --输出数组
    select gen_rand_arr(100,10) from generate_series(1,10);             
    ---------------------------------  
     {69,11,12,70,7,41,81,95,83,17}  
     {26,79,20,21,64,64,51,90,38,38}  
     {3,64,46,28,26,55,39,12,69,76}  
     {66,38,87,78,8,94,18,88,89,1}  
     {6,14,81,26,36,45,90,87,35,28}  
     {25,38,91,71,67,17,26,5,29,95}  
     {82,94,32,69,72,40,63,90,29,51}  
     {91,34,66,72,60,1,17,50,88,51}  
     {77,13,89,69,84,56,86,10,61,14}  
     {5,43,8,38,11,80,78,74,70,6}  
    (10 rows)  
    

    例:

    
    select generate_series(1,5), (random()*100)::int,
    (random()*20.)::numeric(4,2), gen_hanzi(3), uuid_generate_v4()
    

    结果:
    在这里插入图片描述

  • 相关阅读:
    anltr 解析MYSQL
    MYSQL 主从复制
    Java happens-before
    傅里叶分析-数据通信的理论基础
    Java jdk常用工具集合
    kafka报错 日志压缩报错直接退出
    linux centos7开启防火墙端口
    mysql_取分组后的前几行值
    数据库隔离级别
    mysql删除重复数据
  • 原文地址:https://www.cnblogs.com/seasail/p/12179348.html
Copyright © 2020-2023  润新知