• PostgreSQL 系统参数调整及并行设置(转)


    转自:https://yq.aliyun.com/teams/5

     

    OS 准备

    # yum -y install coreutils glib2 lrzsz sysstat e4fsprogs xfsprogs ntp readline-devel zlib zlib-devel openssl openssl-devel pam-devel libxml2-devel libxslt-devel python-devel tcl-devel gcc make smartmontools flex bison perl perl-devel perl-ExtUtils* openldap openldap-devel
    
    # vi /etc/sysctl.conf
    # add by digoal.zhou
    fs.aio-max-nr = 1048576
    fs.file-max = 76724600
    kernel.core_pattern= /data01/corefiles/core_%e_%u_%t_%s.%p         
    # /data01/corefiles事先建好,权限777
    kernel.sem = 4096 2147483647 2147483646 512000    
    # 信号量, ipcs -l 或 -u 查看。PostgreSQL 每16个服务进程为一组,每组需要一个含17个信号量的信号量集。
    kernel.shmall = 107374182      
    # 所有共享内存段相加大小限制(建议内存的80%)
    kernel.shmmax = 274877906944   
    # 最大单个共享内存段大小(建议为内存一半), >9.2的版本已大幅降低共享内存的使用
    kernel.shmmni = 819200         
    # 一共能生成多少共享内存段,每个PG数据库集群至少2个共享内存段
    net.core.netdev_max_backlog = 10000
    net.core.rmem_default = 262144       
    # The default setting of the socket receive buffer in bytes.
    net.core.rmem_max = 4194304          
    # The maximum receive socket buffer size in bytes
    net.core.wmem_default = 262144       
    # The default setting (in bytes) of the socket send buffer.
    net.core.wmem_max = 4194304          
    # The maximum send socket buffer size in bytes.
    net.core.somaxconn = 4096
    net.ipv4.tcp_max_syn_backlog = 4096
    net.ipv4.tcp_keepalive_intvl = 20
    net.ipv4.tcp_keepalive_probes = 3
    net.ipv4.tcp_keepalive_time = 60
    net.ipv4.tcp_mem = 8388608 12582912 16777216
    net.ipv4.tcp_fin_timeout = 5
    net.ipv4.tcp_synack_retries = 2
    net.ipv4.tcp_syncookies = 1    
    # 开启SYN Cookies。当出现SYN等待队列溢出时,启用cookie来处理,可防范少量的SYN攻击
    net.ipv4.tcp_timestamps = 1    
    # 开启TCP时间戳支持;它是 tcp_tw_reuse 生效的前提,有助于更快复用 TIME-WAIT 连接
    net.ipv4.tcp_tw_recycle = 0    
    # 如果=1则开启TCP连接中TIME-WAIT套接字的快速回收,但是NAT环境可能导致连接失败,建议服务端关闭它
    net.ipv4.tcp_tw_reuse = 1      
    # 开启重用。允许将TIME-WAIT套接字重新用于新的TCP连接
    net.ipv4.tcp_max_tw_buckets = 262144
    net.ipv4.tcp_rmem = 8192 87380 16777216
    net.ipv4.tcp_wmem = 8192 65536 16777216
    net.nf_conntrack_max = 1200000
    net.netfilter.nf_conntrack_max = 1200000
    vm.dirty_background_bytes = 409600000       
    #  系统脏页到达这个值,系统后台刷脏页调度进程 pdflush(或其他) 自动将(dirty_expire_centisecs/100)秒前的脏页刷到磁盘
    vm.dirty_expire_centisecs = 3000             
    #  比这个值老的脏页,将被刷到磁盘。3000表示30秒。
    vm.dirty_ratio = 95                          
    #  如果系统进程刷脏页太慢,使得系统脏页超过内存 95 % 时,则用户进程如果有写磁盘的操作(如fsync, fdatasync等调用),则需要主动把系统脏页刷出。
    #  有效防止用户进程刷脏页,在单机多实例,并且使用CGROUP限制单实例IOPS的情况下非常有效。  
    vm.dirty_writeback_centisecs = 100            
    #  pdflush(或其他)后台刷脏页进程的唤醒间隔, 100表示1秒。
    vm.extra_free_kbytes = 4096000
    vm.min_free_kbytes = 2097152
    vm.mmap_min_addr = 65536
    vm.overcommit_memory = 0     
    #  在分配内存时,允许少量over malloc, 如果设置为 1, 则认为总是有足够的内存,内存较少的测试环境可以使用 1 .  
    vm.overcommit_ratio = 90     
    #  当overcommit_memory = 2 时,用于参与计算允许指派的内存大小。
    vm.swappiness = 0            
    #  尽量避免使用交换分区(注意:并非彻底关闭 swap,内存极度紧张时仍可能换出)
    vm.zone_reclaim_mode = 0     
    # 禁用 NUMA 内存域回收(zone reclaim),优先使用远端节点内存而不是本地回收;如需彻底禁用 NUMA,需在内核启动参数或 BIOS 中设置。
    net.ipv4.ip_local_port_range = 40000 65535    
    # 本地自动分配的TCP, UDP端口号范围
    #  vm.nr_hugepages = 102352    
    #  建议shared buffer设置超过64GB时 使用大页,页大小 /proc/meminfo Hugepagesize
    
    # sysctl -p
    
    # vi /etc/security/limits.conf
    * soft    nofile  1024000
    * hard    nofile  1024000
    * soft    nproc   unlimited
    * hard    nproc   unlimited
    * soft    core    unlimited
    * hard    core    unlimited
    * soft    memlock unlimited
    * hard    memlock unlimited
    
    # rm -f /etc/security/limits.d/*
    

    安装

    $ wget https://ftp.postgresql.org/pub/source/v9.6.0/postgresql-9.6.0.tar.bz2
    
    $ tar -jxvf postgresql-9.6.0.tar.bz2
    
    $ cd postgresql-9.6.0
    
    $ ./configure --prefix=/home/digoal/pgsql9.6.0
    
    $ make world -j 32
    $ make install-world -j 32
    
    
    $ vi ~/.bash_profile
    export PS1="$USER@`/bin/hostname -s`-> "
    export PGPORT=5281
    export PGDATA=/u02/digoal/pg_root$PGPORT
    export LANG=en_US.utf8
    export PGHOME=/home/digoal/pgsql9.6.0
    export LD_LIBRARY_PATH=$PGHOME/lib:/lib64:/usr/lib64:/usr/local/lib64:/lib:/usr/lib:/usr/local/lib:$LD_LIBRARY_PATH
    export DATE=`date +"%Y%m%d%H%M"`
    export PATH=$PGHOME/bin:$PATH:.
    export MANPATH=$PGHOME/share/man:$MANPATH
    export PGHOST=$PGDATA
    export PGUSER=postgres
    export PGDATABASE=postgres
    alias rm='rm -i'
    alias ll='ls -lh'
    unalias vi
    
    
    $ . ~/.bash_profile 
    
    
    $ df -h
    /dev/mapper/vgdata01-lv03
                          4.0T  1.3T  2.8T  32% /u01
    /dev/mapper/vgdata01-lv04
                          7.7T  899G  6.8T  12% /u02
    

    初始化集群

    $ initdb -D $PGDATA -E UTF8 --locale=C -U postgres -X /u01/digoal/pg_xlog$PGPORT
    

    配置数据库参数

    $ cd $PGDATA
    
    
    $ vi postgresql.conf
    listen_addresses = '0.0.0.0'
    port = 5281
    max_connections = 800
    superuser_reserved_connections = 13
    unix_socket_directories = '.'
    unix_socket_permissions = 0700
    tcp_keepalives_idle = 60
    tcp_keepalives_interval = 10
    tcp_keepalives_count = 10
    shared_buffers = 128GB
    huge_pages = try
    maintenance_work_mem = 2GB
    dynamic_shared_memory_type = sysv
    vacuum_cost_delay = 0
    bgwriter_delay = 10ms
    bgwriter_lru_maxpages = 1000
    bgwriter_lru_multiplier = 10.0
    bgwriter_flush_after = 256
    max_worker_processes = 128
    max_parallel_workers_per_gather = 16
    old_snapshot_threshold = 8h
    backend_flush_after = 256
    synchronous_commit = off
    full_page_writes = off
    wal_buffers = 128MB
    wal_writer_delay = 10ms
    wal_writer_flush_after = 4MB
    checkpoint_timeout = 55min
    max_wal_size = 256GB
    checkpoint_flush_after = 1MB
    random_page_cost = 1.0
    effective_cache_size = 512GB
    constraint_exclusion = on  
    log_destination = 'csvlog'
    logging_collector = on
    log_checkpoints = on
    log_connections = on
    log_disconnections = on
    log_error_verbosity = verbose  
    log_timezone = 'PRC'
    autovacuum = on
    log_autovacuum_min_duration = 0
    autovacuum_max_workers = 8
    autovacuum_naptime = 10s
    autovacuum_vacuum_scale_factor = 0.02
    autovacuum_analyze_scale_factor = 0.01
    statement_timeout = 0
    lock_timeout = 0
    idle_in_transaction_session_timeout = 0
    gin_fuzzy_search_limit = 0
    gin_pending_list_limit = 4MB
    datestyle = 'iso, mdy'
    timezone = 'PRC'
    lc_messages = 'C'
    lc_monetary = 'C'
    lc_numeric = 'C'
    lc_time = 'C'
    default_text_search_config = 'pg_catalog.english'
    deadlock_timeout = 1s
    
    
    $ vi pg_hba.conf
    local   all             all                                     trust
    host    all             all             127.0.0.1/32            trust
    host    all             all             ::1/128                 trust
    host all all 0.0.0.0/0 md5
    

    启动数据库

    $ pg_ctl start
    

    二、多核并行计算相关参数与用法

    1. 控制整个数据库集群同时能开启多少个worker process,必须设置。

    max_worker_processes = 128              # (change requires restart)  
    

    2. 控制一个并行的EXEC NODE最多能开启多少个并行处理单元,同时还需要参考表级参数parallel_workers,或者PG内核内置的算法,根据表的大小计算需要开启多少个并行处理单元。  
    实际取两者中较小的值。

    max_parallel_workers_per_gather = 16    # taken from max_worker_processes
    

    3. 计算并行处理的成本,如果成本高于非并行,则不会开启并行处理。

    #parallel_tuple_cost = 0.1              # same scale as above
    #parallel_setup_cost = 1000.0   # same scale as above
    

    4. 小于这个值的表,不会开启并行。

    #min_parallel_relation_size = 8MB
    

    5. 告诉优化器,强制开启并行。

    #force_parallel_mode = off
    

    6. 表级参数,不通过表的大小计算并行度,而是直接告诉优化器这个表需要开启多少个并行计算单元。

    parallel_workers (integer)
    
    This sets the number of workers that should be used to assist a parallel scan of this table. 
    If not set, the system will determine a value based on the relation size. 
    The actual number of workers chosen by the planner may be less, for example due to the setting of max_worker_processes.
  • 相关阅读:
    【转】总体样本方差的无偏估计样本方差为什么除以n-1
    【转】用PyQt5写的第一个程序
    向量的协方差计算
    数据挖掘模型
    数据挖掘方法论及实施步骤
    Hadoop MapReduce八大步骤以及Yarn工作原理详解
    传统数据仓库架构与Hadoop的区别
    数据库优化方案整理
    数仓interview总结
    十四个值得推荐的个人提升方法
  • 原文地址:https://www.cnblogs.com/kuang17/p/8311071.html
Copyright © 2020-2023  润新知