• MPP install


    greenplum install

    Notes:
    1. Hostnames must not contain uppercase letters.
    2. If the number of segments and mirrors is large, raise the MaxStartups value in /etc/ssh/sshd_config.
    3. The kernel semaphore parameter must be raised, e.g. kernel.sem = 500 1024000 200 8192. That value is sized for 3 nodes with 6 disks each, running 6 segments and 6 mirrors per disk; if you run more segments than that, increase the sem values accordingly.
    4. Install the software as the gpadmin user. Edit /etc/sudoers (vim /etc/sudoers) and grant gpadmin root privileges:
    # User privilege specification
    root ALL=(ALL:ALL) ALL
    gpadmin ALL=(ALL:ALL) ALL
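    A quick sanity check (not part of the original notes) that the sudoers change took effect; run it as gpadmin:
    sudo whoami    # should print "root"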

    Installation steps
    1. System parameter tuning on the master, standby, and segment (worker) nodes

    1.1 User setup
    sudo groupadd -g 530 gpadmin
    sudo useradd -g 530 -u 530 -m -d /home/gpadmin -s /bin/bash gpadmin
    sudo chown -R gpadmin:gpadmin /home/gpadmin/
    sudo passwd gpadmin

    sudo vim /etc/hosts
    # hosts for greenplum
    192.168.0.200 mpp01
    192.168.2.201 mpp02
    192.168.0.202 mpp03

    su gpadmin
    # set up passwordless SSH login
    ssh-keygen -t rsa
    cp ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys
    chmod 600 ~/.ssh/authorized_keys
    chmod 700 ~/.ssh
    ssh-copy-id mpp02
    ssh-copy-id mpp03
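    # A quick check (not in the original notes) that passwordless login works from the master:
    ssh mpp02 hostname
    ssh mpp03 hostname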

    ##### Change the hostname on Ubuntu 18.04
    If this file does not exist, create it:
    /etc/cloud/cloud.cfg
    preserve_hostname: true
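    # Sketch (not in the original notes): with preserve_hostname set, rename each node explicitly, e.g. on the first node:
    sudo hostnamectl set-hostname mpp01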

    1.2 System parameter tuning
    # Reference: https://blog.csdn.net/ctypyb2002/article/details/84107389
    sudo vim /etc/sysctl.conf
    # Note: xfs_mount_options = rw,noatime,inode64,allocsize=16m is not a sysctl setting; it is the set of XFS mount options expected on the data filesystems, and without them gpcheck reports a warning
    kernel.shmmax = 500000000
    kernel.shmmni = 4096
    kernel.shmall = 4000000000
    #kernel.sem = 500 1024000 200 8192   # use this sem value with 108 segments or more
    kernel.sem = 500 1024000 200 4096    # use this sem value with fewer than 108 segments
    kernel.sysrq = 1
    kernel.core_uses_pid = 1
    kernel.msgmnb = 1265536
    kernel.msgmax = 1265536
    kernel.msgmni = 2048
    net.ipv4.tcp_syncookies = 1
    net.ipv4.ip_forward = 0
    net.ipv4.conf.default.accept_source_route = 0
    net.ipv4.tcp_tw_recycle = 1
    net.ipv4.tcp_max_syn_backlog = 4096
    net.ipv4.conf.all.arp_filter = 1
    net.ipv4.ip_local_port_range = 1025 65535
    net.core.netdev_max_backlog = 10000
    net.core.rmem_max = 2097152
    net.core.wmem_max = 2097152
    vm.overcommit_memory = 2


    sudo sysctl -p

    1.3 # Increase the open file and process limits
    sudo vim /etc/security/limits.conf
    * soft nofile 65536
    * hard nofile 65536
    * soft nproc 131072
    * hard nproc 131072
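    # Quick check (not in the original notes) after logging in again as gpadmin:
    ulimit -n    # expect 65536
    ulimit -u    # expect 131072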

    1.4 # Set the disk I/O scheduler (choose deadline for SATA disks)
    The Linux disk I/O scheduler supports several policies; the default is CFQ, and Greenplum recommends deadline. To check the scheduler for a drive, run the command below; this example shows the correct setting:
    # cat /sys/block/{devname}/queue/scheduler
    noop anticipatory [deadline] cfq
    Change the I/O scheduler for each data disk on our servers (run as root):
    echo deadline > /sys/block/sda/queue/scheduler
    echo deadline > /sys/block/sdb/queue/scheduler
    echo deadline > /sys/block/sdc/queue/scheduler
    echo deadline > /sys/block/sdd/queue/scheduler
    echo deadline > /sys/block/sde/queue/scheduler
    echo deadline > /sys/block/sdf/queue/scheduler
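    # Equivalent loop (a sketch, not in the original; assumes the data disks are sda-sdf).
    # Note these settings do not survive a reboot.
    for dev in sda sdb sdc sdd sde sdf; do
        echo deadline | sudo tee /sys/block/$dev/queue/scheduler
    done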

    1.5 # Set disk read-ahead
    Each disk device should have its read-ahead (blockdev) value set to 65536.
    The official documentation recommends 16384, but the translator of the reference argues 65536 is more reasonable: the value is a count of sectors, the actual read-ahead in KB is the value divided by 2, and with Greenplum's default 32 KB block size 65536 sectors (32768 KB) line up neatly.
    65536 or 16384? Undecided; below I default to the official value (16384).
    /sbin/blockdev --getra /dev/sda
    /sbin/blockdev --setra 16384 /dev/sda

    /sbin/blockdev --getra /dev/sdb
    /sbin/blockdev --setra 16384 /dev/sdb

    /sbin/blockdev --getra /dev/sdc
    /sbin/blockdev --setra 16384 /dev/sdc

    /sbin/blockdev --getra /dev/sdd
    /sbin/blockdev --setra 16384 /dev/sdd

    /sbin/blockdev --getra /dev/sde
    /sbin/blockdev --setra 16384 /dev/sde

    /sbin/blockdev --getra /dev/sdf
    /sbin/blockdev --setra 16384 /dev/sdf
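    # Note (not in the original): blockdev --setra also does not persist across reboots. One option, as a sketch,
    # is to append the --setra commands to /etc/rc.local, which section 1.6 below registers as a boot service:
    for dev in sda sdb sdc sdd sde sdf; do
        /sbin/blockdev --setra 16384 /dev/$dev
    done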

    1.6 # Adjust the sshd configuration
    sudo vim /etc/ssh/sshd_config
    UseDNS no
    #PidFile /var/run/sshd.pid
    MaxStartups 100:30:1000
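    # Restart sshd so the new settings take effect (on Ubuntu the service is named ssh):
    sudo systemctl restart ssh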

    # Register rc.local as a boot-time service on Ubuntu 18.04
    sudo vi /etc/systemd/system/rc-local.service
    Boot-time task setup (reference: https://www.jianshu.com/p/79d24b4af4e5)
    [Unit]
    Description=/etc/rc.local Compatibility
    ConditionPathExists=/etc/rc.local

    [Service]
    Type=forking
    ExecStart=/etc/rc.local start
    TimeoutSec=0
    StandardOutput=tty
    RemainAfterExit=yes
    SysVStartPriority=99

    [Install]
    WantedBy=multi-user.target


    sudo vi /etc/rc.local

    #!/bin/sh -e
    #
    # rc.local
    #
    # This script is executed at the end of each multiuser runlevel.
    # Make sure that the script will "exit 0" on success or any other
    # value on error.
    #
    # In order to enable or disable this script just change the execution
    # bits.
    #
    # By default this script does nothing.
    netplan apply
    exit 0

    sudo chmod +x /etc/rc.local
    sudo systemctl enable rc-local


    sudo systemctl start rc-local.service
    sudo systemctl status rc-local.service

    2. Install Greenplum
    ################### master, standby, and segment nodes (run on every node in the MPP cluster)

    2.1 Install gp-xerces
    git clone https://github.com/greenplum-db/gp-xerces.git
    cd gp-xerces
    mkdir build
    cd build
    ../configure --prefix=/usr/local && make -j64
    sudo make -j64 install

    # Install re2c
    tar -zxvf re2c-1.1.1.tar.gz
    cd re2c-1.1.1
    ./configure
    make -j4
    sudo make -j4 install

    # Install ninja
    tar -zxvf ninja-1.9.0.tar.gz
    cd ninja-1.9.0
    ./configure.py --bootstrap
    sudo cp -rp ninja /usr/bin

    2.2 Install gporca
    git clone https://github.com/greenplum-db/gporca.git   # or download a release tarball, e.g. v3.39.0
    cd gporca
    sudo apt install cmake
    cmake -GNinja -H. -Bbuild
    sudo ninja install -C build


    2.3 Build and install gpdb on the master node (run on the MPP master node)

    git clone https://github.com/greenplum-db/gpdb.git #### pick a suitable gpdb release from https://github.com/greenplum-db/gpdb/releases/ ; I chose https://github.com/greenplum-db/gpdb/releases/tag/6.0.0-beta.3
    cd gpdb
    ###### Install the required dependencies
    sudo ./README.ubuntu.bash
    # Copy this script to the other MPP nodes so the base packages can be installed there as well
    scp ./README.ubuntu.bash gpadmin@mpp02:/home/gpadmin
    scp ./README.ubuntu.bash gpadmin@mpp03:/home/gpadmin
    At this point the libxerces-c-dev package has been installed by default; it must be removed for the build to compile correctly.

    sudo apt remove libxerces-c-dev ### this library conflicts with gp-xerces
    ./configure --enable-orca --with-perl --with-python --with-libxml --prefix=/usr/local/gpdb
    make -j64 && sudo make -j64 install
    #################
    sudo ldconfig ################# Be sure to reload the shared library cache; otherwise the libraries of the two components just installed (e.g. libgpos.so) will not be found

    2.4 Install Greenplum on the standby and segment nodes
    cd /home/gpadmin
    ###### Install the required dependencies
    sudo ./README.ubuntu.bash
    Pack up the Greenplum install directory on the master node, copy the archive to the same location on the standby and segment nodes, and extract it there:
    tar -czvf gpdb.tar.gz -C /usr/local gpdb
    scp -rp gpdb.tar.gz gpadmin@mpp02:/home/gpadmin
    scp -rp gpdb.tar.gz gpadmin@mpp03:/home/gpadmin
    ssh gpadmin@mpp02 "cd /home/gpadmin && tar -zxvf gpdb.tar.gz && sudo mv gpdb /usr/local && sudo ldconfig"
    ssh gpadmin@mpp03 "cd /home/gpadmin && tar -zxvf gpdb.tar.gz && sudo mv gpdb /usr/local && sudo ldconfig"

    2.5 Set the gpadmin environment variables on all nodes
    # master and standby nodes: create the master data directory
    sudo mkdir -p /data1/master
    sudo chown -R gpadmin:gpadmin /data1/master

    vim /home/gpadmin/.profile

    source /usr/local/gpdb/greenplum_path.sh
    export PGPORT=5432
    export PGDATABASE=gp_sydb
    TZ='Asia/Shanghai'; export TZ
    export MASTER_DATA_DIRECTORY=/data1/master/gpseg-1/ # data directory of the master node
    # then run on every node:
    sudo ldconfig && source /home/gpadmin/.profile && source /usr/local/gpdb/greenplum_path.sh

    Rescan the disks without rebooting
    ls /sys/class/scsi_host/
    Write "- - -" to the scan file of each SCSI host to rescan its bus.
    # Note: "sudo echo '- - -' > file" fails because the redirection runs as the unprivileged user; pipe through sudo tee instead:
    for scan in /sys/class/scsi_host/host*/scan; do
        echo "- - -" | sudo tee "$scan"
    done

    #########################################

    2.6 Initialization prep (segments and mirrors)
    Segments store the data.
    A mirror is the replica of a segment.
    ################# Format and mount the data disks on every node (run as root)
    mkfs.xfs /dev/sda
    mkfs.xfs /dev/sdb
    mkfs.xfs /dev/sdc
    mkfs.xfs /dev/sdd
    mkfs.xfs /dev/sde
    mkfs.xfs /dev/sdf
    # mount automatically at boot, using the XFS mount options recommended in section 1.2
    vim /etc/fstab
    /dev/sda /data1 xfs rw,noatime,inode64,allocsize=16m 0 0
    /dev/sdb /data2 xfs rw,noatime,inode64,allocsize=16m 0 0
    /dev/sdc /data3 xfs rw,noatime,inode64,allocsize=16m 0 0
    /dev/sdd /data4 xfs rw,noatime,inode64,allocsize=16m 0 0
    /dev/sde /data5 xfs rw,noatime,inode64,allocsize=16m 0 0
    /dev/sdf /data6 xfs rw,noatime,inode64,allocsize=16m 0 0

    mkdir -p /data{1..6}    # the mount points must exist before mounting
    mount -a

    ##################
    mkdir -p /data{1..6}/primary/s{1..6}
    mkdir -p /data{1..6}/mirror/s{1..6}
    mkdir /data1/master
    chown -R gpadmin:gpadmin /data{1..6}

    ######################### Smaller variant: 3 disks, 3 segments (one disk, one segment and one mirror per node)
    sudo mkfs.xfs /dev/sdb
    sudo vim /etc/fstab
    /dev/sdb /data1 xfs defaults 0 0

    sudo mkdir /data1 && sudo mount -a

    sudo mkdir -p /data1/primary/s1 ### create the segment data directory
    sudo mkdir -p /data1/mirror/s1 ### create the mirror data directory
    sudo mkdir /data1/master
    sudo chown -R gpadmin:gpadmin /data1

    2.7 Initialize the Greenplum cluster
    cd /usr/local/gpdb/docs/cli_help/gpconfigs
    mkdir -p /home/gpadmin/conf && cp gpinitsystem_config /home/gpadmin/conf/gp_conf
    vim /home/gpadmin/conf/gp_conf

    ARRAY_NAME="Greenplum Data Platform"
    SEG_PREFIX=gpseg
    PORT_BASE=6000
    ARRAY_NAME="gp_sydb"
    declare -a DATA_DIRECTORY=(/data1/primary/s1 /data1/primary/s2 ) ###### each directory is one segment; total segments in the cluster = segment directories per node × number of MPP nodes
    MASTER_HOSTNAME=mpp01
    MASTER_DIRECTORY=/data1/master
    MASTER_DATA_DIRECTORY=/data1/master/gpseg-1
    DATABASE_NAME=gp_sydb
    # file listing the segment hosts
    MACHINE_LIST_FILE=/home/gpadmin/conf/seg_hosts
    MASTER_PORT=5432
    TRUSTED_SHELL=ssh
    CHECK_POINT_SEGMENTS=8
    ENCODING=UNICODE
    #mirror config
    MIRROR_PORT_BASE=43000
    REPLICATION_PORT_BASE=34000
    MIRROR_REPLICATION_PORT_BASE=44000
    declare -a MIRROR_DATA_DIRECTORY=(/data1/mirror/s1 /data1/mirror/s2 ) ###### each directory is one mirror; total mirrors in the cluster = mirror directories per node × number of MPP nodes

    ##### Confirm the MPP node hostnames
    cat /home/gpadmin/conf/seg_hosts
    mpp01
    mpp02
    mpp03

    ##### Initialize Greenplum
    Run on every node:
    mkdir -p /data1/primary/s1 /data1/primary/s2 && mkdir -p /data1/mirror/s1 /data1/mirror/s2
    ######## declare -a DATA_DIRECTORY=(/data1/primary/s1 /data1/primary/s2 )   ###### each directory is one segment; total segments = segment directories per node × number of MPP nodes
    ######## declare -a MIRROR_DATA_DIRECTORY=(/data1/mirror/s1 /data1/mirror/s2 )   ###### each directory is one mirror; total mirrors = mirror directories per node × number of MPP nodes
    # clean up any leftovers from a previous attempt
    rm -rf /data1/master/gpseg-1
    rm -rf /data1/primary/s{1..4}/*

    Before initializing:
    # Run on each node to check that gpdb is installed correctly and that the related configuration files are sane
    gpssh-exkeys -f seg_hosts    # exchange SSH keys from the master to every node (verifies passwordless login)
    gpcheck --local              # check the state of each node
    gpinitsystem -c gp_conf -h seg_hosts

    # Errors encountered
    1.selecting default max_connections ... /usr/local/gpdb/bin/postgres: error while loading shared libraries: libgpopt.so.3: cannot open shared object file: No such file or directory
    no data was returned by command ""/usr/local/gpdb/bin/postgres" -V"
    The program "postgres" is needed by initdb but was either not found in the same directory as "/usr/local/gpdb/bin/initdb" or failed unexpectedly.
    Check your installation; "postgres -V" may have more information.
    Fix: reinstall the gporca and gp-xerces libraries on every node, then reload the shared library cache with sudo ldconfig.

    # Add the mirror configuration
    2. mirror error
    /usr/local/gpdb/bin/gpinitsystem: line 1000: ((: MIRROR_REPLICATION_PORT_OFFSET=-: syntax error: operand expected (error token is "-")
    20190517:17:31:29:006271 gpinitsystem:mpp01:gpadmin-[INFO]:-Building group mirror array type , please wait...
    ./usr/local/gpdb/bin/lib/gp_bash_functions.sh: line 585: 0+: syntax error: operand expected (error token is "+")
    20190517:17:31:29:006271 gpinitsystem:mpp01:gpadmin-[INFO]:-Building the Master instance database, please wait...
    Fix: make sure all three mirror port parameters are set in gp_conf:
    MIRROR_PORT_BASE=43000
    REPLICATION_PORT_BASE=34000
    MIRROR_REPLICATION_PORT_BASE=44000

    ##################### To use all of the disks for segments and mirrors: the number of segments and mirrors must match, otherwise initialization aborts partway through.
    Create the directories (6 disks × 6 segments and 6 mirrors per disk), and list all 36 primary and all 36 mirror directories in DATA_DIRECTORY and MIRROR_DATA_DIRECTORY in gp_conf:
    mkdir -p /data{1..6}/primary/s{1..6}
    mkdir -p /data{1..6}/mirror/s{1..6}

    If initialization fails, delete all data under the mirror and primary directories:
    rm -rf /data{1..6}/primary/s{1..6}/*
    rm -rf /data{1..6}/mirror/s{1..6}/*
    and delete everything under /data1/master/:
    rm -rf /data1/master/gpseg-1
    ######## Check for files that were missed; any leftovers will also make initialization fail, and a full initialization run spends a long time checking the nodes, so check carefully before re-running!
    ls -alt /data{1..6}/mirror/s{1..6} && ls -alt /data{1..6}/primary/s{1..6}/
    ls -alt /data1/master/
    # then re-run the Greenplum initialization
    gpinitsystem -c gp_conf -h seg_hosts

    Commonly used Greenplum commands:
    Reference: https://www.cnblogs.com/pl-boke/p/9852383.html
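    A few of the utilities used in the rest of these notes (a quick reference added here, not from the original):
    gpstart -a            # start the cluster without prompting
    gpstop -a -M fast     # stop the cluster with a fast shutdown
    gpstate -s            # detailed status of every segment
    gpstate -m            # mirror status
    gpstate -f            # standby master status
    gprecoverseg          # recover failed segments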


    ########################
    Add a standby master
    # gpinitstandby -r -a : remove a failed standby node
    gpinitstandby -r -a    # removes the currently configured standby (mpp02)
    ##########
    20190523:11:00:55:010715 gpstate:mpp01:gpadmin-[WARNING]:-Standby PID = 0 <<<<<<<<
    20190523:11:00:55:010715 gpstate:mpp01:gpadmin-[WARNING]:-Standby status = Standby process not running <<<<<<<<

    # gpinitstandby -a -s <host> : add a standby node
    gpinitstandby -a -s mpp02
    # Before adding, prepare /data1/master (for gpseg-1) on mpp02, as sketched below; adding the standby copies the master's configuration over to the standby master
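    # Sketch (an assumption, not in the original notes): create and own the master data directory on mpp02 before running gpinitstandby
    ssh gpadmin@mpp02 "sudo mkdir -p /data1/master && sudo chown -R gpadmin:gpadmin /data1/master"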

    # Check the standby and cluster status with gpstate -f
    gpadmin@mpp01:~$ gpstate -f
    20190523:11:20:48:011923 gpstate:mpp01:gpadmin-[INFO]:-Starting gpstate with args: -f
    20190523:11:20:48:011923 gpstate:mpp01:gpadmin-[INFO]:-local Greenplum Version: 'postgres (Greenplum Database) 6.0.0-beta.1 build dev'
    20190523:11:20:48:011923 gpstate:mpp01:gpadmin-[INFO]:-master Greenplum Version: 'PostgreSQL 9.4.20 (Greenplum Database 6.0.0-beta.1 build dev) on x86_64-unknown-linux-gnu, compiled by gcc (Ubuntu 7.4.0-1ubuntu1~18.04) 7.4.0, 64-bit compiled on May 21 2019 14:00:08'
    20190523:11:20:48:011923 gpstate:mpp01:gpadmin-[INFO]:-Obtaining Segment details from master...
    20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:-Standby master details
    20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:-----------------------
    20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:- Standby address = mpp02
    20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:- Standby data directory = /data1/master/gpseg-1
    20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:- Standby port = 5432
    20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:- Standby PID = 23755
    20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:- Standby status = Standby host passive
    20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:--------------------------------------------------------------
    20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:--pg_stat_replication
    20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:--------------------------------------------------------------
    20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:--WAL Sender State: streaming
    20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:--Sync state: sync
    20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:--Sent Location: 0/140000A0
    20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:--Flush Location: 0/140000A0
    20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:--Replay Location: 0/140000A0
    20190523:11:20:49:011923 gpstate:mpp01:gpadmin-[INFO]:--------------------------------------------------------------
    ###############################
    Simulate a failover
    gpstop -a -m    # stop only the master instance's postgres
    Promote mpp02 to be the acting master:
    gpadmin@mpp02:/data1/master/gpseg-1$ gpactivatestandby -a -d /data1/master/gpseg-1
    sh: 0: getcwd() failed: No such file or directory
    20190523:11:27:44:025254 gpactivatestandby:mpp02:gpadmin-[CRITICAL]:-PGPORT environment variable not set.

    Solution: check /home/gpadmin/.profile on mpp02: it must export PGPORT and MASTER_DATA_DIRECTORY=/data1/master/gpseg-1/ and be sourced before running gpactivatestandby.

    gpactivatestandby -a -d /data1/master/gpseg-1

    # After the failover, the data needs to be recovered
    TIP: if the cluster has gone through a primary/mirror switchover, it is best to also run the command below:
    gprecoverseg -r    # rebalance: when a primary goes down and its mirror takes over, some hosts end up carrying a disproportionate load

    gpadmin@mpp02:~$ gprecoverseg -r
    20190523:13:37:12:010532 gprecoverseg:mpp02:gpadmin-[INFO]:-Starting gprecoverseg with args:
    20190523:13:37:12:010532 gprecoverseg:mpp02:gpadmin-[INFO]:-local Greenplum Version: 'postgres (Greenplum Database) 6.0.0-beta.1 build dev'
    20190523:13:37:12:010532 gprecoverseg:mpp02:gpadmin-[INFO]:-master Greenplum Version: 'PostgreSQL 9.4.20 (Greenplum Database 6.0.0-beta.1 build dev) on x86_64-unknown-linux-gnu, compiled by gcc (Ubuntu 7.4.0-1ubuntu1~18.04) 7.4.0, 64-bit compiled on May 21 2019 14:00:08'
    20190523:13:37:12:010532 gprecoverseg:mpp02:gpadmin-[INFO]:-Obtaining Segment details from master...
    LOG: failed to acquire resources on one or more segments
    DETAIL: FATAL: no pg_hba.conf entry for host "192.168.22.122", user "gpadmin", database "template1"
    (seg18 192.168.22.122:43018)
    LOG: query plan with multiple segworker groups is not supported
    HINT: likely caused by a function that reads or modifies data in a distributed table
    20190523:13:37:17:010532 gprecoverseg:mpp02:gpadmin-[CRITICAL]:-gprecoverseg failed. (Reason='FATAL: DTM initialization: failure during startup recovery, retry failed, check segment status (cdbtm.c:1328)
    ') exiting...

    Edit /data1/master/gpseg-1/pg_hba.conf and add the client IPs to pg_hba.conf:
    host all gpadmin 172.16.1.114/32 trust
    host all gpadmin 172.16.1.115/32 trust
    host all gpadmin 172.16.1.116/32 trust
    host all gpadmin 192.168.22.111/32 trust
    host all gpadmin 192.168.22.122/32 trust
    host all gpadmin 192.168.22.133/32 trust
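    # Reload the configuration without restarting the cluster (a sketch using standard Greenplum usage):
    gpstop -u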

    # After the master/standby switchover, gpstate reports warnings such as:
    Total primary segment failures (at master)

    [WARNING]:-Total primary segment failures (at master)
    [WARNING]:-Total number mirror segments acting as primary segments

    # Recover the failed segments and mirrors (check progress with gpstate -m)
    gprecoverseg
    #### gprecoverseg output:
    20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:-Recovery 72 of 72
    20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:----------------------------------------------------------
    20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:- Synchronization mode = Incremental
    20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:- Failed instance host = mpp03
    20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:- Failed instance address = mpp03
    20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:- Failed instance directory = /data6/primary/s6/gpseg107
    20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:- Failed instance port = 6035
    20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:- Recovery Source instance host = mpp01
    20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:- Recovery Source instance address = mpp01
    20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:- Recovery Source instance directory = /data6/mirror/s6/gpseg107
    20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:- Recovery Source instance port = 43035
    20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:- Recovery Target = in-place
    20190523:13:44:11:034230 gprecoverseg:mpp02:gpadmin-[INFO]:----------------------------------------------------------

    Continue with segment recovery procedure Yy|Nn (default=N): enter Y

    Repair the cluster:
    gprecoverseg -a
    20190523:13:53:49:034230 gprecoverseg:mpp02:gpadmin-[INFO]:-Ensuring that shared memory is cleaned up for stopped segments
    20190523:13:53:50:034230 gprecoverseg:mpp02:gpadmin-[INFO]:-Updating configuration with new mirrors
    20190523:13:53:50:034230 gprecoverseg:mpp02:gpadmin-[INFO]:-Updating mirrors
    20190523:13:53:50:034230 gprecoverseg:mpp02:gpadmin-[INFO]:-Running pg_rewind on required mirrors

    Check the recovery status of the cluster:
    gpstate -m
    ########
    20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp03 /data5/mirror/s6/gpseg65 43029 Passive Synchronized
    20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp03 /data6/mirror/s1/gpseg66 43030 Passive Synchronized
    20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp03 /data6/mirror/s2/gpseg67 43031 Passive Synchronized
    20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp03 /data6/mirror/s3/gpseg68 43032 Passive Synchronized
    20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp03 /data6/mirror/s4/gpseg69 43033 Passive Synchronized
    20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp03 /data6/mirror/s5/gpseg70 43034 Passive Synchronized
    20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp03 /data6/mirror/s6/gpseg71 43035 Passive Synchronized
    20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp01 /data1/mirror/s1/gpseg72 43000 Acting as Primary Not In Sync
    20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp01 /data1/mirror/s2/gpseg73 43001 Acting as Primary Not In Sync
    20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp01 /data1/mirror/s3/gpseg74 43002 Acting as Primary Not In Sync
    20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp01 /data1/mirror/s4/gpseg75 43003 Acting as Primary Not In Sync
    20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp01 /data1/mirror/s5/gpseg76 43004 Acting as Primary Not In Sync
    20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp01 /data1/mirror/s6/gpseg77 43005 Acting as Primary Not In Sync
    20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp01 /data2/mirror/s1/gpseg78 43006 Acting as Primary Not In Sync
    20190523:13:53:48:038747 gpstate:mpp02:gpadmin-[INFO]:- mpp01 /data2/mirror/s2/gpseg79 43007 Acting as Primary Not In Sync
    #####
    Recovery succeeded:
    20190523:14:00:32:034230 gprecoverseg:mpp02:gpadmin-[INFO]:-For segments updated successfully, streaming will continue in the background.
    20190523:14:00:32:034230 gprecoverseg:mpp02:gpadmin-[INFO]:-Use gpstate -s to check the streaming progress.
    20190523:14:00:32:034230 gprecoverseg:mpp02:gpadmin-[INFO]:-******************************************************************

    Repairing primary segments and mirror segments: command summary

    gpstate -m           # view the Greenplum mirror status

    gpstop -a -M fast    # shut down the entire Greenplum cluster

    Start the database in restricted mode:
    gpstart -a -R
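    # To leave restricted mode once maintenance is done (a sketch): restart the cluster normally
    gpstop -a -r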

  • Source: https://www.cnblogs.com/corangeh/p/11206867.html