• 达梦主从环境(配置自动切换)故障模拟


    环境:
    OS:CentOS 7
    DB:DM8
    主库:192.168.1.135
    备库:192.168.1.134
    主备库dmwatcher.ini配置文件如下:

    [dmdba@host134 slnngk]$ more dmwatcher.ini
    [GRP1]
    DW_TYPE = GLOBAL ##全局守护类型
    DW_MODE = AUTO ##AUTO为自动切换模式(MANUAL为手工切换模式),自动切换模式必须部署一个确认监视器
    DW_ERROR_TIME = 10 ##远程守护进程故障认定时间
    INST_RECOVER_TIME = 60 ##主库守护进程启动恢复的间隔时间
    INST_ERROR_TIME = 10 ##本地实例故障认定时间
    INST_OGUID = 453332 ##守护系统唯一 OGUID 值
    INST_INI = /dmdbms/data/slnngk/dm.ini #dm.ini配置文件路径
    INST_AUTO_RESTART = 1 ##打开实例的自动启动功能
    INST_STARTUP_CMD = /dmdbms/product/bin/dmserver #命令行方式启动
    RLOG_SEND_THRESHOLD = 0 ##指定主库发送日志到备库的时间阈值,默认关闭
    RLOG_APPLY_THRESHOLD = 0 ##指定备库重演日志的时间阈值,默认关闭

    1.备库停掉数据库

    [root@host134 init.d]# ps -ef|grep slnngk
    dmdba    24121     1  0 09:14 pts/0    00:00:01 /dmdbms/product/bin/dmserver path=/dmdbms/data/slnngk/dm.ini -noconsole
    dmdba    24392     1  0 09:15 ?        00:00:01 /dmdbms/product/bin/dmwatcher path=/dmdbms/data/slnngk/dmwatcher.ini -noconsole
    root     25527 14412  0 09:25 pts/1    00:00:00 grep --color=auto slnngk

    我们这里直接kill掉数据库服务进程
    [root@host134 init.d]#kill -9 24121
    这个时候主从不会发生切换,备库的守护进程会把备库拉起来

    [root@host134 init.d]# ps -ef|grep slnngk
    dmdba    24392     1  0 09:15 ?        00:00:01 /dmdbms/product/bin/dmwatcher path=/dmdbms/data/slnngk/dmwatcher.ini -noconsole
    dmdba    25717     1  0 09:26 ?        00:00:00 /dmdbms/product/bin/dmserver /dmdbms/data/slnngk/dm.ini mount
    root     26051 14412  0 09:29 pts/1    00:00:00 grep --color=auto slnngk

    2.备库停掉守护进程

    systemctl stop DmWatcherServiceGRP1
    或者直接杀掉守护进程:

    [root@host134 init.d]# ps -ef|grep slnngk
    dmdba    24392     1  0 09:15 ?        00:00:01 /dmdbms/product/bin/dmwatcher path=/dmdbms/data/slnngk/dmwatcher.ini -noconsole
    dmdba    25717     1  0 09:26 ?        00:00:00 /dmdbms/product/bin/dmserver /dmdbms/data/slnngk/dm.ini mount
    root     26051 14412  0 09:29 pts/1    00:00:00 grep --color=auto slnngk

    [root@host134 init.d]#kill -9 24392
    这个时候数据库进程和守护进程都没有了
    [root@host134 init.d]# ps -ef|grep slnngk
    root 26545 14412 0 09:32 pts/1 00:00:00 grep --color=auto slnngk

    主备环境没有发生切换,但是备库目前的状态是INVALID(无效)

    如果只是单独停掉备库的守护进程(例如使用systemctl stop),数据库实例仍然正常运行,这个时候主备环境是正常的,也不会发生切换

    [root@host134 init.d]# systemctl stop DmWatcherServiceGRP1
    [root@host134 init.d]# ps -ef|grep slnngk
    dmdba    26852     1  0 09:34 ?        00:00:01 /dmdbms/product/bin/dmserver path=/dmdbms/data/slnngk/dm.ini -noconsole
    root     27846 14412  0 09:40 pts/1    00:00:00 grep --color=auto slnngk

    3.停掉主库数据库服务

    [root@host135 ~]# ps -ef|grep slnngk
    root      7942 17467  0 09:53 pts/6    00:00:00 grep --color=auto slnngk
    dmdba    30089     1  0 08:56 ?        00:00:05 /dmdbms/product/bin/dmserver path=/dmdbms/data/slnngk/dm.ini -noconsole
    dmdba    30424     1  0 08:57 ?        00:00:04 /dmdbms/product/bin/dmwatcher path=/dmdbms/data/slnngk/dmwatcher.ini -noconsole
    dmdba    30724     1  0 08:58 ?        00:00:04 /dmdbms/product/bin/dmmonitor path=/dmdbms/data/slnngk/dmmonitor.ini
    
    [root@host135 ~]# kill -9 30089

    这个时候主库的守护进程会把数据库服务重新拉起来,主从没有发生切换

    4.停掉主库守护进程

    [root@host135 ~]# ps -ef|grep slnngk
    dmdba     8167     1  0 09:54 ?        00:00:04 /dmdbms/product/bin/dmserver /dmdbms/data/slnngk/dm.ini mount
    root     18111  2596  0 10:51 pts/5    00:00:00 grep --color=auto slnngk
    dmdba    30424     1  0 08:57 ?        00:00:09 /dmdbms/product/bin/dmwatcher path=/dmdbms/data/slnngk/dmwatcher.ini -noconsole
    dmdba    30724     1  0 08:58 ?        00:00:09 /dmdbms/product/bin/dmmonitor path=/dmdbms/data/slnngk/dmmonitor.ini

    直接kill掉
    [root@host135 ~]# kill -9 30424

    这个时候守护进程和数据库进程都停掉了

    [root@host135 ~]# ps -ef|grep slnngk
    root     18306  2596  0 10:52 pts/5    00:00:00 grep --color=auto slnngk
    dmdba    30724     1  0 08:58 ?        00:00:09 /dmdbms/product/bin/dmmonitor path=/dmdbms/data/slnngk/dmmonitor.ini

    数据库主备发生了切换,192.168.1.134变成了主库

    show
    2022-07-27 10:53:19 
    #================================================================================#
    GROUP            OGUID       MON_CONFIRM     MODE            MPP_FLAG  
    GRP1             453332      FALSE           AUTO            FALSE     
    
    
    <<DATABASE GLOBAL INFO:>>
    DW_IP               MAL_DW_PORT  WTIME                WTYPE     WCTLSTAT  WSTATUS        INAME            INST_OK   N_EP  N_OK  ISTATUS     IMODE     DSC_STATUS     RTYPE     RSTAT    
    192.168.1.134       52141        2022-07-27 10:53:19  GLOBAL    VALID     OPEN           SLNNGKBAK        OK        1     1     OPEN        PRIMARY   DSC_OPEN       REALTIME  VALID    
    
    EP INFO:
    INST_IP             INST_PORT  INST_OK   INAME            ISTATUS     IMODE     DSC_SEQNO  DSC_CTL_NODE RTYPE     RSTAT    FSEQ            FLSN            CSEQ            CLSN            DW_STAT_FLAG          
    192.168.1.134       5236       OK        SLNNGKBAK        OPEN        PRIMARY   0          0            REALTIME  VALID    407242          468571          407242          468571          NONE                  
    
    #================================================================================#

    尝试将原来的主库启动起来
    [root@host135 ~]#systemctl start DmServiceslnngk.service
    [root@host135 ~]#systemctl start DmWatcherServiceGRP1

    这个时候原来的主库启动后,充当了备库的角色

    show
    2022-07-27 10:57:44 
    #================================================================================#
    GROUP            OGUID       MON_CONFIRM     MODE            MPP_FLAG  
    GRP1             453332      FALSE           AUTO            FALSE     
    
    
    <<DATABASE GLOBAL INFO:>>
    DW_IP               MAL_DW_PORT  WTIME                WTYPE     WCTLSTAT  WSTATUS        INAME            INST_OK   N_EP  N_OK  ISTATUS     IMODE     DSC_STATUS     RTYPE     RSTAT    
    192.168.1.134       52141        2022-07-27 10:57:44  GLOBAL    VALID     OPEN           SLNNGKBAK        OK        1     1     OPEN        PRIMARY   DSC_OPEN       REALTIME  VALID    
    
    EP INFO:
    INST_IP             INST_PORT  INST_OK   INAME            ISTATUS     IMODE     DSC_SEQNO  DSC_CTL_NODE RTYPE     RSTAT    FSEQ            FLSN            CSEQ            CLSN            DW_STAT_FLAG          
    192.168.1.134       5236       OK        SLNNGKBAK        OPEN        PRIMARY   0          0            REALTIME  VALID    407331          468659          407331          468660          NONE                  
    
    <<DATABASE GLOBAL INFO:>>
    DW_IP               MAL_DW_PORT  WTIME                WTYPE     WCTLSTAT  WSTATUS        INAME            INST_OK   N_EP  N_OK  ISTATUS     IMODE     DSC_STATUS     RTYPE     RSTAT    
    192.168.1.135       52141        2022-07-27 10:57:44  GLOBAL    VALID     OPEN           SLNNGK           OK        1     1     OPEN        STANDBY   DSC_OPEN       REALTIME  VALID    
    
    EP INFO:
    INST_IP             INST_PORT  INST_OK   INAME            ISTATUS     IMODE     DSC_SEQNO  DSC_CTL_NODE RTYPE     RSTAT    FSEQ            FLSN            CSEQ            CLSN            DW_STAT_FLAG          
    192.168.1.135       5236       OK        SLNNGK           OPEN        STANDBY   0          0            REALTIME  VALID    407228          468659          407228          468659          NONE                  
    
    DATABASE(SLNNGK) APPLY INFO FROM (SLNNGKBAK), REDOS_PARALLEL_NUM (1):
    DSC_SEQNO[0], (RSEQ, SSEQ, KSEQ)[407331, 407331, 407331], (RLSN, SLSN, KLSN)[468659, 468659, 468659], N_TSK[0], TSK_MEM_USE[0] 
    REDO_LSN_ARR: (468659)
    
    
    #================================================================================#
  • 相关阅读:
    python——numpy (二)
    python——numpy(一)
    python——matplotlib
    redis
    图片验证码识别技术——Tesseract
    Linux 环境变量PROMPT_COMMAND
    maven项目管理工具
    Log4J日志组件
    java中的泛型,注解
    数据库备份还原
  • 原文地址:https://www.cnblogs.com/hxlasky/p/16524107.html
Copyright © 2020-2023  润新知