环境:
OS:CentOS 7
DB:DM8
主库:192.168.1.135
备库:192.168.1.134
主备库dmwatcher.ini配置文件如下:
[dmdba@host134 slnngk]$ more dmwatcher.ini
[GRP1]
DW_TYPE = GLOBAL ##全局守护类型
DW_MODE = AUTO ##切换模式:MANUAL为手工切换,AUTO为自动切换;自动切换模式必须部署一个确认监视器
DW_ERROR_TIME = 10 ##远程守护进程故障认定时间
INST_RECOVER_TIME = 60 ##主库守护进程启动恢复的间隔时间
INST_ERROR_TIME = 10 ##本地实例故障认定时间
INST_OGUID = 453332 ##守护系统唯一 OGUID 值
INST_INI = /dmdbms/data/slnngk/dm.ini #dm.ini配置文件路径
INST_AUTO_RESTART = 1 ##打开实例的自动启动功能
INST_STARTUP_CMD = /dmdbms/product/bin/dmserver #命令行方式启动
RLOG_SEND_THRESHOLD = 0 ##指定主库发送日志到备库的时间阈值,默认关闭
RLOG_APPLY_THRESHOLD = 0 ##指定备库重演日志的时间阈值,默认关闭
1.备库停掉数据库
[root@host134 init.d]# ps -ef|grep slnngk
dmdba 24121 1 0 09:14 pts/0 00:00:01 /dmdbms/product/bin/dmserver path=/dmdbms/data/slnngk/dm.ini -noconsole
dmdba 24392 1 0 09:15 ? 00:00:01 /dmdbms/product/bin/dmwatcher path=/dmdbms/data/slnngk/dmwatcher.ini -noconsole
root 25527 14412 0 09:25 pts/1 00:00:00 grep --color=auto slnngk
我们这里直接kill掉数据库服务进程
[root@host134 init.d]# kill -9 24121
这个时候主从不会发生切换,备库的守护进程会把备库拉起来
[root@host134 init.d]# ps -ef|grep slnngk
dmdba 24392 1 0 09:15 ? 00:00:01 /dmdbms/product/bin/dmwatcher path=/dmdbms/data/slnngk/dmwatcher.ini -noconsole
dmdba 25717 1 0 09:26 ? 00:00:00 /dmdbms/product/bin/dmserver /dmdbms/data/slnngk/dm.ini mount
root 26051 14412 0 09:29 pts/1 00:00:00 grep --color=auto slnngk
2.备库停掉守护进程
systemctl stop DmWatcherServiceGRP1
或是杀掉进程
[root@host134 init.d]# ps -ef|grep slnngk
dmdba 24392 1 0 09:15 ? 00:00:01 /dmdbms/product/bin/dmwatcher path=/dmdbms/data/slnngk/dmwatcher.ini -noconsole
dmdba 25717 1 0 09:26 ? 00:00:00 /dmdbms/product/bin/dmserver /dmdbms/data/slnngk/dm.ini mount
root 26051 14412 0 09:29 pts/1 00:00:00 grep --color=auto slnngk
[root@host134 init.d]# kill -9 24392
这个时候数据库进程和守护进程都没有了
[root@host134 init.d]# ps -ef|grep slnngk
root 26545 14412 0 09:32 pts/1 00:00:00 grep --color=auto slnngk
主备环境没有切换,但是备库目前的状态是INVALID无效的
如果只是单独停掉备库守护进程,而备库数据库实例仍在正常运行,这个时候主备环境是正常的,也不会发生切换
[root@host134 init.d]# systemctl stop DmWatcherServiceGRP1
[root@host134 init.d]# ps -ef|grep slnngk
dmdba 26852 1 0 09:34 ? 00:00:01 /dmdbms/product/bin/dmserver path=/dmdbms/data/slnngk/dm.ini -noconsole
root 27846 14412 0 09:40 pts/1 00:00:00 grep --color=auto slnngk
3.停掉主库数据库服务
[root@host135 ~]# ps -ef|grep slnngk
root 7942 17467 0 09:53 pts/6 00:00:00 grep --color=auto slnngk
dmdba 30089 1 0 08:56 ? 00:00:05 /dmdbms/product/bin/dmserver path=/dmdbms/data/slnngk/dm.ini -noconsole
dmdba 30424 1 0 08:57 ? 00:00:04 /dmdbms/product/bin/dmwatcher path=/dmdbms/data/slnngk/dmwatcher.ini -noconsole
dmdba 30724 1 0 08:58 ? 00:00:04 /dmdbms/product/bin/dmmonitor path=/dmdbms/data/slnngk/dmmonitor.ini
[root@host135 ~]# kill -9 30089
这个时候主库的守护进程会把数据库服务重新拉起来,主备没有发生切换
4.停掉主库的数据库服务守护进程
[root@host135 ~]# ps -ef|grep slnngk
dmdba 8167 1 0 09:54 ? 00:00:04 /dmdbms/product/bin/dmserver /dmdbms/data/slnngk/dm.ini mount
root 18111 2596 0 10:51 pts/5 00:00:00 grep --color=auto slnngk
dmdba 30424 1 0 08:57 ? 00:00:09 /dmdbms/product/bin/dmwatcher path=/dmdbms/data/slnngk/dmwatcher.ini -noconsole
dmdba 30724 1 0 08:58 ? 00:00:09 /dmdbms/product/bin/dmmonitor path=/dmdbms/data/slnngk/dmmonitor.ini
直接kill掉
[root@host135 ~]# kill -9 30424
这个时候守护进程和数据库进程都停掉了
[root@host135 ~]# ps -ef|grep slnngk
root 18306 2596 0 10:52 pts/5 00:00:00 grep --color=auto slnngk
dmdba 30724 1 0 08:58 ? 00:00:09 /dmdbms/product/bin/dmmonitor path=/dmdbms/data/slnngk/dmmonitor.ini
数据库主备发生了切换,192.168.1.134变成了主库
show
2022-07-27 10:53:19
#================================================================================#
GROUP OGUID MON_CONFIRM MODE MPP_FLAG
GRP1 453332 FALSE AUTO FALSE
<<DATABASE GLOBAL INFO:>>
DW_IP MAL_DW_PORT WTIME WTYPE WCTLSTAT WSTATUS INAME INST_OK N_EP N_OK ISTATUS IMODE DSC_STATUS RTYPE RSTAT
192.168.1.134 52141 2022-07-27 10:53:19 GLOBAL VALID OPEN SLNNGKBAK OK 1 1 OPEN PRIMARY DSC_OPEN REALTIME VALID
EP INFO:
INST_IP INST_PORT INST_OK INAME ISTATUS IMODE DSC_SEQNO DSC_CTL_NODE RTYPE RSTAT FSEQ FLSN CSEQ CLSN DW_STAT_FLAG
192.168.1.134 5236 OK SLNNGKBAK OPEN PRIMARY 0 0 REALTIME VALID 407242 468571 407242 468571 NONE
#================================================================================#
尝试将原来的主库启动起来
[root@host135 ~]# systemctl start DmServiceslnngk.service
[root@host135 ~]# systemctl start DmWatcherServiceGRP1
这个时候原来的主库启动后,以备库的角色重新加入了主备环境
show
2022-07-27 10:57:44
#================================================================================#
GROUP OGUID MON_CONFIRM MODE MPP_FLAG
GRP1 453332 FALSE AUTO FALSE
<<DATABASE GLOBAL INFO:>>
DW_IP MAL_DW_PORT WTIME WTYPE WCTLSTAT WSTATUS INAME INST_OK N_EP N_OK ISTATUS IMODE DSC_STATUS RTYPE RSTAT
192.168.1.134 52141 2022-07-27 10:57:44 GLOBAL VALID OPEN SLNNGKBAK OK 1 1 OPEN PRIMARY DSC_OPEN REALTIME VALID
EP INFO:
INST_IP INST_PORT INST_OK INAME ISTATUS IMODE DSC_SEQNO DSC_CTL_NODE RTYPE RSTAT FSEQ FLSN CSEQ CLSN DW_STAT_FLAG
192.168.1.134 5236 OK SLNNGKBAK OPEN PRIMARY 0 0 REALTIME VALID 407331 468659 407331 468660 NONE
<<DATABASE GLOBAL INFO:>>
DW_IP MAL_DW_PORT WTIME WTYPE WCTLSTAT WSTATUS INAME INST_OK N_EP N_OK ISTATUS IMODE DSC_STATUS RTYPE RSTAT
192.168.1.135 52141 2022-07-27 10:57:44 GLOBAL VALID OPEN SLNNGK OK 1 1 OPEN STANDBY DSC_OPEN REALTIME VALID
EP INFO:
INST_IP INST_PORT INST_OK INAME ISTATUS IMODE DSC_SEQNO DSC_CTL_NODE RTYPE RSTAT FSEQ FLSN CSEQ CLSN DW_STAT_FLAG
192.168.1.135 5236 OK SLNNGK OPEN STANDBY 0 0 REALTIME VALID 407228 468659 407228 468659 NONE
DATABASE(SLNNGK) APPLY INFO FROM (SLNNGKBAK), REDOS_PARALLEL_NUM (1):
DSC_SEQNO[0], (RSEQ, SSEQ, KSEQ)[407331, 407331, 407331], (RLSN, SLSN, KLSN)[468659, 468659, 468659], N_TSK[0], TSK_MEM_USE[0]
REDO_LSN_ARR: (468659)
#================================================================================#