CentOS 6.5 x86_64  2014-04-27
# yum -y install corosync pacemaker
# yum -y install crmsh-1.2.6-4.el6.x86_64.rpm pssh-2.3.1-2.el6.x86_64.rpm
# umount /mnt/
# service drbd stop
# chkconfig drbd off

2. Configure corosync (the commands below are run on node4):

# cd /etc/corosync
# cp corosync.conf.example corosync.conf

Then edit corosync.conf and append the following stanzas:

service {
    ver:  0
    name: pacemaker
    # use_mgmtd: yes
}
aisexec {
    user:  root
    group: root
}
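For context, these two stanzas are appended to a corosync.conf whose totem and logging sections are taken over from the shipped example file. A rough sketch of the relevant parts is shown below; the bindnetaddr, mcastaddr and mcastport values are assumptions and must be adjusted to the cluster's own network:

totem {
    version: 2
    secauth: on
    threads: 0
    interface {
        ringnumber: 0
        bindnetaddr: 192.168.0.0     # assumption: network address of the cluster interconnect
        mcastaddr: 239.255.1.1       # assumption: any unused multicast address
        mcastport: 5405
        ttl: 1
    }
}
logging {
    fileline: off
    to_stderr: no
    to_logfile: yes
    logfile: /var/log/cluster/corosync.log
    to_syslog: no
    timestamp: on
}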
Generate the corosync authentication key. /dev/random is temporarily replaced by /dev/urandom so that corosync-keygen does not block waiting for entropy, and is restored afterwards:

[root@node4 corosync]# mv /dev/random /dev/h
[root@node4 corosync]# ln /dev/urandom /dev/random
[root@node4 corosync]# corosync-keygen
Corosync Cluster Engine Authentication key generator.
Gathering 1024 bits for key from /dev/random.
Press keys on your keyboard to generate entropy.
Writing corosync key to /etc/corosync/authkey.
[root@node4 corosync]# rm -rf /dev/random
[root@node4 corosync]# mv /dev/h /dev/random

Start corosync and set the global cluster properties:

[root@node4 corosync]# service corosync start
[root@node4 corosync]# crm
crm(live)# configure
crm(live)configure# property stonith-enabled=false
crm(live)configure# property no-quorum-policy=ignore
crm(live)configure# rsc_defaults resource-stickiness=100
crm(live)configure# verify
crm(live)configure# commit
crm(live)configure# cd
crm(live)# configure

1. Check the current cluster configuration and make sure the global properties are suitable for a two-node cluster:

crm(live)configure# show
node node3.nginx.fx
node node4.mysql.com
property $id="cib-bootstrap-options" \
    dc-version="1.1.10-14.el6_5.3-368c726" \
    cluster-infrastructure="classic openais (with plugin)" \
    expected-quorum-votes="2" \
    stonith-enabled="false" \
    no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
    resource-stickiness="100"

Define the DRBD resource mystore as a master/slave cluster resource:

crm(live)configure# primitive mysqlstore ocf:linbit:drbd params drbd_resource=mystore op monitor role=Master interval=30s timeout=20s op monitor role=Slave interval=60s timeout=20s op start timeout=240s op stop timeout=100s
crm(live)configure# verify
crm(live)configure# master ms_mysqlstore mysqlstore meta master-max=1 master-node-max=1 clone-max=2 clone-node-max=1 notify="True"
crm(live)configure# verify
crm(live)configure# commit
crm(live)configure# show
node node3.nginx.fx
node node4.mysql.com
primitive mysqlstore ocf:linbit:drbd \
    params drbd_resource="mystore" \
    op monitor role="Master" interval="30s" timeout="20s" \
    op monitor role="Slave" interval="60s" timeout="20s" \
    op start timeout="240s" interval="0" \
    op stop timeout="100s" interval="0"
ms ms_mysqlstore mysqlstore \
    meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="True"
property $id="cib-bootstrap-options" \
    dc-version="1.1.10-14.el6_5.3-368c726" \
    cluster-infrastructure="classic openais (with plugin)" \
    expected-quorum-votes="2" \
    stonith-enabled="false" \
    no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
    resource-stickiness="100"
crm(live)configure# cd
crm(live)# status
Last updated: Sun Apr 27 06:11:37 2014
Last change: Sun Apr 27 06:09:29 2014 via cibadmin on node4.mysql.com
Stack: classic openais (with plugin)
Current DC: node4.mysql.com - partition with quorum
Version: 1.1.10-14.el6_5.3-368c726
2 Nodes configured, 2 expected votes
2 Resources configured

Online: [ node3.nginx.fx node4.mysql.com ]

 Master/Slave Set: ms_mysqlstore [mysqlstore]
     Masters: [ node4.mysql.com ]
     Slaves: [ node3.nginx.fx ]

Put node4 into standby to verify that the Master role fails over to node3:

crm(live)# node
crm(live)node# standby node4.mysql.com
crm(live)node# cd
crm(live)# status
Last updated: Sun Apr 27 06:14:18 2014
Last change: Sun Apr 27 06:14:12 2014 via crm_attribute on node4.mysql.com
Stack: classic openais (with plugin)
Current DC: node4.mysql.com - partition with quorum
Version: 1.1.10-14.el6_5.3-368c726
2 Nodes configured, 2 expected votes
2 Resources configured

Node node4.mysql.com: standby
Online: [ node3.nginx.fx ]

 Master/Slave Set: ms_mysqlstore [mysqlstore]
     Masters: [ node3.nginx.fx ]
     Stopped: [ node4.mysql.com ]

Bring node4 back online and review the final configuration:

crm(live)# node online node4.mysql.com
crm(live)# status
Last updated: Sun Apr 27 06:14:46 2014
Last change: Sun Apr 27 06:14:42 2014 via crm_attribute on node4.mysql.com
Stack: classic openais (with plugin)
Current DC: node4.mysql.com - partition with quorum
Version: 1.1.10-14.el6_5.3-368c726
2 Nodes configured, 2 expected votes
2 Resources configured

Online: [ node3.nginx.fx node4.mysql.com ]

 Master/Slave Set: ms_mysqlstore [mysqlstore]
     Masters: [ node3.nginx.fx ]
     Slaves: [ node4.mysql.com ]

crm(live)# configure show
node node3.nginx.fx
node node4.mysql.com \
    attributes standby="off"
primitive mysqlstore ocf:linbit:drbd \
    params drbd_resource="mystore" \
    op monitor role="Master" interval="30s" timeout="20s" \
    op monitor role="Slave" interval="60s" timeout="20s" \
    op start timeout="240s" interval="0" \
    op stop timeout="100s" interval="0"
ms ms_mysqlstore mysqlstore \
    meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="True"
property $id="cib-bootstrap-options" \
    dc-version="1.1.10-14.el6_5.3-368c726" \
    cluster-infrastructure="classic openais (with plugin)" \
    expected-quorum-votes="2" \
    stonith-enabled="false" \
    no-quorum-policy="ignore"
rsc_defaults $id="rsc-options" \
    resource-stickiness="100"
[root@node3 ~]# service drbd status
drbd driver loaded OK; device status:
version: 8.4.3 (api:1/proto:86-101)
GIT-hash: 89a294209144b68adb3ee85a73221f964d3ee515 build by gardner@, 2013-11-29 12:28:00
m:res cs ro ds p mounted fstype
0:mystore Connected Primary/Secondary UpToDate/UpToDate C
[root@node4 ~]# service drbd status
drbd driver loaded OK; device status:
version: 8.4.3 (api:1/proto:86-101)
GIT-hash: 89a294209144b68adb3ee85a73221f964d3ee515 build by gardner@, 2013-11-29 12:28:00
m:res cs ro ds p mounted fstype
0:mystore Connected Secondary/Primary UpToDate/UpToDate C
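The transcript above stops after defining the master/slave DRBD resource and verifying failover. To actually use the Primary's device, a Filesystem resource would normally be layered on top of ms_mysqlstore with colocation and order constraints, as the later examples in these notes do for the web and MySQL cases. A minimal sketch follows; the resource name mystore_fs, the device path /dev/drbd0, the mount point /mydata and the ext4 fstype are assumptions, not part of the original transcript:

crm(live)configure# primitive mystore_fs ocf:heartbeat:Filesystem \
    params device="/dev/drbd0" directory="/mydata" fstype="ext4" \
    op start timeout=60s interval=0 op stop timeout=60s interval=0
crm(live)configure# colocation mystore_fs_with_ms_mysqlstore inf: mystore_fs ms_mysqlstore:Master
crm(live)configure# order mystore_fs_after_ms_mysqlstore inf: ms_mysqlstore:promote mystore_fs:start
crm(live)configure# verify
crm(live)configure# commit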
Prerequisites:
1) This setup uses two test nodes, node1.a.org and node2.a.org, whose IP addresses are 192.168.0.5 and 192.168.0.6 respectively;
2) node1 and node2 already form an openais/corosync-based cluster, and a Primary/Secondary DRBD device /dev/drbd0 with the resource name web has been configured on both nodes (a sketch of such a resource definition follows this list); if your configuration differs, make sure the values used in the commands below are adjusted to match it;
3) The operating system is RHEL 5.4 on the x86 platform;
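Prerequisite 2 assumes a DRBD resource named web already exists. For reference, a minimal sketch of such a resource definition, placed in /etc/drbd.conf or /etc/drbd.d/web.res, might look like the following; the backing partition /dev/sda5 and the TCP port 7789 are assumptions and must match your own layout:

resource web {
  protocol  C;
  device    /dev/drbd0;
  disk      /dev/sda5;       # assumption: backing partition/LV on each node
  meta-disk internal;
  on node1.a.org {
    address 192.168.0.5:7789;
  }
  on node2.a.org {
    address 192.168.0.6:7789;
  }
}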
1. Check the current cluster configuration and make sure the global properties are suitable for a two-node cluster:

# crm configure show
node node1.a.org
node node2.a.org
property $id="cib-bootstrap-options" \
    dc-version="1.0.11-1554a83db0d3c3e546cfd3aaff6af1184f79ee87" \
    cluster-infrastructure="openais" \
    expected-quorum-votes="2" \
    stonith-enabled="false" \
    last-lrm-refresh="1308059765" \
    no-quorum-policy="ignore"

In the output above, make sure stonith-enabled and no-quorum-policy are present with the values shown. If not, they can be set with:

# crm configure property stonith-enabled=false
# crm configure property no-quorum-policy=ignore

2. Define the already-configured DRBD device /dev/drbd0 as a cluster service.

1) As required for cluster-managed services, first make sure the drbd service is stopped on both nodes and will not start automatically at boot:

# drbd-overview
  0:web  Unconfigured . . . .
# chkconfig drbd off

2) Configure drbd as a cluster resource.

The resource agent (RA) for drbd is provided under the OCF provider linbit, at /usr/lib/ocf/resource.d/linbit/drbd. The RA and its metadata can be inspected with:

# crm ra classes
heartbeat
lsb
ocf / heartbeat linbit pacemaker
stonith
# crm ra list ocf linbit
drbd
# crm ra info ocf:linbit:drbd
This resource agent manages a DRBD resource as a master/slave resource.
DRBD is a shared-nothing replicated storage device. (ocf:linbit:drbd)

Master/Slave OCF Resource Agent for DRBD

Parameters (* denotes required, [] the default):

drbd_resource* (string): drbd resource name
    The name of the drbd resource from the drbd.conf file.

drbdconf (string, [/etc/drbd.conf]): Path to drbd.conf
    Full path to the drbd.conf file.

Operations' defaults (advisory minimum):

    start          timeout=240
    promote        timeout=90
    demote         timeout=90
    notify         timeout=90
    stop           timeout=100
    monitor_Slave  interval=20 timeout=20 start-delay=1m
    monitor_Master interval=10 timeout=20 start-delay=1m

drbd has to run on both nodes at the same time, but in the primary/secondary model only one node may be Master while the other is Slave. It is therefore a somewhat special kind of cluster resource, a multi-state clone: the nodes are differentiated into Master and Slave, and both nodes start out in the Slave state when the service is first brought up.

[root@node1 ~]# crm
crm(live)# configure
crm(live)configure# primitive webdrbd ocf:linbit:drbd params drbd_resource=web op monitor role=Master interval=50s timeout=30s op monitor role=Slave interval=60s timeout=30s
crm(live)configure# master MS_Webdrbd webdrbd meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
crm(live)configure# show webdrbd
primitive webdrbd ocf:linbit:drbd \
    params drbd_resource="web" \
    op monitor interval="15s"
crm(live)configure# show MS_Webdrbd
ms MS_Webdrbd webdrbd \
    meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
crm(live)configure# verify
crm(live)configure# commit

Check the current cluster status:

# crm status
============
Last updated: Fri Jun 17 06:24:03 2011
Stack: openais
Current DC: node2.a.org - partition with quorum
Version: 1.0.11-1554a83db0d3c3e546cfd3aaff6af1184f79ee87
2 Nodes configured, 2 expected votes
1 Resources configured.
============

Online: [ node2.a.org node1.a.org ]

Master/Slave Set: MS_Webdrbd
    Masters: [ node2.a.org ]
    Slaves: [ node1.a.org ]
The output above shows that the Primary node of the drbd service is currently node2.a.org and the Secondary node is node1.a.org. This can also be verified on node2 by checking whether the local host has become the Primary for the web resource:

# drbdadm role web
Primary/Secondary

3) Create a cluster service that automatically mounts the web resource on the Primary node.

The Master node of MS_Webdrbd is the Primary node of the drbd web resource; the device /dev/drbd0 can be mounted and used on that node, and the cluster service built on top of it needs this mount to happen automatically. Assume here that the web resource is a shared filesystem holding web pages for a web server cluster, to be mounted at /www (this directory must already exist on both nodes).

This automatically mounted cluster resource must run on the Master node of the drbd service and may only be started after drbd has promoted that node to Primary, so a colocation constraint and an order constraint also have to be defined for the two resources.

# crm
crm(live)# configure
crm(live)configure# primitive WebFS ocf:heartbeat:Filesystem params device="/dev/drbd0" directory="/www" fstype="ext3"
crm(live)configure# colocation WebFS_on_MS_webdrbd inf: WebFS MS_Webdrbd:Master
crm(live)configure# order WebFS_after_MS_Webdrbd inf: MS_Webdrbd:promote WebFS:start
crm(live)configure# verify
crm(live)configure# commit

Check the status of the resources in the cluster:

# crm status
============
Last updated: Fri Jun 17 06:26:03 2011
Stack: openais
Current DC: node2.a.org - partition with quorum
Version: 1.0.11-1554a83db0d3c3e546cfd3aaff6af1184f79ee87
2 Nodes configured, 2 expected votes
2 Resources configured.
============

Online: [ node2.a.org node1.a.org ]

Master/Slave Set: MS_Webdrbd
    Masters: [ node2.a.org ]
    Slaves: [ node1.a.org ]
WebFS (ocf::heartbeat:Filesystem): Started node2.a.org

The output shows that WebFS is running on node2.a.org, which is also the Primary node of the drbd service. Copy some files into /www (the mount point) on node2, so that after a failover we can check whether they exist under /www on node1:

# cp /etc/rc.d/rc.sysinit /www

Now simulate a failure of node2 and see whether the resources fail over to node1 correctly. Run the following on node2:

# crm node standby
# crm status
============
Last updated: Fri Jun 17 06:27:03 2011
Stack: openais
Current DC: node2.a.org - partition with quorum
Version: 1.0.11-1554a83db0d3c3e546cfd3aaff6af1184f79ee87
2 Nodes configured, 2 expected votes
2 Resources configured.
============

Node node2.a.org: standby
Online: [ node1.a.org ]

Master/Slave Set: MS_Webdrbd
    Masters: [ node1.a.org ]
    Stopped: [ webdrbd:0 ]
WebFS (ocf::heartbeat:Filesystem): Started node1.a.org

From this output, node2 has switched into standby mode and its drbd service has stopped, and the failover has completed: all resources have moved to node1 as expected. On node1 you can see a copy of all the data that was saved to /www while node2 was the Primary node.

Bring node2 back online:

# crm node online
[root@node2 ~]# crm status
============
Last updated: Fri Jun 17 06:30:05 2011
Stack: openais
Current DC: node2.a.org - partition with quorum
Version: 1.0.11-1554a83db0d3c3e546cfd3aaff6af1184f79ee87
2 Nodes configured, 2 expected votes
2 Resources configured.
============

Online: [ node2.a.org node1.a.org ]

Master/Slave Set: MS_Webdrbd
    Masters: [ node1.a.org ]
    Slaves: [ node2.a.org ]
WebFS (ocf::heartbeat:Filesystem): Started node1.a.org
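Since the shared filesystem here is meant to hold web content, the natural next step would be to add the web server itself as a resource tied to WebFS. This step is not part of the original transcript; the sketch below (the lsb:httpd resource and the WebSite, WebSite_with_WebFS and WebSite_after_WebFS names are illustrative) only shows the pattern, mirroring what the MySQL configurations further down do with lsb:mysqld:

crm(live)configure# primitive WebSite lsb:httpd
crm(live)configure# colocation WebSite_with_WebFS inf: WebSite WebFS
crm(live)configure# order WebSite_after_WebFS inf: WebFS WebSite
crm(live)configure# verify
crm(live)configure# commit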
mysql + drbd + corosync

Version 1:

node node1.magedu.com
node node2.magedu.com
primitive mysqldrbd ocf:linbit:drbd \
    params drbd_resource="mysqlres" \
    op monitor interval="30s" role="Master" timeout="30s" \
    op monitor interval="40s" role="Slave" timeout="30s" \
    op start interval="0" timeout="240" \
    op stop interval="0" timeout="100"
primitive mysqlfs ocf:heartbeat:Filesystem \
    params device="/dev/drbd0" directory="/data/mydata" fstype="ext3" \
    op start interval="0" timeout="60s" \
    op stop interval="0" timeout="60s"
primitive mysqlserver lsb:mysqld
primitive mysqlvip ocf:heartbeat:IPaddr \
    params ip="172.16.100.1"
ms ms_mysqldrbd mysqldrbd \
    meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
colocation mysqlfs_with_ms_mysqldrbd inf: mysqlfs ms_mysqldrbd:Master
colocation mysqlserver_with_mysqlfs inf: mysqlfs mysqlserver
colocation mysqlvip_with_mysqlserver inf: mysqlvip mysqlserver
order mysqlfs_after_ms_mysqldrbd inf: ms_mysqldrbd:promote mysqlfs:start
order mysqlserver_after_mysqlfs inf: mysqlfs mysqlserver
property $id="cib-bootstrap-options" \
    dc-version="1.1.5-1.1.el5-01e86afaaa6d4a8c4836f68df80ababd6ca3902f" \
    cluster-infrastructure="openais" \
    expected-quorum-votes="2" \
    stonith-enabled="false" \
    no-quorum-policy="ignore"

Version 2:

crm(live)# configure
crm(live)configure# show
node node1.magedu.com \
    attributes standby="off"
node node2.magedu.com \
    attributes standby="off"
primitive myip ocf:heartbeat:IPaddr \
    params ip="172.16.100.1" nic="eth0" cidr_netmask="255.255.0.0"
primitive mysqld lsb:mysqld
primitive mysqldrbd ocf:heartbeat:drbd \
    params drbd_resource="mydrbd" \
    op start interval="0" timeout="240" \
    op stop interval="0" timeout="100" \
    op monitor interval="20" role="Master" timeout="30" \
    op monitor interval="30" role="Slave" timeout="30"
primitive mystore ocf:heartbeat:Filesystem \
    params device="/dev/drbd0" directory="/mydata" fstype="ext3" \
    op start interval="0" timeout="60" \
    op stop interval="0" timeout="60"
ms ms_mysqldrbd mysqldrbd \
    meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
colocation myip_with_ms_mysqldrbd inf: ms_mysqldrbd:Master myip
colocation mysqld_with_mystore inf: mysqld mystore
colocation mystore_with_ms_mysqldrbd inf: mystore ms_mysqldrbd:Master
order mysqld_after_mystore inf: mystore mysqld
order mystore_after_ms_mysqldrbd inf: ms_mysqldrbd:promote mystore:start
property $id="cib-bootstrap-options" \
    dc-version="1.1.5-1.1.el5-01e86afaaa6d4a8c4836f68df80ababd6ca3902f" \
    cluster-infrastructure="openais" \
    expected-quorum-votes="2" \
    stonith-enabled="false" \
    no-quorum-policy="ignore" \
    last-lrm-refresh="1368438416"
rsc_defaults $id="rsc-options" \
    resource-stickiness="100"

Version 3 (used in the 11th course session):

[root@node1 ~]# crm configure show
node node1.magedu.com \
    attributes standby="off"
node node2.magedu.com \
    attributes standby="off"
primitive myip ocf:heartbeat:IPaddr \
    params ip="172.16.100.101" \
    op monitor interval="20" timeout="20" on-fail="restart"
primitive myserver lsb:mysqld \
    op monitor interval="20" timeout="20" on-fail="restart"
primitive mysql_drbd ocf:linbit:drbd \
    params drbd_resource="mydata" \
    op monitor role="Master" interval="10" timeout="20" \
    op monitor role="Slave" interval="20" timeout="20" \
    op start timeout="240" interval="0" \
    op stop timeout="100" interval="0"
primitive mystore ocf:heartbeat:Filesystem \
    params device="/dev/drbd0" directory="/mydata" fstype="ext4" \
    op monitor interval="40" timeout="40" \
    op start timeout="60" interval="0" \
    op stop timeout="60" interval="0"
ms ms_mysql_drbd mysql_drbd \
    meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
colocation myip_with_myserver inf: myip myserver
colocation myserver_with_mystore inf: myserver mystore
colocation mystore_with_ms_mysql_drbd_master inf: mystore ms_mysql_drbd:Master
order ms_mysql_drbd_before_mystore inf: ms_mysql_drbd:promote mystore:start
order myip_before_myserver inf: myip myserver
order mystore_before_myserver inf: mystore:start myserver:start
property $id="cib-bootstrap-options" \
    dc-version="1.1.8-7.el6-394e906" \
    cluster-infrastructure="classic openais (with plugin)" \
    expected-quorum-votes="2" \
    stonith-enabled="false" \
    last-lrm-refresh="1379316850" \
    no-quorum-policy="ignore"

Using the dual-primary model:

1. Enable dual-primary mode in the resource definition

resource <resource> {
    startup {
        become-primary-on both;
        ...
    }
    net {
        allow-two-primaries yes;
        after-sb-0pri discard-zero-changes;
        after-sb-1pri discard-secondary;
        after-sb-2pri disconnect;
        ...
    }
    ...
}

At the same time, any cluster filesystem used in a dual-primary drbd setup requires fencing, and not only at the resource level: STONITH must also be implemented at the node level.

disk {
    fencing resource-and-stonith;
}
handlers {
    outdate-peer "/sbin/make-sure-the-other-node-is-confirmed-dead.sh";
}

2. Use the GFS2 filesystem (see the sketch at the end of these notes)

3. Example resource definition when combined with RHCS

<rm>
    <resources />
    <service autostart="1" name="mysql">
        <drbd name="drbd-mysql" resource="mydrbd">
            <fs device="/dev/drbd0" mountpoint="/var/lib/mysql" fstype="ext3" name="mydrbd" options="noatime"/>
        </drbd>
        <ip address="172.16.100.8" monitor_link="1"/>
        <mysql config_file="/etc/my.cnf" listen_address="172.16.100.8" name="mysqld"/>
    </service>
</rm>

Starting the same IP on multiple nodes at the same time (cloned IP):

node node1.magedu.com \
    attributes standby="off"
node node2.magedu.com
node node3.magedu.com \
    attributes standby="off"
primitive DLM ocf:pacemaker:controld \
    params daemon="/usr/sbin/dlm_controld" \
    op start interval="0" timeout="90" \
    op stop interval="0" timeout="100"
primitive clusterip ocf:heartbeat:IPaddr2 \
    params ip="172.16.200.7" cidr_netmask="32" clusterip_hash="sourceip"
clone WebIP clusterip \
    meta globally-unique="true" clone-max="3" clone-node-max="3" target-role="Stopped"
clone dlm_clone DLM \
    meta clone-max="3" clone-node-max="1" target-role="Started"
property $id="cib-bootstrap-options" \
    dc-version="1.1.7-6.el6-148fccfd5985c5590cc601123c6c16e966b85d14" \
    cluster-infrastructure="openais" \
    expected-quorum-votes="3" \
    stonith-enabled="false" \
    last-lrm-refresh="1354024090"

The mysql_drbd primitive from Version 3, written as a single crm configure command:

primitive mysql_drbd ocf:linbit:drbd params drbd_resource="mydata" op monitor role=Master interval=10 timeout=20 op monitor role=Slave interval=20 timeout=20 op start timeout=240 op stop timeout=100
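Referring back to the GFS2 step in the dual-primary section: that step is only a heading in the original notes. A rough sketch of what it involves is given below, assuming a cluster name of mycluster, two journals and the /mydata mount point (none of which are given in the source); the dual-primary DRBD device is formatted with a cluster-aware filesystem and mounted on both nodes, with the DLM (for example the dlm_clone resource shown above) providing the locking:

# mkfs.gfs2 -p lock_dlm -t mycluster:mydrbd -j 2 /dev/drbd0   # run once, on one node
# mkdir -p /mydata                                            # on both nodes
# mount -t gfs2 /dev/drbd0 /mydata                            # on both nodes, once both are Primary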