NFS高可用(NFS+keepalive+Sersync)
NFS高可用(NFS+keepalive+Sersync)
作者: 张首富
背景
NFS这样古老的共享存储技术,被众多小公司和没钱的公司采用。现在我司需要出一套面向客户的离线版本方案,客户们想要数据安全却又不想花钱,所以我就采用了NFS做后端数据存储。
NFS目前数据同步的方式主要两种:
- 使用Sersync来实现主从同步
- 第二种借助DRBD实现主从同步
但是这两种方案都只是实现了数据的主从同步,对NFS服务的高可用没有任何实现,网上大部分是采用heartbeat来实现,我这边想采用不一样的keepalive来实现这个
网络拓扑
安装前准备
服务器信息:
IP | 角色/HOSTNAME |
---|---|
192.168.1.101 | VIP(keepalive的虚拟IP) |
192.168.1.112 | NFS-Master |
192.168.1.111 | NFS-Slave |
192.168.1.120 | NFS-Client |
服务器信息:
# cat /etc/redhat-release
CentOS Linux release 7.5.1804 (Core)
# uname -r
3.10.0-862.el7.x86_64
共享的目录:
nfs master 和slave 都创建一个/test_nfs 目录来当做共享目录
初始化环境安装
在三台机器上同时执行 服务器基本优化:
# Base OS preparation, meant to be run as root on each CentOS 7 node.
# Every step below changes system state (repos, cron, limits, SELinux,
# firewall, sshd, sysctl).

# Switch the base yum repository to the Aliyun mirror; the original repo file
# is kept as CentOS-Base.repo.backup.
yum install wget telnet -y
mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo
# Add the Aliyun EPEL repository.
wget -O /etc/yum.repos.d/epel.repo http://mirrors.aliyun.com/repo/epel-7.repo
# rpm -ivh http://dl.fedoraproject.org/pub/epel/7/x86_64/e/epel-release-7-8.noarch.rpm
# Rebuild the yum cache.
yum clean all
yum makecache
# Time synchronisation: sync once now, then once a day from cron.
yum -y install ntp
/usr/sbin/ntpdate cn.pool.ntp.org
# The minute field must be 0: "* 4 * * *" would run the job every minute
# from 04:00 to 04:59 instead of once a day at 04:00.
echo "0 4 * * * /usr/sbin/ntpdate cn.pool.ntp.org > /dev/null 2>&1" >> /var/spool/cron/root
systemctl restart crond.service
# Install vim.
yum -y install vim
# Raise the maximum number of open file descriptors.
echo "ulimit -SHn 102400" >> /etc/rc.local
cat >> /etc/security/limits.conf << EOF
* soft nofile 655350
* hard nofile 655350
EOF
# Disable SELinux: the config edit applies from the next boot, setenforce 0
# switches the running system to permissive mode right away.
sed -i 's/SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config
setenforce 0
# Disable and stop the firewall.
systemctl disable firewalld.service
systemctl stop firewalld.service
# sshd: turn off GSSAPI authentication and DNS lookups of the client address.
sed -i 's/^GSSAPIAuthentication yes$/GSSAPIAuthentication no/' /etc/ssh/sshd_config
sed -i 's/#UseDNS yes/UseDNS no/' /etc/ssh/sshd_config
systemctl restart sshd.service
# Kernel parameter tuning (appended to /etc/sysctl.conf, then loaded).
# - ip_local_port_range ends at 65535, the highest valid port number.
# - The legacy key net.ipv4.netfilter.ip_conntrack_max is not written: it does
#   not exist on the 3.10 el7 kernel and only makes "sysctl -p" report an
#   error; net.nf_conntrack_max is the current name.
# - NOTE(review): the nf_conntrack keys presumably need the nf_conntrack
#   module to be loaded when "sysctl -p" runs - confirm on the target hosts.
# - NOTE(review): tcp_tw_recycle is known to cause trouble for clients behind
#   NAT and was removed in newer kernels - confirm it is really wanted.
cat >> /etc/sysctl.conf << EOF
vm.overcommit_memory = 1
net.ipv4.ip_local_port_range = 1024 65535
net.ipv4.tcp_fin_timeout = 1
net.ipv4.tcp_keepalive_time = 1200
net.ipv4.tcp_mem = 94500000 915000000 927000000
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_tw_recycle = 1
net.ipv4.tcp_timestamps = 0
net.ipv4.tcp_synack_retries = 1
net.ipv4.tcp_syn_retries = 1
net.ipv4.tcp_abort_on_overflow = 0
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.core.netdev_max_backlog = 262144
net.core.somaxconn = 262144
net.ipv4.tcp_max_orphans = 3276800
net.ipv4.tcp_max_syn_backlog = 262144
net.core.wmem_default = 8388608
net.core.rmem_default = 8388608
net.nf_conntrack_max = 655360
net.netfilter.nf_conntrack_tcp_timeout_established = 1200
EOF
/sbin/sysctl -p
安装nfs
yum -y install nfs-utils rpcbind
配置nfs共享目录
NFS-Master:
[root@NFS-Master ~]# echo '/test_nfs 192.168.1.0/24(rw,sync,all_squash)' >> /etc/exports
[root@NFS-Master ~]# systemctl start rpcbind && systemctl start nfs
[root@NFS-Master ~]# systemctl enable rpcbind && systemctl enable nfs
Created symlink from /etc/systemd/system/multi-user.target.wants/nfs-server.service to /usr/lib/systemd/system/nfs-server.service.
NFS-Slave:
[root@NFS-Slave ~]# echo '/test_nfs 192.168.1.0/24(rw,sync,all_squash)' >> /etc/exports
[root@NFS-Slave ~]# systemctl start rpcbind && systemctl start nfs
[root@NFS-Slave ~]# systemctl enable rpcbind && systemctl enable nfs
Created symlink from /etc/systemd/system/multi-user.target.wants/nfs-server.service to /usr/lib/systemd/system/nfs-server.service.
测试挂载是否成功:
#测试NFS-Master端
[root@NFS-Client /]# mount -t nfs 192.168.1.112:/test_nfs /mnt
[root@NFS-Client /]# df -Th
文件系统 类型 容量 已用 可用 已用% 挂载点
192.168.1.112:/test_nfs nfs4 921G 69G 852G 8% /mnt
[root@NFS-Client /]# umount /mnt
#测试NFS-Slave端
[root@NFS-Client /]# mount -t nfs 192.168.1.111:/test_nfs /mnt
[root@NFS-Client /]# df -Th
文件系统 类型 容量 已用 可用 已用% 挂载点
192.168.1.111:/test_nfs nfs4 931G 53G 878G 6% /mnt
[root@NFS-Client /]# umount /mnt
安装配置rsync + Sersync
在NFS-Slave端安装rsync,因为我们在NFS-Master上写入数据要备份到NFS-Slave服务器上,所以我们在NFS-Slave上启动rsync,如果不清楚可以看rsync服务介绍这里不多讲
[root@NFS-Slave ~]# yum -y install rsync.x86_64
[root@NFS-Slave ~]# cat /etc/rsyncd.conf
uid = nfsnobody
gid = nfsnobody
port = 873
pid file = /var/rsyncd.pid
log file = /var/log/rsyncd.log
use chroot = no
max connections = 200
read only = false
list = false
fake super = yes
ignore errors
[test_nfs]
path = /test_nfs
auth users = test_nfs
secrets file = /etc/rsync.pass
hosts allow = 192.168.1.0/24
[root@NFS-Slave ~]# systemctl start rsyncd && systemctl enable rsyncd
[root@NFS-Slave ~]# echo 'test_nfs:zsf123' > /etc/rsync.pass
[root@NFS-Slave ~]# chmod 600 /etc/rsync.pass
[root@NFS-Slave ~]# chown nfsnobody:nfsnobody /test_nfs/
NFS-Master测试
[root@NFS-Master ~]# yum -y install rsync.x86_64
[root@NFS-Master ~]# chown nfsnobody:nfsnobody /test_nfs/
[root@NFS-Master ~]# echo "zsf123" > /etc/rsync.pass
[root@NFS-Master ~]# chmod 600 /etc/rsync.pass
#创建测试文件,测试推送
[root@NFS-Master ~]# cd /test_nfs/
[root@NFS-Master test_nfs]# echo "This is test file" > file.txt
[root@NFS-Master test_nfs]# rsync -arv /test_nfs/ test_nfs@192.168.1.111::test_nfs --password-file=/etc/rsync.pass
sending incremental file list
./
file.txt
sent 155 bytes received 38 bytes 386.00 bytes/sec
total size is 18 speedup is 0.09
#到NFS-Slave上查看文件
[root@NFS-Slave ~]# ls /test_nfs/
file.txt
[root@NFS-Slave ~]# cat /test_nfs/file.txt
This is test file
NFS-Master安装Sersync 因为Sersync只有安装在NFS-Master上的时候才能检测到/test_nfs目录是否有文件写入,才能触发推送
[root@NFS-Master test_nfs]# cd /usr/local/
[root@NFS-Master local]# yum -y install wget.x86_64
#下载Sersync的安装包
[root@NFS-Master local]# wget https://raw.githubusercontent.com/wsgzao/sersync/master/sersync2.5.4_64bit_binary_stable_final.tar.gz
[root@NFS-Master local]# tar xvf sersync2.5.4_64bit_binary_stable_final.tar.gz
GNU-Linux-x86/
GNU-Linux-x86/sersync2
GNU-Linux-x86/confxml.xml
[root@NFS-Master local]# mv GNU-Linux-x86/ sersync
[root@NFS-Master local]# cd sersync/
更改sersync的配置文件
# All edits below target confxml.xml in the current directory
# (/usr/local/sersync); sed -i needs the file operand on every command.
[root@NFS-Master sersync]# sed -ri 's#<delete start="true"/>#<delete start="false"/>#g' confxml.xml
[root@NFS-Master sersync]# sed -ri '24s#<localpath watch="/opt/tongbu">#<localpath watch="/test_nfs">#g' confxml.xml
[root@NFS-Master sersync]# sed -ri '25s#<remote ip="127.0.0.1" name="tongbu1"/>#<remote ip="192.168.1.111" name="test_nfs"/>#g' confxml.xml
[root@NFS-Master sersync]# sed -ri '30s#<commonParams params="-artuz"/>#<commonParams params="-az"/>#g' confxml.xml
[root@NFS-Master sersync]# sed -ri '31s#<auth start="false" users="root" passwordfile="/etc/rsync.pas"/>#<auth start="true" users="test_nfs" passwordfile="/etc/rsync.pass"/>#g' confxml.xml
[root@NFS-Master sersync]# sed -ri '33s#<timeout start="false" time="100"/><!-- timeout=100 -->#<timeout start="true" time="100"/><!-- timeout=100 -->#g' confxml.xml
#启动Sersync
[root@NFS-Master sersync]# /usr/local/sersync/sersync2 -dro /usr/local/sersync/confxml.xml
测试:
[root@NFS-Master test_nfs]# echo "This is two test file" > two.file.txt
[root@NFS-Slave test_nfs]# ls
file.txt two.file.txt
[root@NFS-Slave test_nfs]# cat two.file.txt
This is two test file
看到上面结果说明Sersync实时同步我们已经完成了,
安装配置keepalive
NFS-Master
[root@NFS-Master test_nfs]# yum -y install keepalived.x86_64
[root@NFS-Master test_nfs]# cat /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
router_id NFS-Master
}
vrrp_instance VI_1 {
state MASTER
interface enp0s31f6
virtual_router_id 51
priority 150
advert_int 1
authentication {
auth_type PASS
auth_pass zhangshoufu
}
virtual_ipaddress {
192.168.1.101
}
}
[root@NFS-Master test_nfs]# systemctl start keepalived.service && systemctl enable keepalived.service
NFS-Slave
[root@NFS-Slave test_nfs]# yum -y install keepalived.x86_64
[root@NFS-Slave test_nfs]# cat /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
router_id NFS-Slave
}
vrrp_instance VI_1 {
state MASTER
interface enp0s31f6
virtual_router_id 51
priority 100
advert_int 1
authentication {
auth_type PASS
auth_pass zhangshoufu
}
virtual_ipaddress {
192.168.1.101
}
}
[root@NFS-Slave test_nfs]# systemctl start keepalived.service && systemctl enable keepalived.service
查看虚拟IP是否存在
[root@NFS-Master test_nfs]# ip a | grep 192.168.1.101
inet 192.168.1.101/32 scope global enp0s31f6
看到上图说明keepalive启动成功, 配置文件解读
//全局定义模块
global_defs {
router_id NFS-Master
//运行keepalive机器的标识
}
vrrp_instance VI_1 {
//初始状态;两台机器都配置为MASTER,实际的主从由priority决定
state MASTER
//实例绑定的网卡
interface enp0s31f6
//组ID,值一样为一个组
virtual_router_id 51
//优先级,优先级高的为master
priority 150
//检查时间间隔
advert_int 1
//认证模式为密码认证
authentication {
auth_type PASS
auth_pass zhangshoufu
}
//设置的VIP
virtual_ipaddress {
192.168.1.101
}
}
NFS-Client通过VIP挂载测试
[root@NFS-Client ~]# mount -t nfs 192.168.1.101:/test_nfs /mnt
[root@NFS-Client ~]# ls /mnt/
file.txt two.file.txt
[root@NFS-Client ~]# umount /mnt/
模拟机器Down机,测试虚拟IP地址是否会漂移
[root@NFS-Master scripts]# ip a | grep 101
inet 192.168.1.101/32 scope global enp0s31f6
[root@NFS-Slave ~]# ip a | grep 101
[root@NFS-Slave ~]#
//关闭NFS-Master上的keepalive服务
[root@NFS-Master scripts]# systemctl stop keepalived.service
[root@NFS-Master scripts]# ip a | grep 101
[root@NFS-Master scripts]#
[root@NFS-Slave ~]# ip a | grep 101
inet 192.168.1.101/32 scope global enp0s31f6
成功漂移 keepalive脚本 因为keepalive的漂移机制是根据keepalive这个服务是否存活来判断IP地址是否漂移的,如果我们是机器down机,此方法可以直接使用,但是如果是网络出现问题导致了服务不可用的,keepalive进程本身仍然存活,VIP就不会漂移,所以需要用下面的脚本检测网络,在网络异常时主动停止keepalive来触发漂移。
[root@NFS-Master scripts]# pwd
/usr/local/scripts
[root@NFS-Master scripts]# cat check_keepalive.sh
#!/bin/bash
# check_keepalive.sh - stop keepalived when the gateway stops answering pings.
#
# Stopping keepalived releases the VIP so the peer node can take it over.
# Intended to be started by cron once a minute: 29 probes with a 2 second
# pause between them cover roughly the time until the next cron run.
# Failures are counted cumulatively within one run, not consecutively.
export PATH=$PATH

gateway=192.168.1.1   # address pinged as the network health probe
max_failures=3        # failed probes in one run before keepalived is stopped
failures=0

for _ in $(seq 29); do
  # -W 1 bounds the wait for a reply so a dead network cannot stall a probe.
  if ! ping -c 1 -W 1 "$gateway" &> /dev/null; then
    failures=$((failures + 1))
    if [ "$failures" -eq "$max_failures" ]; then
      systemctl stop keepalived.service
      # Nothing left to do once keepalived is stopped.
      break
    fi
  fi
  # Pause after every probe, successful or not, to spread them over the minute.
  sleep 2
done
加到定时任务里:
# 755 instead of 777: a script executed by root's crontab must not be world-writable.
[root@NFS-Master ~]# chmod 755 /usr/local/scripts/check_keepalive.sh
[root@NFS-Master ~]# crontab -e
* * * * * /usr/local/scripts/check_keepalive.sh &> /dev/null
客户端检测脚本
#!/bin/bash
# Client-side watchdog: whenever df exits with an error, lazily force-unmount
# /mnt and mount the export again through the VIP. Performs 29 checks with a
# 2 second pause after each one.
export PATH=$PATH

for ((attempt = 1; attempt <= 29; attempt++)); do
  if ! df -Th &> /dev/null; then
    # mount is only attempted when the unmount succeeded.
    umount -lf /mnt && mount -t nfs 192.168.1.101:/test_nfs /mnt
  fi
  sleep 2
done
不足: 这个里面会存在几秒数据的丢失,如果真想保持数据强一致,还是不要省钱,采用分布式存储吧