• DNS-based (Consul) high availability


    DNS options

    Bind-DLZ
    Recommended starting point; plenty of documentation available
    Finer control (the SQL used to look up DNS records can be customized)
    Reportedly poor performance
    https://www.cnblogs.com/saneri/p/8178065.html

    PowerDNS
    Well-defined SQL schema
    Better performance than Bind-DLZ

    CoreDNS: most often used together with Kubernetes

    Nacos: open-sourced by Alibaba; provides both DNS and service discovery
     

    Monitoring program:
    Master/slave topology, with GTID enabled

    Monitoring logic (a sketch of the per-slave check follows this list):
    Fetch the machine nodes group by group
    master:
    try to connect; if the connection succeeds, keep it as master
    if it fails, run the slave election
    slave:
    check whether the node is currently marked online
    online nodes:
    if it connects, verify replication is running (take it offline if not) and check the replication delay
    when a node goes offline, update cmdb and the dns records
    offline nodes:
    if it connects, replication is running and there is no delay, bring it back online
    when a node comes back online, update cmdb and the dns records
    slave election:
    fetch the list of slaves
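    A minimal sketch, in the same shell style as the check scripts later in this post, of the per-slave check described above; the host, credentials and delay threshold are placeholders, and the cmdb/dns updates are only indicated in the messages:

    #!/bin/bash
    # Sketch of the per-slave check: connectable -> replication threads running -> delay within threshold.
    host="192.168.1.102"; port=3306
    user="dba_user"; password="xxxxxx"   # placeholders
    max_delay=30                         # tolerated replication delay, in seconds

    mysql_cmd="mysql -u$user -p$password -h$host -P$port -N -s"

    status=$($mysql_cmd -e "SHOW SLAVE STATUS\G" 2>/dev/null)
    if [ -z "$status" ]; then
        echo "slave $host:$port unreachable or not configured as a replica -> take offline, update cmdb + dns records"
        exit 2
    fi

    io_running=$(echo "$status"  | awk '/Slave_IO_Running:/  {print $2}')
    sql_running=$(echo "$status" | awk '/Slave_SQL_Running:/ {print $2}')
    delay=$(echo "$status"       | awk '/Seconds_Behind_Master:/ {print $2}')

    if [ "$io_running" != "Yes" ] || [ "$sql_running" != "Yes" ]; then
        echo "replication broken on $host:$port -> take offline, update cmdb + dns records"
        exit 2
    fi

    if [ "$delay" = "NULL" ] || [ "$delay" -gt "$max_delay" ]; then
        echo "replication delayed (${delay}s) on $host:$port -> take offline, update cmdb + dns records"
        exit 2
    fi

    echo "slave $host:$port healthy -> keep online (or bring back online), update cmdb + dns records if its state changed"
    exit 0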
     
    Failover (a sketch of the GTID comparison follows this list):
    Confirm replication has stopped on every node and that each node has finished applying what it received
    Compare whether all nodes have replicated up to the same position,
    using the GTID sets fetched from each node; if they differ, elect the most advanced node as the new master
    If all nodes are at the same position, pick the node with the highest level defined in cmdb
    Once the new master is elected, CHANGE MASTER the remaining nodes over to it
    Update the roles in cmdb (old master -> slave, elected node -> master) and clear read_only on the new master
    Update dns_records
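    A minimal sketch of the GTID comparison used to elect the most advanced node, assuming GTID-based replication; the candidate list and credentials are placeholders, and the follow-up cmdb/dns steps are only noted in comments:

    #!/bin/bash
    # Sketch: elect the candidate whose executed GTID set is the most advanced.
    user="dba_user"; password="xxxxxx"; port=3306   # placeholders
    candidates="192.168.1.102 192.168.1.103"

    q() { mysql -u$user -p$password -h$1 -P$port -N -s -e "$2" 2>/dev/null; }

    best_host=""
    best_gtid=""
    for host in $candidates; do
        # Strip newlines so the GTID set can be passed back into a SQL string literal.
        gtid=$(q $host "SELECT REPLACE(@@global.gtid_executed, '\n', '')")
        [ -z "$gtid" ] && continue
        if [ -z "$best_host" ]; then
            best_host=$host; best_gtid=$gtid; continue
        fi
        # GTID_SUBSET(a, b) = 1 means a is contained in b, i.e. $host is at least as far ahead.
        if [ "$(q $host "SELECT GTID_SUBSET('$best_gtid', '$gtid')")" = "1" ]; then
            best_host=$host; best_gtid=$gtid
        fi
    done

    echo "elected new master: $best_host"
    # Next, as described above: CHANGE MASTER TO ... on the remaining slaves,
    # clear read_only/super_read_only on the elected node, update the roles in cmdb
    # and the dns records.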
     
    Online (planned) switchover (a sketch of the catch-up wait follows this list):
    Set super_read_only and read_only on the old master and kill the remaining business connections
    Capture SHOW MASTER STATUS on the old master
    Compare it with SHOW SLAVE STATUS on each slave and confirm they have all caught up
    Pick the new master by the level defined in cmdb, or use the explicitly specified node
    Rebuild the master/slave topology around the new master
    Update cmdb
    Update dns
    Write a log entry
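    A minimal sketch of the "freeze the old master and wait for the slaves to catch up" part of the flow above, again assuming GTID-based replication; hosts and credentials are placeholders:

    #!/bin/bash
    # Sketch: block writes on the old master, then wait until every slave has applied
    # everything it executed. Hosts and credentials are placeholders.
    user="dba_user"; password="xxxxxx"; port=3306
    old_master="192.168.1.101"
    slaves="192.168.1.102 192.168.1.103"

    q() { mysql -u$user -p$password -h$1 -P$port -N -s -e "$2"; }

    # 1. Block new writes on the old master (existing business connections still
    #    have to be killed or drained, as noted above).
    q $old_master "SET GLOBAL super_read_only=1; SET GLOBAL read_only=1;"

    # 2. Record what the old master has executed.
    master_gtid=$(q $old_master "SELECT REPLACE(@@global.gtid_executed, '\n', '')")

    # 3. Wait until each slave has applied all of it.
    for host in $slaves; do
        until [ "$(q $host "SELECT GTID_SUBSET('$master_gtid', REPLACE(@@global.gtid_executed, '\n', ''))")" = "1" ]; do
            sleep 1
        done
    done

    echo "all slaves have caught up with $old_master"
    # Next: pick the new master by cmdb level (or the specified node), CHANGE MASTER TO
    # on the other nodes, clear read_only on the new master, update cmdb, dns and the log.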
     
    Be sure to keep improving English reading ability.

    [root@mydb1 ~]# wget https://releases.hashicorp.com/consul/1.4.0/consul_1.4.0_linux_amd64.zip
    [root@mydb1 ~]# mkdir -p /opt/consul /opt/consul/conf /data/consul /data/consul/shell/
    [root@mydb2 ~]# mkdir -p /opt/consul /opt/consul/conf /data/consul /data/consul/shell/
    [root@mydb3 ~]# mkdir -p /opt/consul /opt/consul/conf /data/consul /data/consul/shell/
    [root@mydb1 ~]# unzip consul_1.4.0_linux_amd64.zip
    Copy the consul binary into /opt/consul on all three nodes.
    [root@mydb1 ~]# cat /opt/consul/conf/server.json
    {
      "data_dir": "/data/consul",
      "enable_script_checks": true,
      "datacenter": "dc1",
      "log_level": "INFO",          
      "server": true,              
      "bootstrap_expect": 3,          
      "ui":true
    }
    [root@mydb1 consul]# ./consul agent -config-dir=/opt/consul/conf > /data/consul/consul.log &
    [root@mydb2 consul]# ./consul agent -config-dir=/opt/consul/conf > /data/consul/consul.log &
    [root@mydb3 consul]# ./consul agent -config-dir=/opt/consul/conf > /data/consul/consul.log &
    [root@mydb2 consul]# ./consul join 192.168.1.101
    [root@mydb3 consul]# ./consul join 192.168.1.101
    [root@mydb1 consul]# ./consul members
    Node   Address             Status  Type    Build  Protocol  DC   Segment
    mydb1  192.168.1.101:8301  alive   server  1.4.0  2         dc1  <all>
    mydb2  192.168.1.102:8301  alive   server  1.4.0  2         dc1  <all>
    mydb3  192.168.1.103:8301  alive   server  1.4.0  2         dc1  <all>
    [root@mydb1 consul]# ./consul catalog nodes
    Node   ID        Address        DC
    mydb1  52514e74  192.168.1.101  dc1
    mydb2  aebbf0b2  192.168.1.102  dc1
    mydb3  0e179069  192.168.1.103  dc1

    # dig @127.0.0.1 -p 8600 mydb1.node.consul
    # dig @127.0.0.1 -p 8600 mydb2.node.consul
    # dig @127.0.0.1 -p 8600 mydb3.node.consul

    [root@mydb1 consul]# ./consul operator raft list-peers
    Node   ID                                    Address             State     Voter  RaftProtocol
    mydb1  52514e74-d063-cfe3-1d58-55fda9fc2451  192.168.1.101:8300  leader    true   3
    mydb2  aebbf0b2-09ad-f396-4c21-3f9ee40a16da  192.168.1.102:8300  follower  true   3
    mydb3  0e179069-7360-3866-d9a6-7ea60c540c04  192.168.1.103:8300  follower  true   3

    [root@mydb1 consul]# ./consul kv put id 11
    Success! Data written to: id
    [root@mydb1 consul]# ./consul kv get id
    11
    [root@mydb2 consul]# ./consul kv get id
    11
    [root@mydb3 consul]# ./consul kv get id
    11

    Consul uses Raft to achieve distributed consensus.
     
     
    [root@mydb1 ~]# cat /opt/consul/conf/r-test-mgr-ser.json
    {
      "service": {
        "name": "r-test-3306-mydb-ser",
        "tags": ["测试-3306"],
        "address": "192.168.1.101",
        "meta": {
          "meta": "for my service"
        },
        "port": 3306,
        "enable_tag_override": false,
        "checks": [
          {
            "args": ["/data/consul/shell/check_mysql_mgr_slave.sh"],
            "interval": "1s"
          }
        ]
      }
    }
    [root@mydb1 ~]# cat /opt/consul/conf/w-test-mgr-ser.json
    {
      "service": {
        "name": "w-test-3306-mydb-ser",
        "tags": ["测试-3306"],
        "address": "192.168.1.101",
        "meta": {
          "meta": "for my service"
        },
        "port": 3306,
        "enable_tag_override": false,
        "checks": [
          {
            "args": ["/data/consul/shell/check_mysql_mgr_master.sh"],
            "interval": "10s"
          }
        ]
      }
    }
    Note: adjust the service address accordingly on mydb2 and mydb3.
    The check scripts are as follows:
    [root@mydb1 ~]# cat /data/consul/shell/check_mysql_mgr_master.sh
    #!/bin/bash
    # Health check for the write service: exit 0 only on the current MGR primary.
    host="192.168.1.101"
    port=3306
    user="dba_user"
    password="msds007"
    comm="/usr/local/mysql/bin/mysql -u$user -h$host -P $port -p$password"
    value=$($comm -Nse "select 1" 2>/dev/null)
    primary_member=$($comm -Nse "select variable_value from performance_schema.global_status where VARIABLE_NAME='group_replication_primary_member'")
    server_uuid=$($comm -Nse "select variable_value from performance_schema.global_variables where VARIABLE_NAME='server_uuid'")
    # Is MySQL alive?
    if [ -z "$value" ]
    then
       echo "mysql $port is down....."
       exit 2
    fi
    # Is this node ONLINE in the group?
    node_state=$($comm -Nse "select MEMBER_STATE from performance_schema.replication_group_members where MEMBER_ID='$server_uuid'")
    if [ "$node_state" != "ONLINE" ]
    then
       echo "MySQL $port state is not online...."
       exit 2
    fi
    # Is this node the primary?
    if [[ "$server_uuid" == "$primary_member" ]]
    then
       echo "MySQL $port Instance is master ........"
       exit 0
    else
       echo "MySQL $port Instance is slave ........"
       exit 2
    fi
    [root@mydb1 ~]# cat /data/consul/shell/check_mysql_mgr_slave.sh
    #!/bin/bash
    # Health check for the read service: exit 0 on healthy secondaries
    # (and on the primary when it is the only member left in the group).
    host="192.168.1.101"
    port=3306
    user="dba_user"
    password="msds007"
    comm="/usr/local/mysql/bin/mysql -u$user -h$host -P $port -p$password"
    value=$($comm -Nse "select 1" 2>/dev/null)
    primary_member=$($comm -Nse "select variable_value from performance_schema.global_status where VARIABLE_NAME='group_replication_primary_member'")
    server_uuid=$($comm -Nse "select variable_value from performance_schema.global_variables where VARIABLE_NAME='server_uuid'")
    # Is MySQL alive?
    if [ -z "$value" ]
    then
       echo "mysql $port is down....."
       exit 2
    fi
    # Is this node ONLINE in the group?
    node_state=$($comm -Nse "select MEMBER_STATE from performance_schema.replication_group_members where MEMBER_ID='$server_uuid'")
    if [ "$node_state" != "ONLINE" ]
    then
       echo "MySQL $port state is not online...."
       exit 2
    fi
    # Is this node a secondary?
    if [[ "$server_uuid" != "$primary_member" ]]
    then
       echo "MySQL $port Instance is slave ........"
       exit 0
    else
       node_num=$($comm -Nse "select count(*) from performance_schema.replication_group_members")
       # If there are no secondaries left, let the primary also serve the read role.
       if [ "$node_num" -eq 1 ]
       then
           echo "MySQL $port Instance is slave ........"
           exit 0
       else
           echo "MySQL $port Instance is master ........"
           exit 2
       fi
    fi
    Again, adjust the host IP in these scripts on mydb2 and mydb3.
     

    Restart the agents so the new service definitions and health checks are loaded (consul reload also picks up service definition changes):
    [root@mydb1 consul]# ./consul agent -config-dir=/opt/consul/conf > /data/consul/consul.log &
    [root@mydb2 consul]# ./consul agent -config-dir=/opt/consul/conf > /data/consul/consul.log &
    [root@mydb3 consul]# ./consul agent -config-dir=/opt/consul/conf > /data/consul/consul.log &
    [root@mydb2 consul]# ./consul join 192.168.1.101
    [root@mydb3 consul]# ./consul join 192.168.1.101
    [root@mydb1 consul]# ./consul members
     
    # dig @127.0.0.1 -p 8600 w-test-3306-mydb-ser.service.consul
    # dig @127.0.0.1 -p 8600 r-test-3306-mydb-ser.service.consul
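    For applications to use these names directly, the host's resolver has to forward the .consul domain to the local Consul agent. A minimal sketch using dnsmasq (assumed to be installed on the client hosts), plus a health-API check of the registered service:

    # Forward *.consul lookups to the local Consul agent's DNS port (8600).
    echo 'server=/consul/127.0.0.1#8600' > /etc/dnsmasq.d/10-consul.conf
    systemctl restart dnsmasq

    # Only instances whose health check is passing are returned:
    curl -s http://127.0.0.1:8500/v1/health/service/w-test-3306-mydb-ser?passing

    # Applications then connect by role-based name instead of a fixed IP:
    mysql -udba_user -p -h w-test-3306-mydb-ser.service.consul -P 3306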
     
    Consul user guide (in Chinese):
    http://www.liangxiansen.cn/2017/04/06/consul/
     
     
     
• Original article: https://www.cnblogs.com/allenhu320/p/11362937.html