018 Removing and adding Ceph mons and OSDs


    1. OSD management

    1.1 Removing a failed OSD

    Check the OSD IDs on the current node:

    [root@ceph2 ceph]# df -hT

    Filesystem     Type      Size  Used Avail Use% Mounted on
    /dev/vda1      xfs        40G  2.7G   38G   7% /
    devtmpfs       devtmpfs  1.9G     0  1.9G   0% /dev
    tmpfs          tmpfs     1.9G     0  1.9G   0% /dev/shm
    tmpfs          tmpfs     1.9G  201M  1.7G  11% /run
    tmpfs          tmpfs     1.9G     0  1.9G   0% /sys/fs/cgroup
    /dev/vdb1      xfs        15G  213M   15G   2% /var/lib/ceph/osd/ceph-0
    /dev/vdc1      xfs        15G  228M   15G   2% /var/lib/ceph/osd/ceph-3
    /dev/vdd1      xfs        15G  152M   15G   1% /var/lib/ceph/osd/ceph-6
    tmpfs          tmpfs     380M     0  380M   0% /run/user/0

    Stop the failed OSD:

    [root@ceph2 ceph]# systemctl stop ceph-osd@0

    Check the cluster status:

    [root@ceph2 ceph]# ceph -s

     cluster:
        id:     35a91e48-8244-4e96-a7ee-980ab989d20d
        health: HEALTH_WARN
                1 osds down
                Degraded data redundancy: 67/663 objects degraded (10.106%), 170 pgs unclean, 170 pgs degraded
      services:
        mon:        3 daemons, quorum ceph2,ceph3,ceph4
        mgr:        ceph4(active), standbys: ceph3, ceph2
        mds:        cephfs-1/1/1 up  {0=ceph2=up:active}, 1 up:standby
        osd:        9 osds: 8 up, 9 in
        rbd-mirror: 1 daemon active
      data:
        pools:   13 pools, 504 pgs
        objects: 221 objects, 241 MB
        usage:   1770 MB used, 133 GB / 134 GB avail
        pgs:     67/663 objects degraded (10.106%)
                 334 active+clean
                 170 active+undersized+degraded

     [root@ceph2 ceph]# ceph osd tree

    ID  CLASS WEIGHT  TYPE NAME          STATUS REWEIGHT PRI-AFF 
     -9       0.13499 root dc1                                   
    -10       0.04500     rack rack1                             
     -3       0.04500         host ceph2                         
      0   hdd 0.01500             osd.0    down  1.00000 1.00000 
      3   hdd 0.01500             osd.3      up  1.00000 1.00000 
      6   hdd 0.01500             osd.6      up  1.00000 1.00000 
    -11       0.04500     rack rack2                             
     -7       0.04500         host ceph3                         
      2   hdd 0.01500             osd.2      up  1.00000 1.00000 
      5   hdd 0.01500             osd.5      up  1.00000 1.00000 
      8   hdd 0.01500             osd.8      up  1.00000 1.00000 
    -12       0.04500     rack rack3                             
     -5       0.04500         host ceph4                         
      1   hdd 0.01500             osd.1      up  1.00000 1.00000 
      4   hdd 0.01500             osd.4      up  1.00000 0.50000 
      7   hdd 0.01500             osd.7      up  1.00000 1.00000 
     -1       0.13499 root default                               
     -3       0.04500     host ceph2                             
      0   hdd 0.01500         osd.0        down  1.00000 1.00000 
      3   hdd 0.01500         osd.3          up  1.00000 1.00000 
      6   hdd 0.01500         osd.6          up  1.00000 1.00000 
     -7       0.04500     host ceph3                             
      2   hdd 0.01500         osd.2          up  1.00000 1.00000 
      5   hdd 0.01500         osd.5          up  1.00000 1.00000 
      8   hdd 0.01500         osd.8          up  1.00000 1.00000 
     -5       0.04500     host ceph4                             
      1   hdd 0.01500         osd.1          up  1.00000 1.00000 
      4   hdd 0.01500         osd.4          up  1.00000 0.50000 
      7   hdd 0.01500         osd.7          up  1.00000 1.00000 

     [root@ceph2 ceph]# ceph osd out osd.0

    marked out osd.0. 

     [root@ceph2 ceph]# ceph -s

     cluster:
        id:     35a91e48-8244-4e96-a7ee-980ab989d20d
        health: HEALTH_WARN
                Degraded data redundancy: 126/663 objects degraded (19.005%), 24 pgs unclean, 24 pgs degraded
      services:
        mon:        3 daemons, quorum ceph2,ceph3,ceph4
        mgr:        ceph4(active), standbys: ceph3, ceph2
        mds:        cephfs-1/1/1 up  {0=ceph2=up:active}, 1 up:standby
        osd:        9 osds: 8 up, 8 in
        rbd-mirror: 1 daemon active
      data:
        pools:   13 pools, 504 pgs
        objects: 221 objects, 241 MB
        usage:   1587 MB used, 118 GB / 119 GB avail
        pgs:     126/663 objects degraded (19.005%)
                 480 active+clean
                 23  active+recovery_wait+degraded
                 1   active+recovering+degraded
      io:
        client:   4196 B/s rd, 0 B/s wr, 3 op/s rd, 0 op/s wr
        recovery: 2873 kB/s, 0 keys/s, 2 objects/s

     [root@ceph2 ceph]# ceph osd crush rm osd.0

    removed item id 0 name 'osd.0' from crush map

     [root@ceph2 ceph]# ceph auth list|grep osd.0

    installed auth entries:
    
    osd.0 

    [root@ceph2 ceph]# ceph auth rm osd.0

    updated

     [root@ceph2 ceph]# ceph -s

     cluster:
        id:     35a91e48-8244-4e96-a7ee-980ab989d20d
        health: HEALTH_OK
      services:
        mon:        3 daemons, quorum ceph2,ceph3,ceph4
        mgr:        ceph4(active), standbys: ceph3, ceph2
        mds:        cephfs-1/1/1 up  {0=ceph2=up:active}, 1 up:standby
        osd:        9 osds: 8 up, 8 in
        rbd-mirror: 1 daemon active
      data:
        pools:   13 pools, 504 pgs
        objects: 221 objects, 241 MB
        usage:   1656 MB used, 118 GB / 119 GB avail
        pgs:     504 active+clean
      io:
        client:   0 B/s wr, 0 op/s rd, 0 op/s wr

     [root@ceph2 ceph]# ceph osd rm osd.0

    removed osd.0

     [root@ceph2 ceph]# ceph -s

     cluster:
        id:     35a91e48-8244-4e96-a7ee-980ab989d20d
        health: HEALTH_OK
     
      services:
        mon:        3 daemons, quorum ceph2,ceph3,ceph4
        mgr:        ceph4(active), standbys: ceph3, ceph2
        mds:        cephfs-1/1/1 up  {0=ceph2=up:active}, 1 up:standby
        osd:        8 osds: 8 up, 8 in
        rbd-mirror: 1 daemon active
     
      data:
        pools:   13 pools, 504 pgs
        objects: 221 objects, 241 MB
        usage:   1656 MB used, 118 GB / 119 GB avail
        pgs:     504 active+clean
     
      io:
        client:   5321 B/s rd, 0 B/s wr, 5 op/s rd, 0 op/s wr 

    Method 2

    ceph osd out osd.3
    systemctl stop ceph-osd@3
    ceph osd purge osd.3            # this single command combines crush rm, auth del and osd rm
    Remove any configuration for this OSD from the configuration file (a hypothetical example follows)
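
    If ceph.conf carries a section for the removed OSD, delete it as well. A hypothetical example of such a stanza (not taken from this cluster's actual ceph.conf):

    [osd.3]
    host = ceph2
    # any osd.3-specific options (for example a dedicated journal path) would sit here and should be removed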

    1.2 Adding the OSD back

    The experiment above simulated a failed OSD and removed it, but the disk's partition and mount were never wiped, so when adding the OSD back we can start directly from creating the OSD key. In a production environment, after physically replacing the disk you have to start from partitioning and formatting it (a short sketch follows); see https://www.cnblogs.com/zyxnhr/p/10553717.html for the full procedure
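
    For reference, a minimal sketch of preparing a replacement disk by hand, assuming the new disk appears as /dev/vdb, a filestore layout, and the same mount point as above (a separate journal partition would normally be created as well):

    parted -s /dev/vdb mklabel gpt
    parted -s /dev/vdb mkpart primary xfs 1MiB 100%    # data partition -> /dev/vdb1
    mkfs.xfs -f /dev/vdb1
    mkdir -p /var/lib/ceph/osd/ceph-0
    mount /dev/vdb1 /var/lib/ceph/osd/ceph-0           # add a matching fstab entry so the mount survives reboots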

    On the still-formatted disk, the detailed steps (starting from key creation) are as follows

    [root@ceph2 ceph-0]# ceph osd create
    0
    [root@ceph2 ceph-0]# ceph-authtool --create-keyring /etc/ceph/ceph.osd.0.keyring --gen-key -n osd.0 --cap mon 'allow profile osd' --cap mgr 'allow profile osd' --cap osd 'allow *'
    creating /etc/ceph/ceph.osd.0.keyring
    [root@ceph2 ceph-0]# ceph auth import -i /etc/ceph/ceph.osd.0.keyring 
    imported keyring
    [root@ceph2 ceph-0]#  ceph auth get-or-create osd.0 -o /var/lib/ceph/osd/ceph-0/keyring
    [root@ceph2 ceph-0]# ceph-osd -i 0 --mkfs  --cluster ceph
    2019-03-29 07:57:58.928076 7f564d51fd00 -1 created object store /var/lib/ceph/osd/ceph-0 for osd.0 fsid 35a91e48-8244-4e96-a7ee-980ab989d20d
    [root@ceph2 ceph-0]# cd /var/lib/ceph/osd/ceph-0
    [root@ceph2 ceph-0]# rm -f journal
    [root@ceph2 ceph-0]#  partuuid_0=`blkid /dev/vdb1 | awk -F '"' '{print $8}'`
    [root@ceph2 ceph-0]#  echo $partuuid_0
    745dce53-1c63-4c50-b434-d441038dafe4
    [root@ceph2 ceph-0]# ln -s /dev/disk/by-partuuid/$partuuid_0 ./journal
    [root@ceph2 ceph-0]# ll
    total 64
    -rw-r--r--   1 root root   393 Mar 16 12:46 activate.monmap
    -rw-r--r--   1 ceph ceph     3 Mar 16 12:46 active
    -rw-r--r--   1 ceph ceph    37 Mar 16 12:46 ceph_fsid
    drwxr-xr-x 344 ceph ceph 12288 Mar 28 10:40 current
    -rw-r--r--   1 ceph ceph    37 Mar 16 12:46 fsid
    lrwxrwxrwx   1 root root    58 Mar 29 07:59 journal -> /dev/disk/by-partuuid/745dce53-1c63-4c50-b434-d441038dafe4
    -rw-r--r--   1 ceph ceph    37 Mar 16 12:46 journal_uuid
    -rw-------   1 ceph ceph    56 Mar 29 07:57 keyring
    -rw-r--r--   1 ceph ceph    21 Mar 16 12:46 magic
    -rw-r--r--   1 ceph ceph     6 Mar 16 12:46 ready
    -rw-r--r--   1 ceph ceph     4 Mar 16 12:46 store_version
    -rw-r--r--   1 ceph ceph    53 Mar 16 12:46 superblock
    -rw-r--r--   1 ceph ceph     0 Mar 16 12:47 systemd
    -rw-r--r--   1 ceph ceph    10 Mar 16 12:46 type
    -rw-r--r--   1 ceph ceph     2 Mar 16 12:46 whoami
    [root@ceph2 ceph-0]# chown ceph.ceph -R /var/lib/ceph
    [root@ceph2 ceph-0]# ceph-osd --mkjournal -i 0 --cluster ceph
    2019-03-29 08:00:02.007442 7f416ec90d00 -1 journal read_header error decoding journal header
    2019-03-29 08:00:02.018206 7f416ec90d00 -1 created new journal /var/lib/ceph/osd/ceph-0/journal for object store /var/lib/ceph/osd/ceph-0
    [root@ceph2 ceph-0]# chown ceph.ceph /dev/disk/by-partuuid/$partuuid_0
    [root@ceph2 ceph-0]# ceph osd crush add-bucket ceph2 host --cluster ceph
    bucket 'ceph2' already exists                          # no need to create the bucket: removing the OSD did not remove the host bucket
    [root@ceph2 ceph-0]# ceph osd crush move ceph2 root=default --cluster ceph       # likewise, ceph2 does not need to be moved under the default root again
    no need to move item id -3 name 'ceph2' to location {root=default} in crush map
    [root@ceph2 ceph-0]# ceph osd crush add osd.0 0.01500 root=default host=ceph2
    add item id 0 name 'osd.0' weight 0.015 at location {host=ceph2,root=default} to crush map
    [root@ceph2 ceph-0]# systemctl start ceph-osd@0
    [root@ceph2 ceph-0]# systemctl enable ceph-osd@0
    [root@ceph2 ceph-0]# ps -ef|grep osd
    ceph     1147069       1  0 Mar28 ?        00:02:51 /usr/bin/ceph-osd -f --cluster ceph --id 6 --setuser ceph --setgroup ceph
    ceph     1147169       1  0 Mar28 ?        00:03:19 /usr/bin/ceph-osd -f --cluster ceph --id 3 --setuser ceph --setgroup ceph
    ceph     1220601       1  6 08:04 ?        00:00:01 /usr/bin/ceph-osd -f --cluster ceph --id 0 --setuser ceph --setgroup ceph
    root     1220713 1156971  0 08:04 pts/0    00:00:00 grep --color=auto osd
    [root@ceph2 ceph-0]# ceph -s
      cluster:
        id:     35a91e48-8244-4e96-a7ee-980ab989d20d
        health: HEALTH_WARN
                1/2481 objects misplaced (0.040%)
                Degraded data redundancy: 423/2481 objects degraded (17.050%), 8 pgs unclean, 16 pgs degraded    # data is being rebalanced/recovered
      services:
        mon:        3 daemons, quorum ceph2,ceph3,ceph4
        mgr:        ceph4(active), standbys: ceph3, ceph2
        mds:        cephfs-1/1/1 up  {0=ceph2=up:active}, 1 up:standby
        osd:        9 osds: 9 up, 9 in; 1 remapped pgs
        rbd-mirror: 1 daemon active
      data:
        pools:   13 pools, 504 pgs
        objects: 827 objects, 2206 MB
        usage:   6747 MB used, 128 GB / 134 GB avail
        pgs:     423/2481 objects degraded (17.050%)
                 1/2481 objects misplaced (0.040%)
                 487 active+clean
                 14  active+recovery_wait+degraded
                 2   active+recovering+degraded
                 1   active+remapped+backfill_wait
      io:
        client:   4093 B/s rd, 0 B/s wr, 4 op/s rd, 0 op/s wr
        recovery: 20080 kB/s, 0 keys/s, 6 objects/s
    [root@ceph2 ceph-0]# ceph -s
      cluster:
        id:     35a91e48-8244-4e96-a7ee-980ab989d20d
        health: HEALTH_OK
     
      services:
        mon:        3 daemons, quorum ceph2,ceph3,ceph4
        mgr:        ceph4(active), standbys: ceph3, ceph2
        mds:        cephfs-1/1/1 up  {0=ceph2=up:active}, 1 up:standby
        osd:        9 osds: 9 up, 9 in        # back to normal: 9 OSDs up and in, and the cluster is healthy again
        rbd-mirror: 1 daemon active
      data:
        pools:   13 pools, 504 pgs
        objects: 827 objects, 2206 MB
        usage:   6614 MB used, 128 GB / 134 GB avail
        pgs:     504 active+clean
      io:
        client:   4093 B/s rd, 0 B/s wr, 4 op/s rd, 0 op/s wr

    1.3 Removing a failed node

    Method 1:

    1. First remove all OSDs on the node

    2. ceph osd crush remove serverc

    Method 2:

    1. First migrate all OSDs off the node

    2. Edit the CRUSH map and delete every entry related to the node (see the sketch below)
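
    A sketch of the manual CRUSH map edit in step 2, using crushtool (the file names are arbitrary):

    ceph osd getcrushmap -o crushmap.bin          # export the binary CRUSH map
    crushtool -d crushmap.bin -o crushmap.txt     # decompile it to text
    # edit crushmap.txt: delete the host bucket and any rules or items that reference the node
    crushtool -c crushmap.txt -o crushmap.new     # recompile
    ceph osd setcrushmap -i crushmap.new          # inject the updated map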

    1.4 OSD recovery and backfill

    When OSDs are added or removed, Ceph rebalances PGs. The resulting backfill and recovery operations can generate heavy back-end traffic and degrade cluster performance.

    To limit this impact, the backfill/recovery behaviour can be tuned with the following options (an example of applying them follows the list):

    osd_recovery_op_priority # 1-63, default 10; priority of recovery operations relative to client operations (client operations default to priority 63, set by osd_client_op_priority)

    osd_recovery_max_active # number of active recovery requests each OSD handles at once, default 15; raising it speeds up recovery but increases cluster load

    osd_recovery_threads # number of threads used for data recovery, default 1

    osd_max_backfills # maximum number of concurrent backfill operations per OSD, default 10

    osd_backfill_scan_min # minimum number of objects scanned per backfill operation, default 64

    osd_backfill_scan_max # maximum number of objects scanned per backfill operation, default 512

    osd_backfill_full_ratio # OSD fullness ratio above which the OSD refuses backfill requests, default 0.85

    osd_backfill_retry_interval # interval between backfill retries
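
    For example, recovery and backfill can be throttled at runtime and the same values persisted in ceph.conf; the numbers below are purely illustrative, not recommendations:

    ceph tell osd.* injectargs '--osd_max_backfills 1 --osd_recovery_max_active 1 --osd_recovery_op_priority 1'

    # to persist across restarts, add to the [osd] section of /etc/ceph/ceph.conf:
    # [osd]
    # osd_max_backfills = 1
    # osd_recovery_max_active = 1
    # osd_recovery_op_priority = 1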

    2. Monitor management

    2.1 Removing a monitor

    Stop the monitor process:

    [root@ceph2 ceph]# systemctl stop ceph-mon@ceph2

    Remove the monitor from the cluster:

    [root@ceph2 ceph]# ceph mon remove ceph2

    removing mon.ceph2 at 172.25.250.11:6789/0, there will be 2 monitors

    [root@ceph2 ceph]# ceph -s

      services:
        mon:        2 daemons, quorum ceph3,ceph4
        mgr:        ceph4(active), standbys: ceph2, ceph3
        mds:        cephfs-1/1/1 up  {0=ceph2=up:active}, 1 up:standby
        osd:        9 osds: 9 up, 9 in
        rbd-mirror: 1 daemon active

    Delete the monitor's data directory:

    [root@ceph2 ceph]# cd /var/lib/ceph/mon/
    [root@ceph2 mon]# ls
    ceph-ceph2
    [root@ceph2 mon]# rm -rf ceph-ceph2/

    2.2 Adding a monitor node back

    [root@ceph2 mon]# cd

    Create the data directory:

    [root@ceph2 ~]# mkdir /var/lib/ceph/mon/ceph-ceph2

    [root@ceph2 ~]# chown ceph.ceph -R !$

    chown ceph.ceph -R /var/lib/ceph/mon/ceph-ceph2

    [root@ceph2 ~]# ceph auth get mon.

    exported keyring for mon.
    [mon.]
    key = AQDqfYxcAAAAABAAIc47ZLcYh013gzu3WWruew==
    caps mon = "allow *"

    [root@ceph2 ~]# ceph auth get mon. -o /tmp/ceph.mon.keyring

    exported keyring for mon.

    [root@ceph2 ~]# cat /tmp/ceph.mon.keyring

    [mon.]
    key = AQDqfYxcAAAAABAAIc47ZLcYh013gzu3WWruew==
    caps mon = "allow *"

    [root@ceph2 ~]# ceph mon getmap -o /tmp/monmap.bin

    got monmap epoch 2

    [root@ceph2 ~]# monmaptool --print /tmp/monmap.bin

    monmaptool: monmap file /tmp/monmap.bin
    epoch 2
    fsid 35a91e48-8244-4e96-a7ee-980ab989d20d
    last_changed 2019-03-28 08:57:25.819243
    created 2019-03-16 12:39:14.839999
    0: 172.25.250.12:6789/0 mon.ceph3
    1: 172.25.250.13:6789/0 mon.ceph4

    [root@ceph2 ~]# sudo -u ceph ceph-mon -i ceph2 --mkfs --monmap /tmp/monmap.bin --keyring /tmp/ceph.mon.keyring

    [root@ceph2 ~]# ll /var/lib/ceph/mon/ceph-ceph2/

    total 8
    -rw------- 1 ceph ceph 77 Mar 28 09:03 keyring
    -rw-r--r-- 1 ceph ceph 8 Mar 28 09:03 kv_backend
    drwxr-xr-x 2 ceph ceph 112 Mar 28 09:03 store.db

    [root@ceph2 ~]# ps -ef |grep ceph-mon

    root 1135665 1088603 0 09:12 pts/0 00:00:00 grep --color=auto ceph-mon

    [root@ceph2 ~]# sudo -u ceph ceph-mon -i ceph2 --public-addr 172.25.250.11:6789

    [root@ceph2 ~]# !ps

    ps -ef |grep ceph-mon
    ceph 1135726 1 1 09:13 ? 00:00:00 ceph-mon -i ceph2 --public-addr 172.25.250.11:6789
    root 1135771 1088603 0 09:13 pts/0 00:00:00 grep --color=auto ceph-mon

    [root@ceph2 ~]# ceph -s

    services:
    mon: 3 daemons, quorum ceph2,ceph3,ceph4
    mgr: ceph4(active), standbys: ceph2, ceph3
    mds: cephfs-1/1/1 up {0=ceph2=up:active}, 1 up:standby
    osd: 9 osds: 9 up, 9 in
    rbd-mirror: 1 daemon active
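
    Note that a ceph-mon started by hand like this will not come back after a reboot. On a systemd-managed node you would normally stop the hand-started process and let the unit manage the daemon instead (a sketch, using the same unit name as the stop command in 2.1):

    [root@ceph2 ~]# systemctl start ceph-mon@ceph2
    [root@ceph2 ~]# systemctl enable ceph-mon@ceph2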

    2.3 Monitor troubleshooting

    [root@ceph2 ~]# ceph daemon mon.ceph2  quorum_status

    {       # check the monitor quorum
        "election_epoch": 128,
        "quorum": [
            0,
            1,
            2
        ],
        "quorum_names": [
            "ceph2",
            "ceph3",
            "ceph4"
        ],
        "quorum_leader_name": "ceph2",
        "monmap": {
            "epoch": 3,
            "fsid": "35a91e48-8244-4e96-a7ee-980ab989d20d",
            "modified": "2019-03-28 09:13:19.932456",
            "created": "2019-03-16 12:39:14.839999",
            "features": {
                "persistent": [
                    "kraken",
                    "luminous"
                ],
                "optional": []
            },
            "mons": [
                {
                    "rank": 0,
                    "name": "ceph2",
                    "addr": "172.25.250.11:6789/0",
                    "public_addr": "172.25.250.11:6789/0"
                },
                {
                    "rank": 1,
                    "name": "ceph3",
                    "addr": "172.25.250.12:6789/0",
                    "public_addr": "172.25.250.12:6789/0"
                },
                {
                    "rank": 2,
                    "name": "ceph4",
                    "addr": "172.25.250.13:6789/0",
                    "public_addr": "172.25.250.13:6789/0"
                }
            ]
        }
    }
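
    A few other commands that are often useful when checking monitor health:

    [root@ceph2 ~]# ceph mon stat                              # one-line monmap summary
    [root@ceph2 ~]# ceph daemon mon.ceph2 mon_status           # detailed state of this monitor via its admin socket
    [root@ceph2 ~]# ceph quorum_status --format json-pretty    # cluster-wide quorum view, without the admin socket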

    2.4 Managing daemons with admin sockets

    Through an admin socket, an administrator can interact with a daemon directly, for example to view and modify its configuration parameters.

    A daemon's socket file is normally /var/run/ceph/$cluster-$type.$id.asok

    Operations through the admin socket:

    ceph daemon $type.$id command

    or: ceph --admin-daemon /var/run/ceph/$cluster-$type.$id.asok command

    The most commonly used commands are:

    help

    config get parameter

    config set parameter value

    config show

    perf dump
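
    The two invocation forms are equivalent; for example, querying osd.6 through its socket file directly (assuming the default socket path for this cluster, which is named ceph):

    [root@ceph2 ceph]# ceph --admin-daemon /var/run/ceph/ceph-osd.6.asok config get osd_default_notify_timeout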

    View:

    [root@ceph2 ceph]# ceph daemon osd.6 config show|grep osd_default

     "osd_default_data_pool_replay_window": "45",
        "osd_default_notify_timeout": "30",

    [root@ceph2 ceph]# ceph daemon osd.6 config get xio_mp_max_64

    {
        "xio_mp_max_64": "65536"
    }

    Modify:

    [root@ceph2 ceph]# ceph tell osd.* injectargs --xio_mp_max_64 65536

    osd.1: xio_mp_max_64 = '65536' (not observed, change may require restart) 
    osd.2: xio_mp_max_64 = '65536' (not observed, change may require restart) 
    osd.3: xio_mp_max_64 = '65536' (not observed, change may require restart) 
    osd.4: xio_mp_max_64 = '65536' (not observed, change may require restart) 
    osd.5: xio_mp_max_64 = '65536' (not observed, change may require restart) 
    osd.6: xio_mp_max_64 = '65536' (not observed, change may require restart) 
    osd.7: xio_mp_max_64 = '65536' (not observed, change may require restart) 
    osd.8: xio_mp_max_64 = '65536' (not observed, change may require restart) 
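
    Values changed with injectargs (or with config set on the admin socket) only live in the running daemon's memory; the "(not observed, change may require restart)" note above additionally means this particular option is only read at daemon startup. To keep a change across restarts, write it into ceph.conf as well. The same change could also be applied to a single daemon through its admin socket, for example:

    [root@ceph2 ceph]# ceph daemon osd.6 config set xio_mp_max_64 65536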

    Blogger's note: the content of this article comes mainly from teacher Yan Wei of Yutian Education and was verified hands-on by me. If you want to repost it, please first obtain permission from Yutian Education (http://www.yutianedu.com/) or from Mr. Yan himself (https://www.cnblogs.com/breezey/). Thank you!
