Check the OSD status

# Run on the deploy node
[root@yz-node1 ~]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 254.60396 root default
-3 87.29279 host yz-node1
0 hdd 7.27440 osd.0 up 1.00000 1.00000
1 hdd 7.27440 osd.1 up 1.00000 1.00000
...
-5 87.29279 host yz-node2
10 hdd 7.27440 osd.10 up 1.00000 1.00000
11 hdd 7.27440 osd.11 up 1.00000 1.00000
...
-7 80.01839 host yz-node3
22 hdd 7.27440 osd.22 down 0 1.00000
23 hdd 7.27440 osd.23 down 0 1.00000
24 hdd 7.27440 osd.24 down 0 1.00000
25 hdd 7.27440 osd.25 down 0 1.00000
26 hdd 7.27440 osd.26 down 0 1.00000
27 hdd 7.27440 osd.27 down 0 1.00000
28 hdd 7.27440 osd.28 down 1.00000 1.00000
29 hdd 7.27440 osd.29 down 0 1.00000
30 hdd 7.27440 osd.30 down 0 1.00000
31 hdd 7.27440 osd.31 down 0 1.00000
32 hdd 7.27440 osd.32 down 1.00000 1.00000
33 hdd 7.27440 osd.33 down 0 1.00000
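
If only some of the OSDs on a host are down, it helps to list just those first so that only their disks are touched later. A small sketch that parses the columns of the output above:

# List only the down OSD ids from the tree output
ceph osd tree | awk '$4 ~ /^osd\./ && $5 == "down" {print $4}'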

Write a script to remove the OSDs

# Run on the deploy node
[root@yz-node1 ~]# vi del.sh
#!/bin/sh
# Remove the down OSDs (osd.22 through osd.33) from the cluster
for k in $( seq 22 33 )
do
  ceph osd out osd.$k               # mark the OSD out of data placement
  ceph osd crush rm osd.$k          # remove it from the CRUSH map
  ceph auth del osd.$k              # delete its cephx key
  ceph osd down osd.$k              # mark it down (a no-op here, they are already down)
  ceph osd rm osd.$k                # remove it from the OSD map
  umount /var/lib/ceph/osd/ceph-$k  # unmount its data directory (only takes effect on the OSD host itself)
done
[root@yz-node1 ~]# sh del.sh
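
On Luminous and later releases the crush rm / auth del / osd rm steps can be collapsed into ceph osd purge; a minimal sketch of the same loop under that assumption:

#!/bin/sh
# Sketch: same removal using the consolidated purge command
for k in $( seq 22 33 )
do
  ceph osd purge osd.$k --yes-i-really-mean-it   # crush rm + auth del + osd rm in one step
  umount /var/lib/ceph/osd/ceph-$k               # still needs to run on the OSD host itself
done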

Check the OSD status again

# Run on the deploy node
[root@yz-node1 ~]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 254.60396 root default
-3 87.29279 host yz-node1
0 hdd 7.27440 osd.0 up 1.00000 1.00000
1 hdd 7.27440 osd.1 up 1.00000 1.00000
...
-5 87.29279 host yz-node2
10 hdd 7.27440 osd.10 up 1.00000 1.00000
11 hdd 7.27440 osd.11 up 1.00000 1.00000
...
-7 80.01839 host yz-node3
# yz-node3 now has no OSDs

Clean up the LVM volumes on the down node

# Run on the node whose OSDs are down
# Note: the steps below remove every OSD disk on this node.
# If only a few OSDs were down, look up the disks backing those OSDs and clean up only those.
[root@yz-node3 ~]# dmsetup remove $(dmsetup ls | grep ceph | awk '{print $1}')

# Since every OSD on this node is down, all of their volumes have to be removed
[root@yz-node3 ~]# lvm
lvm> pvs
PV VG Fmt Attr PSize PFree
/dev/sda3 centos lvm2 a-- 206.00g 4.00m
/dev/sdb ceph-2209af3b-b364-4a42-a741-003a52c2cfc2 lvm2 a-- <7.28t 0
/dev/sdc ceph-05735670-3015-4b5b-bfd7-065f1b148c3a lvm2 a-- <7.28t 0
/dev/sdd ceph-0d9b79dd-e90f-4b77-a387-e4262ba5de32 lvm2 a-- <7.28t 0
/dev/sde ceph-96438596-c0c1-45e8-9b84-b9e67ccb73df lvm2 a-- <7.28t 0
/dev/sdf ceph-2e683c99-7614-4a54-a86c-c045c4455a80 lvm2 a-- <7.28t 0
/dev/sdg ceph-9d67aa79-3a74-442f-a659-0423048cb02e lvm2 a-- <7.28t 0
/dev/sdh ceph-fb94e258-a081-4af4-b1cb-01050a13305b lvm2 a-- <7.28t 0
/dev/sdi ceph-513e8fdf-cd2a-42ca-89dc-3758a0f47053 lvm2 a-- <7.28t 0
/dev/sdj ceph-79a08396-9439-45c6-83ad-b5f231bc9db0 lvm2 a-- <7.28t 0
/dev/sdk ceph-ac5c5caa-3690-4f94-9960-c2e6eb3bba16 lvm2 a-- <7.28t 0
/dev/sdl ceph-7a3fb257-814f-4899-91e8-3f2c01bb7683 lvm2 a-- <7.28t 0
/dev/sdm ceph-bd760405-4d2e-46bb-89e9-a6bdcdc29b05 lvm2 a-- <7.28t 0
lvm> pvcreate -ff /dev/sdb # force re-initialize the physical volume
...
lvm> pvcreate -ff /dev/sdm
lvm> pvremove /dev/sdb # remove the physical volume
...
lvm> pvremove /dev/sdm
lvm> pvs
PV VG Fmt Attr PSize PFree
/dev/sda3 centos lvm2 a-- 206.00g 4.00m
[root@yz-node3 ~]# ceph-volume lvm zap /dev/sdb
...
[root@yz-node3 ~]# ceph-volume lvm zap /dev/sdm
# When lsblk no longer shows any ceph LVM volumes under the sdX disks, the cleanup has succeeded
[root@yz-node3 ~]# lsblk
sdb 8:16 0 7.3T 0 disk
sdc 8:32 0 7.3T 0 disk
sdd 8:48 0 7.3T 0 disk
sde 8:64 0 7.3T 0 disk
sdf 8:80 0 7.3T 0 disk
sdg 8:96 0 7.3T 0 disk
sdh 8:112 0 7.3T 0 disk
sdi 8:128 0 7.3T 0 disk
sdj 8:144 0 7.3T 0 disk
sdk 8:160 0 7.3T 0 disk
sdl 8:176 0 7.3T 0 disk
sdm 8:192 0 7.3T 0 disk
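
The per-disk cleanup can also be scripted. A sketch, assuming the data disks are /dev/sdb through /dev/sdm as in the pvs output above, and that this ceph-volume build supports the --destroy flag (which also tears down the LVM volumes, making the manual pvcreate/pvremove steps unnecessary):

# Run on the down node: wipe every ceph data disk in one loop
for d in b c d e f g h i j k l m
do
  ceph-volume lvm zap --destroy /dev/sd$d
done
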
# Run on the node whose OSDs are down
[root@yz-node3 ~]# parted /dev/sdb mklabel gpt -s
... # Note: repeat for each disk, changing the device name
[root@yz-node3 ~]# parted /dev/sdm mklabel gpt -s
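
The same loop pattern works for relabeling; a minimal sketch over the same disks:

# Run on the down node: write a fresh GPT label on each data disk
for d in b c d e f g h i j k l m
do
  parted -s /dev/sd$d mklabel gpt
done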

Recreate the OSDs

# Run on the deploy node
[root@yz-node1 ~]# cd /etc/ceph/
[root@yz-node1 ceph]# ceph-deploy disk zap yz-node3 /dev/sdb
... # Note: repeat for each disk, changing the device name
[root@yz-node1 ceph]# ceph-deploy disk zap yz-node3 /dev/sdm
[root@yz-node1 ceph]# ceph-deploy osd create --filestore --fs-type xfs --data /dev/sdb --journal /dev/sda7 yz-node3
... # Note: repeat for each disk, changing the data disk and journal partition
[root@yz-node1 ceph]# ceph-deploy osd create --filestore --fs-type xfs --data /dev/sdm --journal /dev/sda18 yz-node3
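
The zap/create steps can be looped as well. A sketch, assuming the twelve data disks /dev/sdb through /dev/sdm map in order to the journal partitions /dev/sda7 through /dev/sda18, as the first and last commands above suggest:

# Run on the deploy node, from /etc/ceph
j=7
for d in b c d e f g h i j k l m
do
  ceph-deploy disk zap yz-node3 /dev/sd$d
  ceph-deploy osd create --filestore --fs-type xfs --data /dev/sd$d --journal /dev/sda$j yz-node3
  j=$((j + 1))
done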

Check the Ceph cluster status

[root@yz-node1 osd]# ceph -s
cluster:
id: 29b16d13-9431-4a94-920e-e624f3d1e55a
health: HEALTH_WARN
Reduced data availability: 28 pgs inactive, 197 pgs peering
Degraded data redundancy: 48/864 objects degraded (5.556%), 36 pgs degraded, 1 pg undersized
application not enabled on 1 pool(s)
# HEALTH_WARN appears while the recreated OSDs peer

services:
mon: 3 daemons, quorum yz-node1,yz-node2,yz-node3
mgr: yz-node1(active)
osd: 36 osds: 36 up, 36 in; 2 remapped pgs

data:
pools: 5 pools, 1024 pgs
objects: 288 objects, 1005 MiB
usage: 7.8 GiB used, 262 TiB / 262 TiB avail
pgs: 25.977% pgs not active
48/864 objects degraded (5.556%)
340 active+clean
265 peering
236 stale+active+clean
147 active+undersized
35 active+undersized+degraded
1 stale+activating+undersized+degraded+remapped
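
The peering and stale states normally clear on their own as the recreated OSDs join; progress can be followed with something like:

# Re-run ceph -s every few seconds and watch the PG states settle
watch -n 5 ceph -s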

Restart the OSDs

[root@yz-node1 osd]# systemctl restart ceph-osd.target

Check the status

[root@yz-node1 osd]# ceph -s
cluster:
id: 29b16d13-9431-4a94-920e-e624f3d1e55a
health: HEALTH_WARN
# application not enabled on 1 pool(s)
# another warning appears
services:
mon: 3 daemons, quorum yz-node1,yz-node2,yz-node3
mgr: yz-node1(active)
osd: 36 osds: 36 up, 36 in

data:
pools: 5 pools, 1024 pgs
objects: 288 objects, 1005 MiB
usage: 8.6 GiB used, 262 TiB / 262 TiB avail
pgs: 1024 active+clean

Check the warning details

[root@yz-node1 osd]# ceph health detail
HEALTH_WARN application not enabled on 1 pool(s)
POOL_APP_NOT_ENABLED application not enabled on 1 pool(s)
application not enabled on pool 'myrbd'
use 'ceph osd pool application enable <pool-name> <app-name>', where <app-name> is 'cephfs', 'rbd', 'rgw', or freeform for custom applications.

Run the suggested command

[root@yz-node1 osd]# ceph osd pool application enable myrbd rgw
enabled application 'rgw' on pool 'myrbd'
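
To confirm which application tag the pool now carries (a pool that stores RBD images is conventionally tagged 'rbd', so the tag can be changed later if that fits better):

[root@yz-node1 osd]# ceph osd pool application get myrbd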

Check the status again

[root@yz-node1 osd]# ceph -s
cluster:
id: 29b16d13-9431-4a94-920e-e624f3d1e55a
health: HEALTH_OK

services:
mon: 3 daemons, quorum yz-node1,yz-node2,yz-node3
mgr: yz-node1(active)
osd: 36 osds: 36 up, 36 in

data:
pools: 5 pools, 1024 pgs
objects: 288 objects, 1005 MiB
usage: 8.6 GiB used, 262 TiB / 262 TiB avail
pgs: 1024 active+clean
# Back to normal (HEALTH_OK)