We've been operating a cluster relatively incident-free since 0.86. On Monday I did a yum update on one node, ceph00, and after rebooting we're seeing every OSD on that host stuck in the 'booting' state.

I've tried removing all of the OSDs and recreating them with ceph-deploy (ceph-disk required a modification to use partx -a rather than partprobe), but we see the same status, and I'm not sure how to troubleshoot this further.

One detail that may be related: the OSDs on this host are now running as the ceph user, while the other three hosts are still running theirs as root (although I followed the steps listed to upgrade from hammer to infernalis and did chown -R ceph:ceph /var/lib/ceph on each node).
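For reference, the recreate procedure on ceph00 was roughly the following. This is a sketch rather than a verbatim transcript: sdX and ID are placeholders for the actual device names and OSD ids, and the ceph-disk path and edit are paraphrased from memory.

# ceph-disk (assumed at /usr/sbin/ceph-disk on this host) was hand-edited
# so its partition-table refresh runs "partx -a <dev>" instead of
# "partprobe <dev>"

# for each data disk on ceph00 (sdX is a placeholder):
ceph-deploy disk zap ceph00:sdX
ceph-deploy osd prepare ceph00:sdX

# re-applied ownership per the hammer -> infernalis upgrade notes and
# restarted the daemons (ID is a placeholder for the OSD id):
chown -R ceph:ceph /var/lib/ceph
systemctl restart ceph-osd@ID

Current state of the node: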
[root@ceph00 ceph]# lsb_release -idrc
Distributor ID: CentOS
Description: CentOS Linux release 7.2.1511 (Core)
Release: 7.2.1511
Codename: Core
[root@ceph00 ceph]# ceph --version
ceph version 9.2.0 (bb2ecea240f3a1d525bcb35670cb07bd1f0ca299)
[root@ceph00 ceph]# ceph daemon osd.0 status
{
    "cluster_fsid": "2e4ea2c0-fb62-41fa-b7b7-e34d759b851e",
    "osd_fsid": "ddf659ad-a3db-4094-b4d0-7d50f34b8f75",
    "whoami": 0,
    "state": "booting",
    "oldest_map": 25243,
    "newest_map": 26610,
    "num_pgs": 0
}
[root@ceph00 ceph]# ceph daemon osd.3 status
{
    "cluster_fsid": "2e4ea2c0-fb62-41fa-b7b7-e34d759b851e",
    "osd_fsid": "8b1acd8a-645d-4dc2-8c1d-6dbb1715265f",
    "whoami": 3,
    "state": "booting",
    "oldest_map": 25243,
    "newest_map": 26612,
    "num_pgs": 0
}
[root@ceph00 ceph]# ceph osd tree
 ID    WEIGHT TYPE NAME                  UP/DOWN REWEIGHT PRIMARY-AFFINITY
-23   1.43999 root ssd
-19         0     host ceph00_ssd
-20   0.48000     host ceph01_ssd
 40   0.48000         osd.40                  up  1.00000          1.00000
-21   0.48000     host ceph02_ssd
 43   0.48000         osd.43                  up  1.00000          1.00000
-22   0.48000     host ceph03_ssd
 41   0.48000         osd.41                  up  1.00000          1.00000
 -1 120.00000 root default
-17  80.00000     room b1
-14  40.00000         host ceph01
  1   4.00000             osd.1                up  1.00000          1.00000
  4   4.00000             osd.4                up  1.00000          1.00000
 18   4.00000             osd.18               up  1.00000          1.00000
 19   4.00000             osd.19               up  1.00000          1.00000
 20   4.00000             osd.20               up  1.00000          1.00000
 21   4.00000             osd.21               up  1.00000          1.00000
 22   4.00000             osd.22               up  1.00000          1.00000
 23   4.00000             osd.23               up  1.00000          1.00000
 24   4.00000             osd.24               up  1.00000          1.00000
 25   4.00000             osd.25               up  1.00000          1.00000
-16  40.00000         host ceph03
 30   4.00000             osd.30               up  1.00000          1.00000
 31   4.00000             osd.31               up  1.00000          1.00000
 32   4.00000             osd.32               up  1.00000          1.00000
 33   4.00000             osd.33               up  1.00000          1.00000
 34   4.00000             osd.34               up  1.00000          1.00000
 35   4.00000             osd.35               up  1.00000          1.00000
 36   4.00000             osd.36               up  1.00000          1.00000
 37   4.00000             osd.37               up  1.00000          1.00000
 38   4.00000             osd.38               up  1.00000          1.00000
 39   4.00000             osd.39               up  1.00000          1.00000
-18  40.00000     room b2
-13         0         host ceph00
-15  40.00000         host ceph02
  2   4.00000             osd.2                up  1.00000          1.00000
  5   4.00000             osd.5                up  1.00000          1.00000
 14   4.00000             osd.14               up  1.00000          1.00000
 15   4.00000             osd.15               up  1.00000          1.00000
 16   4.00000             osd.16               up  1.00000          1.00000
 17   4.00000             osd.17               up  1.00000          1.00000
 26   4.00000             osd.26               up  1.00000          1.00000
 27   4.00000             osd.27               up  1.00000          1.00000
 28   4.00000             osd.28               up  1.00000          1.00000
 29   4.00000             osd.29               up  1.00000          1.00000
  0         0 osd.0                          down        0          1.00000
  3         0 osd.3                          down        0          1.00000
  6         0 osd.6                          down        0          1.00000
  7         0 osd.7                          down        0          1.00000
  8         0 osd.8                          down        0          1.00000
  9         0 osd.9                          down        0          1.00000
 10         0 osd.10                         down        0          1.00000
 11         0 osd.11                         down        0          1.00000
 12         0 osd.12                         down        0          1.00000
 13         0 osd.13                         down        0          1.00000
Any assistance is greatly appreciated.
Bob