We've been operating a cluster relatively incident-free since 0.86. On Monday I did a yum update on one node, ceph00, and after rebooting we're seeing every OSD on that host stuck in the 'booting' state.

I've tried removing all of the OSDs and recreating them with ceph-deploy (ceph-disk required a modification to use partx -a rather than partprobe), but we see the same status, and I'm not sure how to troubleshoot this further.

One detail that may be related: the OSDs on this host are now running as the ceph user, while the other three hosts are still running theirs as root (although I followed the steps listed to upgrade from hammer to infernalis and did chown -R ceph:ceph /var/lib/ceph on each node).
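For reference, the recreate procedure on ceph00 was roughly the following. This is a sketch rather than a verbatim transcript: sdX and ID are placeholders for the actual device names and OSD ids, and the ceph-disk path and edit are paraphrased from memory.

# ceph-disk (assumed at /usr/sbin/ceph-disk on this host) was hand-edited
# so its partition-table refresh runs "partx -a <dev>" instead of
# "partprobe <dev>"

# for each data disk on ceph00 (sdX is a placeholder):
ceph-deploy disk zap ceph00:sdX
ceph-deploy osd prepare ceph00:sdX

# re-applied ownership per the hammer -> infernalis upgrade notes and
# restarted the daemons (ID is a placeholder for the OSD id):
chown -R ceph:ceph /var/lib/ceph
systemctl restart ceph-osd@ID

Current state of the node: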
[root@ceph00 ceph]# lsb_release -idrc
Distributor ID: CentOS
Description: CentOS Linux release 7.2.1511 (Core)
Release: 7.2.1511
Codename: Core
[root@ceph00 ceph]# ceph --version
ceph version 9.2.0 (bb2ecea240f3a1d525bcb35670cb07bd1f0ca299)
[root@ceph00 ceph]# ceph daemon osd.0 status
{
    "cluster_fsid": "2e4ea2c0-fb62-41fa-b7b7-e34d759b851e",
    "osd_fsid": "ddf659ad-a3db-4094-b4d0-7d50f34b8f75",
    "whoami": 0,
    "state": "booting",
    "oldest_map": 25243,
    "newest_map": 26610,
    "num_pgs": 0
}
[root@ceph00 ceph]# ceph daemon osd.3 status
{
    "cluster_fsid": "2e4ea2c0-fb62-41fa-b7b7-e34d759b851e",
    "osd_fsid": "8b1acd8a-645d-4dc2-8c1d-6dbb1715265f",
    "whoami": 3,
    "state": "booting",
    "oldest_map": 25243,
    "newest_map": 26612,
    "num_pgs": 0
}
[root@ceph00 ceph]# ceph osd tree
 ID    WEIGHT TYPE NAME                  UP/DOWN REWEIGHT PRIMARY-AFFINITY
-23   1.43999 root ssd
-19         0     host ceph00_ssd
-20   0.48000     host ceph01_ssd
 40   0.48000         osd.40                  up  1.00000          1.00000
-21   0.48000     host ceph02_ssd
 43   0.48000         osd.43                  up  1.00000          1.00000
-22   0.48000     host ceph03_ssd
 41   0.48000         osd.41                  up  1.00000          1.00000
 -1 120.00000 root default
-17  80.00000     room b1
-14  40.00000         host ceph01
  1   4.00000             osd.1                up  1.00000          1.00000
  4   4.00000             osd.4                up  1.00000          1.00000
 18   4.00000             osd.18               up  1.00000          1.00000
 19   4.00000             osd.19               up  1.00000          1.00000
 20   4.00000             osd.20               up  1.00000          1.00000
 21   4.00000             osd.21               up  1.00000          1.00000
 22   4.00000             osd.22               up  1.00000          1.00000
 23   4.00000             osd.23               up  1.00000          1.00000
 24   4.00000             osd.24               up  1.00000          1.00000
 25   4.00000             osd.25               up  1.00000          1.00000
-16  40.00000         host ceph03
 30   4.00000             osd.30               up  1.00000          1.00000
 31   4.00000             osd.31               up  1.00000          1.00000
 32   4.00000             osd.32               up  1.00000          1.00000
 33   4.00000             osd.33               up  1.00000          1.00000
 34   4.00000             osd.34               up  1.00000          1.00000
 35   4.00000             osd.35               up  1.00000          1.00000
 36   4.00000             osd.36               up  1.00000          1.00000
 37   4.00000             osd.37               up  1.00000          1.00000
 38   4.00000             osd.38               up  1.00000          1.00000
 39   4.00000             osd.39               up  1.00000          1.00000
-18  40.00000     room b2
-13         0         host ceph00
-15  40.00000         host ceph02
  2   4.00000             osd.2                up  1.00000          1.00000
  5   4.00000             osd.5                up  1.00000          1.00000
 14   4.00000             osd.14               up  1.00000          1.00000
 15   4.00000             osd.15               up  1.00000          1.00000
 16   4.00000             osd.16               up  1.00000          1.00000
 17   4.00000             osd.17               up  1.00000          1.00000
 26   4.00000             osd.26               up  1.00000          1.00000
 27   4.00000             osd.27               up  1.00000          1.00000
 28   4.00000             osd.28               up  1.00000          1.00000
 29   4.00000             osd.29               up  1.00000          1.00000
  0         0 osd.0                          down        0          1.00000
  3         0 osd.3                          down        0          1.00000
  6         0 osd.6                          down        0          1.00000
  7         0 osd.7                          down        0          1.00000
  8         0 osd.8                          down        0          1.00000
  9         0 osd.9                          down        0          1.00000
 10         0 osd.10                         down        0          1.00000
 11         0 osd.11                         down        0          1.00000
 12         0 osd.12                         down        0          1.00000
 13         0 osd.13                         down        0          1.00000
Any assistance is greatly appreciated.
Bob