Ceph not recovering after osd/host failure

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi all,

I’m testing Ceph Luminous 12.2.1 installed with ceph ansible.

While doing some failover tests I noticed that when I kill an OSD or a host, Ceph doesn’t recover automatically; it remains in the state shown below until I bring the OSDs or host back online.
I have 3 pools (volumes, cephfs_data and cephfs_metadata), each with size 3 and min_size 1.

Is there something I’m missing?

Below is some cluster info.

Thank you all
Regards

Matteo


  cluster:
    id:     ab7cb890-ee21-484e-9290-14b9e5e85125
    health: HEALTH_WARN
            3 osds down
            Degraded data redundancy: 2842/73686 objects degraded (3.857%), 318 pgs unclean, 318 pgs degraded, 318 pgs undersized

  services:
    mon: 3 daemons, quorum controller001,controller002,controller003
    mgr: controller001(active), standbys: controller002, controller003
    mds: cephfs-1/1/1 up  {0=controller002=up:active}, 2 up:standby
    osd: 77 osds: 74 up, 77 in

  data:
    pools:   3 pools, 4112 pgs
    objects: 36843 objects, 142 GB
    usage:   470 GB used, 139 TB / 140 TB avail
    pgs:     2842/73686 objects degraded (3.857%)
             3794 active+clean
             318  active+undersized+degraded


ID  CLASS WEIGHT    TYPE NAME           STATUS REWEIGHT PRI-AFF
 -1       140.02425 root default
 -9        20.00346     host storage001
  0   hdd   1.81850         osd.0           up  1.00000 1.00000
  6   hdd   1.81850         osd.6           up  1.00000 1.00000
  8   hdd   1.81850         osd.8           up  1.00000 1.00000
 11   hdd   1.81850         osd.11          up  1.00000 1.00000
 14   hdd   1.81850         osd.14          up  1.00000 1.00000
 18   hdd   1.81850         osd.18          up  1.00000 1.00000
 24   hdd   1.81850         osd.24          up  1.00000 1.00000
 28   hdd   1.81850         osd.28          up  1.00000 1.00000
 33   hdd   1.81850         osd.33          up  1.00000 1.00000
 40   hdd   1.81850         osd.40          up  1.00000 1.00000
 45   hdd   1.81850         osd.45          up  1.00000 1.00000
 -7        20.00346     host storage002
  1   hdd   1.81850         osd.1           up  1.00000 1.00000
  5   hdd   1.81850         osd.5           up  1.00000 1.00000
  9   hdd   1.81850         osd.9           up  1.00000 1.00000
 21   hdd   1.81850         osd.21          up  1.00000 1.00000
 22   hdd   1.81850         osd.22          up  1.00000 1.00000
 23   hdd   1.81850         osd.23          up  1.00000 1.00000
 35   hdd   1.81850         osd.35          up  1.00000 1.00000
 36   hdd   1.81850         osd.36          up  1.00000 1.00000
 38   hdd   1.81850         osd.38          up  1.00000 1.00000
 42   hdd   1.81850         osd.42          up  1.00000 1.00000
 49   hdd   1.81850         osd.49          up  1.00000 1.00000
-11        20.00346     host storage003
 27   hdd   1.81850         osd.27          up  1.00000 1.00000
 31   hdd   1.81850         osd.31          up  1.00000 1.00000
 32   hdd   1.81850         osd.32          up  1.00000 1.00000
 37   hdd   1.81850         osd.37          up  1.00000 1.00000
 44   hdd   1.81850         osd.44          up  1.00000 1.00000
 46   hdd   1.81850         osd.46          up  1.00000 1.00000
 48   hdd   1.81850         osd.48          up  1.00000 1.00000
 53   hdd   1.81850         osd.53          up  1.00000 1.00000
 54   hdd   1.81850         osd.54          up  1.00000 1.00000
 56   hdd   1.81850         osd.56          up  1.00000 1.00000
 59   hdd   1.81850         osd.59          up  1.00000 1.00000
 -3        20.00346     host storage004
  2   hdd   1.81850         osd.2           up  1.00000 1.00000
  4   hdd   1.81850         osd.4           up  1.00000 1.00000
 10   hdd   1.81850         osd.10          up  1.00000 1.00000
 16   hdd   1.81850         osd.16          up  1.00000 1.00000
 17   hdd   1.81850         osd.17          up  1.00000 1.00000
 19   hdd   1.81850         osd.19          up  1.00000 1.00000
 26   hdd   1.81850         osd.26          up  1.00000 1.00000
 29   hdd   1.81850         osd.29          up  1.00000 1.00000
 39   hdd   1.81850         osd.39          up  1.00000 1.00000
 43   hdd   1.81850         osd.43          up  1.00000 1.00000
 50   hdd   1.81850         osd.50          up  1.00000 1.00000
 -5        20.00346     host storage005
  3   hdd   1.81850         osd.3           up  1.00000 1.00000
  7   hdd   1.81850         osd.7           up  1.00000 1.00000
 12   hdd   1.81850         osd.12          up  1.00000 1.00000
 13   hdd   1.81850         osd.13          up  1.00000 1.00000
 15   hdd   1.81850         osd.15          up  1.00000 1.00000
 20   hdd   1.81850         osd.20          up  1.00000 1.00000
 25   hdd   1.81850         osd.25          up  1.00000 1.00000
 30   hdd   1.81850         osd.30          up  1.00000 1.00000
 34   hdd   1.81850         osd.34          up  1.00000 1.00000
 41   hdd   1.81850         osd.41          up  1.00000 1.00000
 47   hdd   1.81850         osd.47          up  1.00000 1.00000
-13        20.00346     host storage006
 51   hdd   1.81850         osd.51          up  1.00000 1.00000
 55   hdd   1.81850         osd.55          up  1.00000 1.00000
 58   hdd   1.81850         osd.58          up  1.00000 1.00000
 61   hdd   1.81850         osd.61          up  1.00000 1.00000
 63   hdd   1.81850         osd.63          up  1.00000 1.00000
 65   hdd   1.81850         osd.65          up  1.00000 1.00000
 66   hdd   1.81850         osd.66          up  1.00000 1.00000
 69   hdd   1.81850         osd.69          up  1.00000 1.00000
 71   hdd   1.81850         osd.71          up  1.00000 1.00000
 73   hdd   1.81850         osd.73          up  1.00000 1.00000
 75   hdd   1.81850         osd.75          up  1.00000 1.00000
-15        20.00346     host storage007
 52   hdd   1.81850         osd.52          up  1.00000 1.00000
 57   hdd   1.81850         osd.57          up  1.00000 1.00000
 60   hdd   1.81850         osd.60          up  1.00000 1.00000
 62   hdd   1.81850         osd.62          up  1.00000 1.00000
 64   hdd   1.81850         osd.64          up  1.00000 1.00000
 67   hdd   1.81850         osd.67          up  1.00000 1.00000
 68   hdd   1.81850         osd.68          up  1.00000 1.00000
 70   hdd   1.81850         osd.70          up  1.00000 1.00000
 72   hdd   1.81850         osd.72          up  1.00000 1.00000
 74   hdd   1.81850         osd.74          up  1.00000 1.00000
 76   hdd   1.81850         osd.76          up  1.00000 1.00000


# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54

# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd
device 3 osd.3 class hdd
device 4 osd.4 class hdd
device 5 osd.5 class hdd
device 6 osd.6 class hdd
device 7 osd.7 class hdd
device 8 osd.8 class hdd
device 9 osd.9 class hdd
device 10 osd.10 class hdd
device 11 osd.11 class hdd
device 12 osd.12 class hdd
device 13 osd.13 class hdd
device 14 osd.14 class hdd
device 15 osd.15 class hdd
device 16 osd.16 class hdd
device 17 osd.17 class hdd
device 18 osd.18 class hdd
device 19 osd.19 class hdd
device 20 osd.20 class hdd
device 21 osd.21 class hdd
device 22 osd.22 class hdd
device 23 osd.23 class hdd
device 24 osd.24 class hdd
device 25 osd.25 class hdd
device 26 osd.26 class hdd
device 27 osd.27 class hdd
device 28 osd.28 class hdd
device 29 osd.29 class hdd
device 30 osd.30 class hdd
device 31 osd.31 class hdd
device 32 osd.32 class hdd
device 33 osd.33 class hdd
device 34 osd.34 class hdd
device 35 osd.35 class hdd
device 36 osd.36 class hdd
device 37 osd.37 class hdd
device 38 osd.38 class hdd
device 39 osd.39 class hdd
device 40 osd.40 class hdd
device 41 osd.41 class hdd
device 42 osd.42 class hdd
device 43 osd.43 class hdd
device 44 osd.44 class hdd
device 45 osd.45 class hdd
device 46 osd.46 class hdd
device 47 osd.47 class hdd
device 48 osd.48 class hdd
device 49 osd.49 class hdd
device 50 osd.50 class hdd
device 51 osd.51 class hdd
device 52 osd.52 class hdd
device 53 osd.53 class hdd
device 54 osd.54 class hdd
device 55 osd.55 class hdd
device 56 osd.56 class hdd
device 57 osd.57 class hdd
device 58 osd.58 class hdd
device 59 osd.59 class hdd
device 60 osd.60 class hdd
device 61 osd.61 class hdd
device 62 osd.62 class hdd
device 63 osd.63 class hdd
device 64 osd.64 class hdd
device 65 osd.65 class hdd
device 66 osd.66 class hdd
device 67 osd.67 class hdd
device 68 osd.68 class hdd
device 69 osd.69 class hdd
device 70 osd.70 class hdd
device 71 osd.71 class hdd
device 72 osd.72 class hdd
device 73 osd.73 class hdd
device 74 osd.74 class hdd
device 75 osd.75 class hdd
device 76 osd.76 class hdd

# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 region
type 10 root

# buckets
host storage004 {
	id -3		# do not change unnecessarily
	id -4 class hdd		# do not change unnecessarily
	# weight 20.003
	alg straw2
	hash 0	# rjenkins1
	item osd.2 weight 1.818
	item osd.4 weight 1.818
	item osd.10 weight 1.818
	item osd.16 weight 1.818
	item osd.17 weight 1.818
	item osd.19 weight 1.818
	item osd.26 weight 1.818
	item osd.29 weight 1.818
	item osd.39 weight 1.818
	item osd.43 weight 1.818
	item osd.50 weight 1.818
}
host storage005 {
	id -5		# do not change unnecessarily
	id -6 class hdd		# do not change unnecessarily
	# weight 20.003
	alg straw2
	hash 0	# rjenkins1
	item osd.3 weight 1.818
	item osd.7 weight 1.818
	item osd.12 weight 1.818
	item osd.13 weight 1.818
	item osd.15 weight 1.818
	item osd.20 weight 1.818
	item osd.25 weight 1.818
	item osd.30 weight 1.818
	item osd.34 weight 1.818
	item osd.41 weight 1.818
	item osd.47 weight 1.818
}
host storage002 {
	id -7		# do not change unnecessarily
	id -8 class hdd		# do not change unnecessarily
	# weight 20.003
	alg straw2
	hash 0	# rjenkins1
	item osd.1 weight 1.818
	item osd.5 weight 1.818
	item osd.9 weight 1.818
	item osd.21 weight 1.818
	item osd.22 weight 1.818
	item osd.23 weight 1.818
	item osd.35 weight 1.818
	item osd.36 weight 1.818
	item osd.38 weight 1.818
	item osd.42 weight 1.818
	item osd.49 weight 1.818
}
host storage001 {
	id -9		# do not change unnecessarily
	id -10 class hdd		# do not change unnecessarily
	# weight 20.003
	alg straw2
	hash 0	# rjenkins1
	item osd.0 weight 1.818
	item osd.6 weight 1.818
	item osd.8 weight 1.818
	item osd.11 weight 1.818
	item osd.14 weight 1.818
	item osd.18 weight 1.818
	item osd.24 weight 1.818
	item osd.28 weight 1.818
	item osd.33 weight 1.818
	item osd.40 weight 1.818
	item osd.45 weight 1.818
}
host storage003 {
	id -11		# do not change unnecessarily
	id -12 class hdd		# do not change unnecessarily
	# weight 20.003
	alg straw2
	hash 0	# rjenkins1
	item osd.27 weight 1.818
	item osd.31 weight 1.818
	item osd.32 weight 1.818
	item osd.37 weight 1.818
	item osd.44 weight 1.818
	item osd.46 weight 1.818
	item osd.48 weight 1.818
	item osd.54 weight 1.818
	item osd.53 weight 1.818
	item osd.59 weight 1.818
	item osd.56 weight 1.818
}
host storage006 {
	id -13		# do not change unnecessarily
	id -14 class hdd		# do not change unnecessarily
	# weight 20.003
	alg straw2
	hash 0	# rjenkins1
	item osd.51 weight 1.818
	item osd.55 weight 1.818
	item osd.58 weight 1.818
	item osd.61 weight 1.818
	item osd.63 weight 1.818
	item osd.65 weight 1.818
	item osd.66 weight 1.818
	item osd.69 weight 1.818
	item osd.71 weight 1.818
	item osd.73 weight 1.818
	item osd.75 weight 1.818
}
host storage007 {
	id -15		# do not change unnecessarily
	id -16 class hdd		# do not change unnecessarily
	# weight 20.003
	alg straw2
	hash 0	# rjenkins1
	item osd.52 weight 1.818
	item osd.57 weight 1.818
	item osd.60 weight 1.818
	item osd.62 weight 1.818
	item osd.64 weight 1.818
	item osd.67 weight 1.818
	item osd.70 weight 1.818
	item osd.68 weight 1.818
	item osd.72 weight 1.818
	item osd.74 weight 1.818
	item osd.76 weight 1.818
}
root default {
	id -1		# do not change unnecessarily
	id -2 class hdd		# do not change unnecessarily
	# weight 140.024
	alg straw2
	hash 0	# rjenkins1
	item storage004 weight 20.003
	item storage005 weight 20.003
	item storage002 weight 20.003
	item storage001 weight 20.003
	item storage003 weight 20.003
	item storage006 weight 20.003
	item storage007 weight 20.003
}

# rules
rule replicated_rule {
	id 0
	type replicated
	min_size 1
	max_size 10
	step take default
	step chooseleaf firstn 0 type host
	step emit
}

# end crush map

_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com




[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux