Re: Problems after migrating to straw2 (to enable the balancer)


Hi Dan

Indeed, at the moment I have only 5 OSD nodes spread over 3 racks.
The crush map is attached.
Are you suggesting to replicate only across hosts and not across racks (given the very limited resources)?
Thanks, Massimo

On Mon, Jan 14, 2019 at 3:29 PM Dan van der Ster <dan@xxxxxxxxxxxxxx> wrote:
On Mon, Jan 14, 2019 at 3:18 PM Massimo Sgaravatto
<massimo.sgaravatto@xxxxxxxxx> wrote:
>
> Thanks for the prompt reply
>
> Indeed I have different racks with different weights.

Are you sure you're replicating across racks? You have only 3 racks,
one of which is half the size of the other two -- if you are replicating
rack-wise, your cluster will be full once that smaller rack is full.

-- dan
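
For reference, a quick way to confirm which failure domain a pool actually replicates across is to look at the rule it points to; a minimal sketch (no particular pool assumed):

[root@ceph-mon-01 ~]# ceph osd pool ls detail     # note the crush_rule of each pool
[root@ceph-mon-01 ~]# ceph osd crush rule dump    # the "type" in the chooseleaf step is the failure domain

If replication really were rack-wise with 3 replicas, every rack would hold one copy of each object, so the usable raw capacity would be capped at roughly 3 x 54.56 = 163.7 of the 272.8 total crush weight -- the cluster fills up as soon as the smallest rack does.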


> Below is the "ceph osd tree" output:
>
> [root@ceph-mon-01 ~]# ceph osd tree
> ID CLASS WEIGHT    TYPE NAME                 STATUS REWEIGHT PRI-AFF
> -1       272.80426 root default
> -7       109.12170     rack Rack11-PianoAlto
> -8        54.56085         host ceph-osd-04
> 30   hdd   5.45609             osd.30            up  1.00000 1.00000
> 31   hdd   5.45609             osd.31            up  1.00000 1.00000
> 32   hdd   5.45609             osd.32            up  1.00000 1.00000
> 33   hdd   5.45609             osd.33            up  1.00000 1.00000
> 34   hdd   5.45609             osd.34            up  1.00000 1.00000
> 35   hdd   5.45609             osd.35            up  1.00000 1.00000
> 36   hdd   5.45609             osd.36            up  1.00000 1.00000
> 37   hdd   5.45609             osd.37            up  1.00000 1.00000
> 38   hdd   5.45609             osd.38            up  1.00000 1.00000
> 39   hdd   5.45609             osd.39            up  1.00000 1.00000
> -9        54.56085         host ceph-osd-05
> 40   hdd   5.45609             osd.40            up  1.00000 1.00000
> 41   hdd   5.45609             osd.41            up  1.00000 1.00000
> 42   hdd   5.45609             osd.42            up  1.00000 1.00000
> 43   hdd   5.45609             osd.43            up  1.00000 1.00000
> 44   hdd   5.45609             osd.44            up  1.00000 1.00000
> 45   hdd   5.45609             osd.45            up  1.00000 1.00000
> 46   hdd   5.45609             osd.46            up  1.00000 1.00000
> 47   hdd   5.45609             osd.47            up  1.00000 1.00000
> 48   hdd   5.45609             osd.48            up  1.00000 1.00000
> 49   hdd   5.45609             osd.49            up  1.00000 1.00000
> -6       109.12170     rack Rack15-PianoAlto
> -3        54.56085         host ceph-osd-02
> 10   hdd   5.45609             osd.10            up  1.00000 1.00000
> 11   hdd   5.45609             osd.11            up  1.00000 1.00000
> 12   hdd   5.45609             osd.12            up  1.00000 1.00000
> 13   hdd   5.45609             osd.13            up  1.00000 1.00000
> 14   hdd   5.45609             osd.14            up  1.00000 1.00000
> 15   hdd   5.45609             osd.15            up  1.00000 1.00000
> 16   hdd   5.45609             osd.16            up  1.00000 1.00000
> 17   hdd   5.45609             osd.17            up  1.00000 1.00000
> 18   hdd   5.45609             osd.18            up  1.00000 1.00000
> 19   hdd   5.45609             osd.19            up  1.00000 1.00000
> -4        54.56085         host ceph-osd-03
> 20   hdd   5.45609             osd.20            up  1.00000 1.00000
> 21   hdd   5.45609             osd.21            up  1.00000 1.00000
> 22   hdd   5.45609             osd.22            up  1.00000 1.00000
> 23   hdd   5.45609             osd.23            up  1.00000 1.00000
> 24   hdd   5.45609             osd.24            up  1.00000 1.00000
> 25   hdd   5.45609             osd.25            up  1.00000 1.00000
> 26   hdd   5.45609             osd.26            up  1.00000 1.00000
> 27   hdd   5.45609             osd.27            up  1.00000 1.00000
> 28   hdd   5.45609             osd.28            up  1.00000 1.00000
> 29   hdd   5.45609             osd.29            up  1.00000 1.00000
> -5        54.56085     rack Rack17-PianoAlto
> -2        54.56085         host ceph-osd-01
>  0   hdd   5.45609             osd.0             up  1.00000 1.00000
>  1   hdd   5.45609             osd.1             up  1.00000 1.00000
>  2   hdd   5.45609             osd.2             up  1.00000 1.00000
>  3   hdd   5.45609             osd.3             up  1.00000 1.00000
>  4   hdd   5.45609             osd.4             up  1.00000 1.00000
>  5   hdd   5.45609             osd.5             up  1.00000 1.00000
>  6   hdd   5.45609             osd.6             up  1.00000 1.00000
>  7   hdd   5.45609             osd.7             up  1.00000 1.00000
>  8   hdd   5.45609             osd.8             up  1.00000 1.00000
>  9   hdd   5.45609             osd.9             up  1.00000 1.00000
> [root@ceph-mon-01 ~]#
>
> On Mon, Jan 14, 2019 at 3:13 PM Dan van der Ster <dan@xxxxxxxxxxxxxx> wrote:
>>
>> On Mon, Jan 14, 2019 at 3:06 PM Massimo Sgaravatto
>> <massimo.sgaravatto@xxxxxxxxx> wrote:
>> >
>> > I have a ceph luminous cluster running on CentOS7 nodes.
>> > This cluster has 50 OSDs, all of the same size and weight.
>> >
>> > Since I noticed quite an "unfair" usage of the OSDs (some used at 30%, some at 70%), I tried to activate the balancer.
>> >
>> > But the balancer doesn't start, I guess because of this problem:
>> >
>> > [root@ceph-mon-01 ~]# ceph osd crush weight-set create-compat
>> > Error EPERM: crush map contains one or more bucket(s) that are not straw2
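
For context, the usual steps to turn the balancer on under luminous look roughly like this (a sketch; whether upmap can be used depends on the client versions):

[root@ceph-mon-01 ~]# ceph mgr module enable balancer
[root@ceph-mon-01 ~]# ceph balancer mode crush-compat   # or "upmap" if all clients are luminous+
[root@ceph-mon-01 ~]# ceph balancer on
[root@ceph-mon-01 ~]# ceph balancer status

The crush-compat mode relies on the compat weight-set, and creating that weight-set is exactly what fails above while any bucket is still plain straw.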
>> >
>> >
>> > So I issued the command to convert from straw to straw2 (all the clients are running luminous):
>> >
>> >
>> > [root@ceph-mon-01 ~]# ceph osd crush set-all-straw-buckets-to-straw2
>> > Error EINVAL: new crush map requires client version hammer but require_min_compat_client is firefly
>> > [root@ceph-mon-01 ~]# ceph osd set-require-min-compat-client jewel
>> > set require_min_compat_client to jewel
>> > [root@ceph-mon-01 ~]# ceph osd crush set-all-straw-buckets-to-straw2
>> > [root@ceph-mon-01 ~]#
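
As a side note, the bucket algorithms can be inspected (and the effect of a change dry-run) offline by decompiling the CRUSH map; a minimal sketch, with arbitrary file names:

[root@ceph-mon-01 ~]# ceph osd getcrushmap -o /tmp/cm.bin
[root@ceph-mon-01 ~]# crushtool -d /tmp/cm.bin -o /tmp/cm.txt
[root@ceph-mon-01 ~]# grep 'alg ' /tmp/cm.txt        # straw vs straw2 per bucket
[root@ceph-mon-01 ~]# crushtool -i /tmp/cm.bin --test --rule 0 --num-rep 3 --show-mappings | head

Comparing the --test mappings produced before and after an edit gives a rough idea of how many PGs a change will move.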
>> >
>> >
>> > After issuing the command, the cluster went into WARNING state because ~12% of the objects were misplaced.
>> >
>> > Is this normal?
>> > I read somewhere that the migration from straw to straw2 should trigger a data migration only if the OSDs have different sizes, which is not my case.
>>
>> The relevant sizes to compare are those of the crush buckets across
>> which you are replicating.
>> Are you replicating host-wise or rack-wise?
>> Do you have hosts/racks with different crush weights (i.e. a
>> different total size)?
>> Maybe share your `ceph osd tree`.
>>
>> Cheers, dan
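
Besides `ceph osd tree`, something like `ceph osd df tree` can be handy here, since it shows weight and utilization aggregated per host and per rack in a single view:

[root@ceph-mon-01 ~]# ceph osd df tree    # watch the WEIGHT, %USE and VAR columns per bucket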
>>
>>
>>
>> >
>> >
>> > The cluster is still recovering, but what worries me is that data seems to be moving to the most-used OSDs, and the MAX_AVAIL value is decreasing quite quickly.
>> >
>> > I hope the recovery finishes without causing problems; then I will immediately activate the balancer.
>> >
>> > But if some OSDs are getting too full, is it safe to decrease their weights while the cluster is still recovering?
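
For what it's worth, the usual knob for an individual OSD that is getting too full is the override reweight (a value between 0 and 1, separate from the crush weight); a sketch with made-up values:

[root@ceph-mon-01 ~]# ceph osd df                              # watch the %USE and VAR columns
[root@ceph-mon-01 ~]# ceph osd reweight 14 0.90                # example: push some data off osd.14
[root@ceph-mon-01 ~]# ceph osd reweight-by-utilization 120     # or let ceph pick OSDs >20% above the mean

Note that these override reweights are independent of the compat weight-set that the balancer manages.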
>> >
>> > Thanks a lot for your help
>> > Of course I can provide other info, if needed
>> >
>> >
>> > Cheers, Massimo
>> >
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable straw_calc_version 1

# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd
device 3 osd.3 class hdd
device 4 osd.4 class hdd
device 5 osd.5 class hdd
device 6 osd.6 class hdd
device 7 osd.7 class hdd
device 8 osd.8 class hdd
device 9 osd.9 class hdd
device 10 osd.10 class hdd
device 11 osd.11 class hdd
device 12 osd.12 class hdd
device 13 osd.13 class hdd
device 14 osd.14 class hdd
device 15 osd.15 class hdd
device 16 osd.16 class hdd
device 17 osd.17 class hdd
device 18 osd.18 class hdd
device 19 osd.19 class hdd
device 20 osd.20 class hdd
device 21 osd.21 class hdd
device 22 osd.22 class hdd
device 23 osd.23 class hdd
device 24 osd.24 class hdd
device 25 osd.25 class hdd
device 26 osd.26 class hdd
device 27 osd.27 class hdd
device 28 osd.28 class hdd
device 29 osd.29 class hdd
device 30 osd.30 class hdd
device 31 osd.31 class hdd
device 32 osd.32 class hdd
device 33 osd.33 class hdd
device 34 osd.34 class hdd
device 35 osd.35 class hdd
device 36 osd.36 class hdd
device 37 osd.37 class hdd
device 38 osd.38 class hdd
device 39 osd.39 class hdd
device 40 osd.40 class hdd
device 41 osd.41 class hdd
device 42 osd.42 class hdd
device 43 osd.43 class hdd
device 44 osd.44 class hdd
device 45 osd.45 class hdd
device 46 osd.46 class hdd
device 47 osd.47 class hdd
device 48 osd.48 class hdd
device 49 osd.49 class hdd

# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 region
type 10 root

# buckets
host ceph-osd-01 {
	id -2		# do not change unnecessarily
	id -10 class hdd		# do not change unnecessarily
	# weight 54.561
	alg straw2
	hash 0	# rjenkins1
	item osd.0 weight 5.456
	item osd.2 weight 5.456
	item osd.3 weight 5.456
	item osd.4 weight 5.456
	item osd.5 weight 5.456
	item osd.6 weight 5.456
	item osd.7 weight 5.456
	item osd.8 weight 5.456
	item osd.9 weight 5.456
	item osd.1 weight 5.456
}
rack Rack17-PianoAlto {
	id -5		# do not change unnecessarily
	id -11 class hdd		# do not change unnecessarily
	# weight 54.561
	alg straw2
	hash 0	# rjenkins1
	item ceph-osd-01 weight 54.561
}
host ceph-osd-02 {
	id -3		# do not change unnecessarily
	id -12 class hdd		# do not change unnecessarily
	# weight 54.561
	alg straw2
	hash 0	# rjenkins1
	item osd.10 weight 5.456
	item osd.11 weight 5.456
	item osd.12 weight 5.456
	item osd.13 weight 5.456
	item osd.14 weight 5.456
	item osd.15 weight 5.456
	item osd.16 weight 5.456
	item osd.17 weight 5.456
	item osd.18 weight 5.456
	item osd.19 weight 5.456
}
host ceph-osd-03 {
	id -4		# do not change unnecessarily
	id -13 class hdd		# do not change unnecessarily
	# weight 54.561
	alg straw2
	hash 0	# rjenkins1
	item osd.20 weight 5.456
	item osd.21 weight 5.456
	item osd.22 weight 5.456
	item osd.23 weight 5.456
	item osd.25 weight 5.456
	item osd.26 weight 5.456
	item osd.28 weight 5.456
	item osd.29 weight 5.456
	item osd.27 weight 5.456
	item osd.24 weight 5.456
}
rack Rack15-PianoAlto {
	id -6		# do not change unnecessarily
	id -14 class hdd		# do not change unnecessarily
	# weight 109.122
	alg straw2
	hash 0	# rjenkins1
	item ceph-osd-02 weight 54.561
	item ceph-osd-03 weight 54.561
}
host ceph-osd-04 {
	id -8		# do not change unnecessarily
	id -15 class hdd		# do not change unnecessarily
	# weight 54.561
	alg straw2
	hash 0	# rjenkins1
	item osd.30 weight 5.456
	item osd.31 weight 5.456
	item osd.32 weight 5.456
	item osd.33 weight 5.456
	item osd.34 weight 5.456
	item osd.35 weight 5.456
	item osd.36 weight 5.456
	item osd.37 weight 5.456
	item osd.38 weight 5.456
	item osd.39 weight 5.456
}
host ceph-osd-05 {
	id -9		# do not change unnecessarily
	id -16 class hdd		# do not change unnecessarily
	# weight 54.561
	alg straw2
	hash 0	# rjenkins1
	item osd.40 weight 5.456
	item osd.41 weight 5.456
	item osd.42 weight 5.456
	item osd.43 weight 5.456
	item osd.44 weight 5.456
	item osd.45 weight 5.456
	item osd.46 weight 5.456
	item osd.47 weight 5.456
	item osd.48 weight 5.456
	item osd.49 weight 5.456
}
rack Rack11-PianoAlto {
	id -7		# do not change unnecessarily
	id -17 class hdd		# do not change unnecessarily
	# weight 109.122
	alg straw2
	hash 0	# rjenkins1
	item ceph-osd-04 weight 54.561
	item ceph-osd-05 weight 54.561
}
root default {
	id -1		# do not change unnecessarily
	id -18 class hdd		# do not change unnecessarily
	# weight 272.804
	alg straw2
	hash 0	# rjenkins1
	item Rack17-PianoAlto weight 54.561
	item Rack15-PianoAlto weight 109.122
	item Rack11-PianoAlto weight 109.122
}

# rules
rule replicated_ruleset {
	id 0
	type replicated
	min_size 1
	max_size 10
	step take default
	step chooseleaf firstn 0 type host
	step emit
}

# end crush map
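
For completeness: the replicated_ruleset above uses "type host" as the failure domain, so copies are spread across hosts, not racks. A rack-level rule would look roughly like the following sketch (the rule name and id are arbitrary and must not clash with existing ones):

rule replicated_racks {
	id 1
	type replicated
	min_size 1
	max_size 10
	step take default
	step chooseleaf firstn 0 type rack
	step emit
}

With only three racks of unequal weight, though, a 3-replica rack-wise rule would cap usable capacity at the smallest rack, as Dan points out above.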
_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com
