On 1/14/19 3:18 PM, Massimo Sgaravatto wrote:
> Thanks for the prompt reply
>
> Indeed I have different racks with different weights.
> Below is the "ceph osd tree" output:

Can you also show the output of 'ceph osd df'? The number of PGs might be
on the low side, which also causes this kind of imbalance. If you do not
have enough PGs, CRUSH can't distribute the data properly either.

Wido

> [root@ceph-mon-01 ~]# ceph osd tree
> ID CLASS WEIGHT    TYPE NAME                    STATUS REWEIGHT PRI-AFF
> -1       272.80426 root default
> -7       109.12170     rack Rack11-PianoAlto
> -8        54.56085         host ceph-osd-04
> 30   hdd   5.45609             osd.30           up  1.00000 1.00000
> 31   hdd   5.45609             osd.31           up  1.00000 1.00000
> 32   hdd   5.45609             osd.32           up  1.00000 1.00000
> 33   hdd   5.45609             osd.33           up  1.00000 1.00000
> 34   hdd   5.45609             osd.34           up  1.00000 1.00000
> 35   hdd   5.45609             osd.35           up  1.00000 1.00000
> 36   hdd   5.45609             osd.36           up  1.00000 1.00000
> 37   hdd   5.45609             osd.37           up  1.00000 1.00000
> 38   hdd   5.45609             osd.38           up  1.00000 1.00000
> 39   hdd   5.45609             osd.39           up  1.00000 1.00000
> -9        54.56085         host ceph-osd-05
> 40   hdd   5.45609             osd.40           up  1.00000 1.00000
> 41   hdd   5.45609             osd.41           up  1.00000 1.00000
> 42   hdd   5.45609             osd.42           up  1.00000 1.00000
> 43   hdd   5.45609             osd.43           up  1.00000 1.00000
> 44   hdd   5.45609             osd.44           up  1.00000 1.00000
> 45   hdd   5.45609             osd.45           up  1.00000 1.00000
> 46   hdd   5.45609             osd.46           up  1.00000 1.00000
> 47   hdd   5.45609             osd.47           up  1.00000 1.00000
> 48   hdd   5.45609             osd.48           up  1.00000 1.00000
> 49   hdd   5.45609             osd.49           up  1.00000 1.00000
> -6       109.12170     rack Rack15-PianoAlto
> -3        54.56085         host ceph-osd-02
> 10   hdd   5.45609             osd.10           up  1.00000 1.00000
> 11   hdd   5.45609             osd.11           up  1.00000 1.00000
> 12   hdd   5.45609             osd.12           up  1.00000 1.00000
> 13   hdd   5.45609             osd.13           up  1.00000 1.00000
> 14   hdd   5.45609             osd.14           up  1.00000 1.00000
> 15   hdd   5.45609             osd.15           up  1.00000 1.00000
> 16   hdd   5.45609             osd.16           up  1.00000 1.00000
> 17   hdd   5.45609             osd.17           up  1.00000 1.00000
> 18   hdd   5.45609             osd.18           up  1.00000 1.00000
> 19   hdd   5.45609             osd.19           up  1.00000 1.00000
> -4        54.56085         host ceph-osd-03
> 20   hdd   5.45609             osd.20           up  1.00000 1.00000
> 21   hdd   5.45609             osd.21           up  1.00000 1.00000
> 22   hdd   5.45609             osd.22           up  1.00000 1.00000
> 23   hdd   5.45609             osd.23           up  1.00000 1.00000
> 24   hdd   5.45609             osd.24           up  1.00000 1.00000
> 25   hdd   5.45609             osd.25           up  1.00000 1.00000
> 26   hdd   5.45609             osd.26           up  1.00000 1.00000
> 27   hdd   5.45609             osd.27           up  1.00000 1.00000
> 28   hdd   5.45609             osd.28           up  1.00000 1.00000
> 29   hdd   5.45609             osd.29           up  1.00000 1.00000
> -5        54.56085     rack Rack17-PianoAlto
> -2        54.56085         host ceph-osd-01
>  0   hdd   5.45609             osd.0            up  1.00000 1.00000
>  1   hdd   5.45609             osd.1            up  1.00000 1.00000
>  2   hdd   5.45609             osd.2            up  1.00000 1.00000
>  3   hdd   5.45609             osd.3            up  1.00000 1.00000
>  4   hdd   5.45609             osd.4            up  1.00000 1.00000
>  5   hdd   5.45609             osd.5            up  1.00000 1.00000
>  6   hdd   5.45609             osd.6            up  1.00000 1.00000
>  7   hdd   5.45609             osd.7            up  1.00000 1.00000
>  8   hdd   5.45609             osd.8            up  1.00000 1.00000
>  9   hdd   5.45609             osd.9            up  1.00000 1.00000
> [root@ceph-mon-01 ~]#
>
> On Mon, Jan 14, 2019 at 3:13 PM Dan van der Ster <dan@xxxxxxxxxxxxxx> wrote:
>
> On Mon, Jan 14, 2019 at 3:06 PM Massimo Sgaravatto
> <massimo.sgaravatto@xxxxxxxxx> wrote:
> >
> > I have a ceph luminous cluster running on CentOS7 nodes.
> > This cluster has 50 OSDs, all with the same size and all with the same
> > weight.
> >
> > Since I noticed quite an "unfair" usage of the OSDs (some used at 30 %,
> > some used at 70 %) I tried to activate the balancer.
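
(Side note on inspecting the imbalance itself: the per-OSD utilization
spread and the PG counts the balancer has to work with can be checked with
something like the commands below; the exact output columns vary a bit by
release.)

    ceph osd df tree          # per-OSD utilization, variance and PG count
    ceph osd pool ls detail   # pg_num / pgp_num of each pool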
> >
> > But the balancer doesn't start, I guess because of this problem:
> >
> > [root@ceph-mon-01 ~]# ceph osd crush weight-set create-compat
> > Error EPERM: crush map contains one or more bucket(s) that are not straw2
> >
> > So I issued the command to convert from straw to straw2 (all the clients
> > are running luminous):
> >
> > [root@ceph-mon-01 ~]# ceph osd crush set-all-straw-buckets-to-straw2
> > Error EINVAL: new crush map requires client version hammer but
> > require_min_compat_client is firefly
> > [root@ceph-mon-01 ~]# ceph osd set-require-min-compat-client jewel
> > set require_min_compat_client to jewel
> > [root@ceph-mon-01 ~]# ceph osd crush set-all-straw-buckets-to-straw2
> > [root@ceph-mon-01 ~]#
> >
> > After having issued the command, the cluster went into WARNING state
> > because ~12 % of the objects were misplaced.
> >
> > Is this normal?
> > I read somewhere that the migration from straw to straw2 should trigger
> > a data migration only if the OSDs have different sizes, which is not my
> > case.
>
> The relevant sizes to compare are the crush buckets across which you are
> replicating.
> Are you replicating host-wise or rack-wise?
> Do you have hosts/racks with a different crush weight (e.g. a different
> crush size)?
> Maybe share your `ceph osd tree`.
>
> Cheers, dan
>
> > The cluster is still recovering, but what is worrying me is that it
> > looks like data are being moved to the most used OSDs and the MAX_AVAIL
> > value is decreasing quite quickly.
> >
> > I hope that the recovery can finish without causing problems: then I
> > will immediately activate the balancer.
> >
> > But, if some OSDs are getting too full, is it safe to decrease their
> > weights while the cluster is still recovering?
> >
> > Thanks a lot for your help
> > Of course I can provide other info, if needed
> >
> > Cheers, Massimo
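
(For later reference, once the recovery has settled: enabling the balancer
and, if an OSD gets too full in the meantime, temporarily lowering its
override weight would look something like the following; the OSD id and the
0.95 value are only illustrative.)

    ceph mgr module enable balancer   # if the module is not enabled yet
    ceph balancer mode crush-compat
    ceph balancer on
    ceph balancer status

    ceph osd reweight 30 0.95         # override weight in [0..1]; id and value are examples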