Have attached the crush map in case anyone can see any issues there:
ceph osd df
ID CLASS WEIGHT REWEIGHT SIZE RAW USE DATA OMAP META AVAIL %USE VAR PGS STATUS
3 hdd 9.09599 1.00000 9.1 TiB 6.7 TiB 6.7 TiB 723 KiB 19 GiB 2.4 TiB 73.63 1.01 257 up
4 hdd 9.09599 1.00000 9.1 TiB 6.7 TiB 6.6 TiB 5.4 MiB 19 GiB 2.4 TiB 73.23 1.00 257 up
5 hdd 9.09599 1.00000 9.1 TiB 6.9 TiB 6.9 TiB 601 KiB 20 GiB 2.2 TiB 76.22 1.04 265 up
6 hdd 9.09599 1.00000 9.1 TiB 6.7 TiB 6.7 TiB 624 KiB 20 GiB 2.4 TiB 73.98 1.01 256 up
7 hdd 9.09599 1.00000 9.1 TiB 6.9 TiB 6.9 TiB 4.9 MiB 20 GiB 2.2 TiB 75.63 1.03 265 up
8 hdd 9.09599 1.00000 9.1 TiB 6.9 TiB 6.9 TiB 591 KiB 20 GiB 2.2 TiB 76.05 1.04 265 up
9 hdd 9.09599 1.00000 9.1 TiB 6.7 TiB 6.7 TiB 2.7 MiB 19 GiB 2.4 TiB 73.69 1.01 257 up
10 hdd 9.09599 1.00000 9.1 TiB 6.7 TiB 6.7 TiB 1.0 MiB 20 GiB 2.4 TiB 73.53 1.00 256 up
11 hdd 9.09599 1.00000 9.1 TiB 6.6 TiB 6.6 TiB 5.7 MiB 19 GiB 2.5 TiB 72.64 0.99 251 up
12 hdd 9.09599 1.00000 9.1 TiB 6.7 TiB 6.7 TiB 1.6 MiB 20 GiB 2.4 TiB 73.95 1.01 257 up
13 hdd 9.09599 1.00000 9.1 TiB 6.7 TiB 6.7 TiB 2.3 MiB 19 GiB 2.4 TiB 73.43 1.00 257 up
14 hdd 9.09599 1.00000 9.1 TiB 6.8 TiB 6.8 TiB 1.5 MiB 20 GiB 2.3 TiB 74.56 1.02 261 up
15 hdd 9.09599 1.00000 9.1 TiB 6.8 TiB 6.8 TiB 1.9 MiB 20 GiB 2.3 TiB 74.81 1.02 262 up
16 hdd 9.09599 1.00000 9.1 TiB 6.8 TiB 6.8 TiB 2.0 MiB 20 GiB 2.3 TiB 74.46 1.02 261 up
17 hdd 9.09599 1.00000 9.1 TiB 6.7 TiB 6.7 TiB 761 KiB 19 GiB 2.4 TiB 73.43 1.00 256 up
18 hdd 9.09599 1.00000 9.1 TiB 6.7 TiB 6.7 TiB 1.8 MiB 19 GiB 2.4 TiB 73.50 1.00 257 up
19 hdd 9.09599 1.00000 9.1 TiB 6.8 TiB 6.7 TiB 3.9 MiB 19 GiB 2.3 TiB 74.25 1.01 261 up
20 hdd 9.09599 1.00000 9.1 TiB 6.7 TiB 6.7 TiB 950 KiB 19 GiB 2.4 TiB 73.70 1.01 257 up
21 hdd 9.09599 1.00000 9.1 TiB 6.8 TiB 6.7 TiB 2.4 MiB 20 GiB 2.3 TiB 74.42 1.02 260 up
22 hdd 9.09599 1.00000 9.1 TiB 6.7 TiB 6.7 TiB 840 KiB 20 GiB 2.4 TiB 73.59 1.01 256 up
29 hdd 9.09599 1.00000 9.1 TiB 6.5 TiB 6.4 TiB 289 KiB 19 GiB 2.6 TiB 71.03 0.97 249 up
30 hdd 9.09599 1.00000 9.1 TiB 6.5 TiB 6.5 TiB 2.1 MiB 19 GiB 2.6 TiB 71.85 0.98 253 up
31 hdd 9.09599 1.00000 9.1 TiB 6.5 TiB 6.5 TiB 1.2 MiB 19 GiB 2.6 TiB 71.69 0.98 251 up
32 hdd 9.09599 1.00000 9.1 TiB 6.6 TiB 6.6 TiB 26 KiB 19 GiB 2.5 TiB 72.71 0.99 255 up
33 hdd 9.09599 1.00000 9.1 TiB 6.5 TiB 6.5 TiB 737 KiB 19 GiB 2.6 TiB 71.88 0.98 252 up
34 hdd 9.09599 1.00000 9.1 TiB 6.6 TiB 6.6 TiB 823 KiB 19 GiB 2.5 TiB 72.24 0.99 253 up
35 hdd 9.09599 1.00000 9.1 TiB 6.4 TiB 6.4 TiB 1.1 MiB 18 GiB 2.7 TiB 70.86 0.97 248 up
36 hdd 9.09599 1.00000 9.1 TiB 6.1 TiB 6.1 TiB 1.5 MiB 18 GiB 3.0 TiB 67.01 0.92 236 up
37 hdd 9.09599 1.00000 9.1 TiB 6.6 TiB 6.6 TiB 1.7 MiB 19 GiB 2.5 TiB 72.82 0.99 256 up
38 hdd 9.09599 1.00000 9.1 TiB 6.5 TiB 6.5 TiB 2.5 MiB 19 GiB 2.6 TiB 71.95 0.98 253 up
0 hdd 0.00999 1.00000 10 GiB 1.2 GiB 161 MiB 29 MiB 995 MiB 8.8 GiB 11.58 0.16 32 up
1 hdd 0.00999 1.00000 10 GiB 1.2 GiB 154 MiB 35 MiB 989 MiB 8.8 GiB 11.51 0.16 33 up
2 hdd 0.00999 1.00000 10 GiB 1.1 GiB 141 MiB 29 MiB 995 MiB 8.9 GiB 11.38 0.16 31 up
26 hdd 0.00999 1.00000 10 GiB 1.1 GiB 143 MiB 40 MiB 984 MiB 8.9 GiB 11.40 0.16 33 up
27 hdd 0.00999 1.00000 10 GiB 1.2 GiB 164 MiB 29 MiB 995 MiB 8.8 GiB 11.61 0.16 32 up
28 hdd 0.00999 1.00000 10 GiB 1.1 GiB 149 MiB 28 MiB 996 MiB 8.9 GiB 11.46 0.16 31 up
39 hdd 0.00999 1.00000 10 GiB 1.1 GiB 152 MiB 31 MiB 993 MiB 8.8 GiB 11.49 0.16 33 up
40 hdd 0.00999 1.00000 10 GiB 1.1 GiB 142 MiB 33 MiB 991 MiB 8.9 GiB 11.39 0.16 31 up
41 hdd 0.00999 1.00000 10 GiB 1.2 GiB 162 MiB 28 MiB 996 MiB 8.8 GiB 11.58 0.16 32 up
TOTAL 273 TiB 200 TiB 199 TiB 336 MiB 588 GiB 73 TiB 73.21
Thanks
---- On Tue, 05 May 2020 14:23:54 +0800 Ashley Merrick <singapore@xxxxxxxxxxxxxx> wrote ----
I have a cluster running 15.2.1 that was originally running 14.x. The cluster is running the balancer module in upmap mode (I have tried crush-compat in the past).
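For reference, the balancer was turned on with roughly the standard commands (status just confirms the mode and whether a plan is active):
ceph balancer mode upmap
ceph balancer on
ceph balancer status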
Most OSDs are around the same % used, give or take 0.x%; however, there is one OSD that is down a good few percent and a few that are above average by 1 or 2%. I have been trying to get the balancer to fix this.
I have tried running a manual osdmaptool command on an export of my map, but it lists no fixes, even though it does display the underfull OSD in its output (overfull 3,4,5,6,7,8,9,10,11,12,13,14,15,18,19,20 underfull [36]).
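The manual run was along the lines of the documented upmap workflow (the deviation value here is just an example):
ceph osd getmap -o om
osdmaptool om --upmap out.txt --upmap-deviation 1
cat out.txt   # proposed "ceph osd pg-upmap-items ..." commands; empty in my case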
The debug output is just lots of:
2020-05-05T06:15:39.172+0000 7f3dfb0c3c40 10 trying 2.55
2020-05-05T06:15:39.172+0000 7f3dfb0c3c40 10 2.55 [12,3,7,6,33,34,30,35,21,18] -> [12,3,7,6,33,34,30,35,21,16]
2020-05-05T06:15:39.172+0000 7f3dfb0c3c40 10 will try adding new remapping pair 18 -> 16 for 2.55 NOT selected osd
2020-05-05T06:15:39.172+0000 7f3dfb0c3c40 10 stddev 528.667 -> 528.667
2020-05-05T06:15:39.172+0000 7f3dfb0c3c40 10 Overfull search osd.7 target 170.667 deviation 9.33327
Is there anything I can do to try and balance the overfull PGs onto the underfull OSD to balance out the last bit?
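(I understand a single PG can also be remapped by hand with pg-upmap-items; purely as an illustration, using the pg and OSD ids from the log above:
ceph osd pg-upmap-items 2.55 18 36
but I would rather the balancer sort this out itself.)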
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54
# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd
device 3 osd.3 class hdd
device 4 osd.4 class hdd
device 5 osd.5 class hdd
device 6 osd.6 class hdd
device 7 osd.7 class hdd
device 8 osd.8 class hdd
device 9 osd.9 class hdd
device 10 osd.10 class hdd
device 11 osd.11 class hdd
device 12 osd.12 class hdd
device 13 osd.13 class hdd
device 14 osd.14 class hdd
device 15 osd.15 class hdd
device 16 osd.16 class hdd
device 17 osd.17 class hdd
device 18 osd.18 class hdd
device 19 osd.19 class hdd
device 20 osd.20 class hdd
device 21 osd.21 class hdd
device 22 osd.22 class hdd
device 26 osd.26 class hdd
device 27 osd.27 class hdd
device 28 osd.28 class hdd
device 29 osd.29 class hdd
device 30 osd.30 class hdd
device 31 osd.31 class hdd
device 32 osd.32 class hdd
device 33 osd.33 class hdd
device 34 osd.34 class hdd
device 35 osd.35 class hdd
device 36 osd.36 class hdd
device 37 osd.37 class hdd
device 38 osd.38 class hdd
device 39 osd.39 class hdd
device 40 osd.40 class hdd
device 41 osd.41 class hdd
# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 region
type 10 root
# buckets
host sn-m01 {
id -3 # do not change unnecessarily
id -4 class hdd # do not change unnecessarily
# weight 0.030
alg straw2
hash 0 # rjenkins1
item osd.0 weight 0.010
item osd.1 weight 0.010
item osd.2 weight 0.010
}
host sn-m03 {
id -15 # do not change unnecessarily
id -16 class hdd # do not change unnecessarily
# weight 0.030
alg straw2
hash 0 # rjenkins1
item osd.26 weight 0.010
item osd.27 weight 0.010
item osd.28 weight 0.010
}
host sn-m04 {
id -19 # do not change unnecessarily
id -20 class hdd # do not change unnecessarily
# weight 0.030
alg straw2
hash 0 # rjenkins1
item osd.39 weight 0.010
item osd.40 weight 0.010
item osd.41 weight 0.010
}
root meta {
id -1 # do not change unnecessarily
id -2 class hdd # do not change unnecessarily
# weight 0.087
alg straw2
hash 0 # rjenkins1
item sn-m01 weight 0.029
item sn-m03 weight 0.029
item sn-m04 weight 0.029
}
host sn-s01 {
id -5 # do not change unnecessarily
id -7 class hdd # do not change unnecessarily
# weight 90.960
alg straw2
hash 0 # rjenkins1
item osd.3 weight 9.096
item osd.4 weight 9.096
item osd.5 weight 9.096
item osd.6 weight 9.096
item osd.7 weight 9.096
item osd.8 weight 9.096
item osd.9 weight 9.096
item osd.10 weight 9.096
item osd.11 weight 9.096
item osd.12 weight 9.096
}
host sn-s02 {
id -9 # do not change unnecessarily
id -10 class hdd # do not change unnecessarily
# weight 90.960
alg straw2
hash 0 # rjenkins1
item osd.13 weight 9.096
item osd.14 weight 9.096
item osd.15 weight 9.096
item osd.16 weight 9.096
item osd.17 weight 9.096
item osd.18 weight 9.096
item osd.19 weight 9.096
item osd.20 weight 9.096
item osd.21 weight 9.096
item osd.22 weight 9.096
}
host sn-s03 {
id -17 # do not change unnecessarily
id -18 class hdd # do not change unnecessarily
# weight 90.960
alg straw2
hash 0 # rjenkins1
item osd.29 weight 9.096
item osd.30 weight 9.096
item osd.31 weight 9.096
item osd.32 weight 9.096
item osd.33 weight 9.096
item osd.34 weight 9.096
item osd.35 weight 9.096
item osd.36 weight 9.096
item osd.37 weight 9.096
item osd.38 weight 9.096
}
root ec {
id -6 # do not change unnecessarily
id -8 class hdd # do not change unnecessarily
# weight 272.871
alg straw2
hash 0 # rjenkins1
item sn-s01 weight 90.957
item sn-s02 weight 90.957
item sn-s03 weight 90.957
}
root default {
id -12 # do not change unnecessarily
id -14 class hdd # do not change unnecessarily
# weight 0.058
alg straw2
hash 0 # rjenkins1
item sn-m03 weight 0.029
item sn-m04 weight 0.029
}
# rules
rule replicated_rule {
id 0
type replicated
min_size 1
max_size 10
step take meta
step choose indep 0 type host
step chooseleaf indep 1 type osd
step emit
}
rule ec_rule {
id 2
type erasure
min_size 3
max_size 10
step set_chooseleaf_tries 5
step set_choose_tries 100
step take ec
step choose indep 0 type host
step chooseleaf indep 4 type osd
step emit
}
# choose_args
choose_args 18446744073709551615 {
{
bucket_id -1
weight_set [
[ 0.030 0.030 0.030 ]
]
}
{
bucket_id -2
weight_set [
[ 0.030 0.030 0.030 ]
]
}
{
bucket_id -3
weight_set [
[ 0.010 0.010 0.010 ]
]
}
{
bucket_id -4
weight_set [
[ 0.010 0.010 0.010 ]
]
}
{
bucket_id -5
weight_set [
[ 9.035 9.028 9.015 9.029 8.980 8.996 9.035 9.034 9.028 9.035 ]
]
}
{
bucket_id -6
weight_set [
[ 90.215 91.263 91.393 ]
]
}
{
bucket_id -7
weight_set [
[ 9.035 9.028 9.015 9.029 8.980 8.996 9.035 9.034 9.028 9.035 ]
]
}
{
bucket_id -8
weight_set [
[ 90.215 91.263 91.393 ]
]
}
{
bucket_id -9
weight_set [
[ 9.129 9.129 9.127 9.125 9.129 9.129 9.121 9.125 9.125 9.124 ]
]
}
{
bucket_id -10
weight_set [
[ 9.129 9.129 9.127 9.125 9.129 9.129 9.121 9.125 9.125 9.124 ]
]
}
{
bucket_id -12
weight_set [
[ 0.030 0.030 ]
]
}
{
bucket_id -14
weight_set [
[ 0.030 0.030 ]
]
}
{
bucket_id -15
weight_set [
[ 0.010 0.010 0.010 ]
]
}
{
bucket_id -16
weight_set [
[ 0.010 0.010 0.010 ]
]
}
{
bucket_id -17
weight_set [
[ 9.154 9.140 9.128 9.128 9.139 9.125 9.148 9.157 9.134 9.140 ]
]
}
{
bucket_id -18
weight_set [
[ 9.154 9.140 9.128 9.128 9.139 9.125 9.148 9.157 9.134 9.140 ]
]
}
{
bucket_id -19
weight_set [
[ 0.010 0.010 0.010 ]
]
}
{
bucket_id -20
weight_set [
[ 0.010 0.010 0.010 ]
]
}
}
# end crush map
_______________________________________________
ceph-users mailing list -- ceph-users@xxxxxxx
To unsubscribe send an email to ceph-users-leave@xxxxxxx