Hi,

I know this topic seems to come up a lot (as far as I can tell), but I have reached the end of my Google-fu.

* We have OSDs that are nearly full, but also OSDs that are only about 50% used.
* The cluster contains 4, 8, and 16 TB rotating disks.
* The disks that get packed full are 4 TB disks, and the emptiest disks are also 4 TB.
* The OSD nodes all have roughly the same total capacity (51-59 TiB).
* The balancer tells me that it cannot find further optimization, or that pg_num is decreasing.

How can I debug this further before the cluster gets into a bad state?
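The only next steps I can think of are re-checking the balancer's own score and tightening the upmap deviation target. Roughly this (a sketch, not yet run against this cluster; I believe upmap_max_deviation defaults to 5, and the jq field names assume the JSON layout that `ceph osd df -f json` emits on our release):

    # Score the current PG distribution (lower is better), overall and for the big data pool.
    ceph balancer eval
    ceph balancer eval eu-central-1.rgw.buckets.data

    # Dump PG count and utilization per OSD to quantify the skew.
    ceph osd df -f json | jq -r '.nodes[] | [.id, .pgs, .utilization] | @tsv' | sort -nk2

    # Let the upmap balancer aim for a +/-1 PG spread per OSD instead of the default 5.
    ceph config set mgr mgr/balancer/upmap_max_deviation 1

Would that be the right direction? For reference, here is the current state of the cluster: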
[root@s3db1 ~]# ceph osd df tree | sort -nk 17 | head -n 30
ID   CLASS  WEIGHT     REWEIGHT  SIZE     RAW USE  DATA     OMAP     META     AVAIL    %USE   VAR   PGS  STATUS  TYPE NAME
                  MIN/MAX VAR: 0.75/1.23  STDDEV: 6.96
                       TOTAL     673 TiB  474 TiB  452 TiB  100 GiB  1.2 TiB  199 TiB  70.39
 -3         58.49872         -  58 TiB   39 TiB   36 TiB   8.2 GiB  85 GiB   19 TiB   67.40  0.96     -          host s3db2
 -4         58.49872         -  58 TiB   40 TiB   35 TiB   35 GiB   81 GiB   19 TiB   67.91  0.96     -          host s3db3
-11         50.94173         -  51 TiB   35 TiB   35 TiB   3.5 GiB  94 GiB   16 TiB   68.00  0.97     -          host s3db10
-10         51.28331         -  51 TiB   35 TiB   35 TiB   4.6 GiB  93 GiB   16 TiB   69.16  0.98     -          host s3db9
 -6         58.89636         -  59 TiB   41 TiB   40 TiB   2.4 GiB  102 GiB  18 TiB   69.15  0.98     -          host s3db5
-12         50.99052         -  51 TiB   36 TiB   36 TiB   1.8 GiB  93 GiB   15 TiB   69.99  0.99     -          host s3db11
 -2         58.20561         -  58 TiB   41 TiB   37 TiB   9.6 GiB  96 GiB   17 TiB   70.00  0.99     -          host s3db1
 -1        673.44452         -  673 TiB  474 TiB  452 TiB  100 GiB  1.2 TiB  199 TiB  70.39  1.00     -          root default
 -5         58.49872         -  58 TiB   42 TiB   35 TiB   7.0 GiB  94 GiB   17 TiB   71.06  1.01     -          host s3db4
 -8         58.89636         -  59 TiB   42 TiB   42 TiB   3.6 GiB  108 GiB  17 TiB   71.91  1.02     -          host s3db7
 -7         58.89636         -  59 TiB   43 TiB   42 TiB   15 GiB   120 GiB  16 TiB   72.69  1.03     -          host s3db6
-37         58.55478         -  59 TiB   43 TiB   43 TiB   4.4 GiB  117 GiB  16 TiB   73.18  1.04     -          host s3db12
 -9         51.28331         -  51 TiB   38 TiB   38 TiB   4.9 GiB  103 GiB  13 TiB   74.18  1.05     -          host s3db8
 15  hdd     3.63689   1.00000  3.6 TiB  1.9 TiB  1.7 TiB  2.1 GiB  0 B      1.7 TiB  52.87  0.75    45  up      osd.15
  6  hdd     3.63689   1.00000  3.6 TiB  1.9 TiB  1.7 TiB  1.7 GiB  0 B      1.7 TiB  52.90  0.75    46  up      osd.6
 12  hdd     3.63689   1.00000  3.6 TiB  1.9 TiB  1.7 TiB  570 MiB  0 B      1.7 TiB  53.04  0.75    41  up      osd.12
 81  hdd     3.63689   1.00000  3.6 TiB  2.0 TiB  1.7 TiB  895 MiB  0 B      1.7 TiB  54.26  0.77    51  up      osd.81
 27  hdd     3.73630   1.00000  3.7 TiB  2.1 TiB  2.0 TiB  6.8 MiB  5.8 GiB  1.6 TiB  56.12  0.80    47  up      osd.27
  3  hdd     3.63689   1.00000  3.6 TiB  2.1 TiB  1.6 TiB  510 MiB  0 B      1.6 TiB  57.04  0.81    51  up      osd.3
  5  hdd     3.63689   1.00000  3.6 TiB  2.1 TiB  1.5 TiB  431 MiB  0 B      1.5 TiB  57.88  0.82    49  up      osd.5
 80  hdd     3.63689   1.00000  3.6 TiB  2.1 TiB  1.5 TiB  1.8 GiB  0 B      1.5 TiB  58.31  0.83    51  up      osd.80
 25  hdd     3.73630   1.00000  3.7 TiB  2.2 TiB  2.1 TiB  4.1 MiB  6.1 GiB  1.5 TiB  58.91  0.84    39  up      osd.25
  0  hdd     3.73630   1.00000  3.7 TiB  2.2 TiB  2.1 TiB  83 MiB   6.2 GiB  1.5 TiB  60.03  0.85    46  up      osd.0
 79  hdd     3.63689   1.00000  3.6 TiB  2.3 TiB  1.4 TiB  1.8 GiB  0 B      1.4 TiB  62.53  0.89    47  up      osd.79
 61  hdd     7.32619   1.00000  7.3 TiB  4.6 TiB  4.6 TiB  1.1 GiB  12 GiB   2.7 TiB  62.80  0.89   101  up      osd.61
 67  hdd     7.27739   1.00000  7.3 TiB  4.6 TiB  4.6 TiB  557 MiB  13 GiB   2.7 TiB  63.29  0.90    96  up      osd.67
 72  hdd     7.32619   1.00000  7.3 TiB  4.6 TiB  4.6 TiB  107 MiB  11 GiB   2.7 TiB  63.36  0.90    87  up      osd.72

[root@s3db1 ~]# ceph osd df tree | sort -nk 17 | tail
 51  hdd     7.27739   1.00000  7.3 TiB  5.6 TiB  5.5 TiB  724 MiB  14 GiB   1.7 TiB  76.34  1.08   105  up      osd.51
 71  hdd     3.68750   1.00000  3.7 TiB  2.8 TiB  2.8 TiB  3.7 MiB  7.8 GiB  867 GiB  77.04  1.09    47  up      osd.71
 82  hdd     3.63689   1.00000  3.6 TiB  2.8 TiB  839 GiB  628 MiB  0 B      839 GiB  77.48  1.10    45  up      osd.82
 14  hdd     3.63689   1.00000  3.6 TiB  2.9 TiB  777 GiB  18 GiB   0 B      777 GiB  79.14  1.12    59  up      osd.14
  4  hdd     3.63689   1.00000  3.6 TiB  2.9 TiB  752 GiB  826 MiB  0 B      752 GiB  79.80  1.13    53  up      osd.4
 75  hdd     3.68750   1.00000  3.7 TiB  2.9 TiB  2.9 TiB  523 MiB  8.2 GiB  757 GiB  79.95  1.14    53  up      osd.75
 76  hdd     3.68750   1.00000  3.7 TiB  3.0 TiB  3.0 TiB  237 MiB  9.2 GiB  668 GiB  82.30  1.17    50  up      osd.76
 33  hdd     3.73630   1.00000  3.7 TiB  3.1 TiB  3.0 TiB  380 MiB  8.5 GiB  671 GiB  82.46  1.17    57  up      osd.33
 34  hdd     3.73630   1.00000  3.7 TiB  3.1 TiB  3.0 TiB  464 MiB  8.4 GiB  605 GiB  84.18  1.20    60  up      osd.34
 35  hdd     3.73630   1.00000  3.7 TiB  3.2 TiB  3.1 TiB  352 MiB  8.7 GiB  515 GiB  86.55  1.23    53  up      osd.35

[root@s3db1 ~]# ceph balancer status
{
    "last_optimize_duration": "0:00:00.020142",
    "plans": [],
    "mode": "upmap",
    "active": true,
    "optimize_result": "Unable to find further optimization, or pool(s) pg_num is decreasing, or distribution is already perfect",
    "last_optimize_started": "Thu Mar 11 13:42:32 2021"
}

[root@s3db1 ~]# ceph df
RAW STORAGE:
    CLASS     SIZE        AVAIL       USED        RAW USED     %RAW USED
    hdd       673 TiB     199 TiB     474 TiB     474 TiB          70.41
    TOTAL     673 TiB     199 TiB     474 TiB     474 TiB          70.41

POOLS:
    POOL                               ID    PGS     STORED      OBJECTS    USED        %USED    MAX AVAIL
    rbd                                 0     64        0 B            0       0 B          0       19 TiB
    .rgw.root                           1     64     98 KiB          118    98 KiB          0       19 TiB
    eu-central-1.rgw.control            2     64        0 B            8       0 B          0       19 TiB
    eu-central-1.rgw.data.root          3     64   1022 KiB        3.02k  1022 KiB          0       19 TiB
    eu-central-1.rgw.gc                 4     64     84 MiB           32    84 MiB          0       19 TiB
    eu-central-1.rgw.log                5     64    220 MiB          564   220 MiB          0       19 TiB
    eu-central-1.rgw.users.uid          6     64    2.8 MiB        6.89k   2.8 MiB          0       19 TiB
    eu-central-1.rgw.users.keys         7     64    262 KiB        6.70k   262 KiB          0       19 TiB
    eu-central-1.rgw.meta               8     64    384 KiB           1k   384 KiB          0       19 TiB
    eu-central-1.rgw.users.email        9     64       40 B            1      40 B          0       19 TiB
    eu-central-1.rgw.buckets.index     10     64     10 GiB       67.54k    10 GiB       0.02       19 TiB
    eu-central-1.rgw.buckets.data      11   1024    151 TiB      106.48M   151 TiB      72.67       19 TiB
    eu-central-1.rgw.buckets.non-ec    12     64    268 MiB       13.03k   268 MiB          0       19 TiB
    eu-central-1.rgw.usage             13     64    501 MiB           32   501 MiB          0       19 TiB
    eu-msg-1.rgw.control               56     64        0 B            8       0 B          0       19 TiB
    eu-msg-1.rgw.data.root             57     64     71 KiB          221    71 KiB          0       19 TiB
    eu-msg-1.rgw.gc                    58     64     60 KiB           32    60 KiB          0       19 TiB
    eu-msg-1.rgw.log                   59     64    835 KiB          242   835 KiB          0       19 TiB
    eu-msg-1.rgw.users.uid             60     64     56 KiB          107    56 KiB          0       19 TiB
    eu-msg-1.rgw.usage                 61     64     36 MiB           25    36 MiB          0       19 TiB
    eu-msg-1.rgw.users.keys            62     64    3.8 KiB           97   3.8 KiB          0       19 TiB
    eu-msg-1.rgw.meta                  63     64    600 KiB        1.58k   600 KiB          0       19 TiB
    eu-msg-1.rgw.buckets.index         64     64     46 MiB          112    46 MiB          0       19 TiB
    eu-msg-1.rgw.users.email           65     64        0 B            0       0 B          0       19 TiB
    eu-msg-1.rgw.buckets.data          66     64    2.8 TiB        1.14M   2.8 TiB       4.76       19 TiB
    eu-msg-1.rgw.buckets.non-ec        67     64    2.2 MiB          353   2.2 MiB          0       19 TiB
    default.rgw.control                69     32        0 B            8       0 B          0       19 TiB
    default.rgw.data.root              70     32        0 B            0       0 B          0       19 TiB
    default.rgw.gc                     71     32        0 B            0       0 B          0       19 TiB
    default.rgw.log                    72     32        0 B            0       0 B          0       19 TiB
    default.rgw.users.uid              73     32        0 B            0       0 B          0       19 TiB
    fra-1.rgw.control                  74     32        0 B            8       0 B          0       19 TiB
    fra-1.rgw.meta                     75     32        0 B            0       0 B          0       19 TiB
    fra-1.rgw.log                      76     32       50 B           28      50 B          0       19 TiB
    whitespace-again-2021-03-10        77     64    111 MiB      363.94k   111 MiB          0       19 TiB
    whitespace-again-2021-03-10_2      78     32     18 KiB           59    18 KiB          0       19 TiB
    whitespace-again-2021-03-10_3      79     32     11 KiB           36    11 KiB          0       19 TiB

--
The "UTF-8 problems" self-help group will, as an exception, meet in the big hall this time.