I observed that on an otherwise idle cluster, scrubbing cannot fully utilise the speed of my HDDs. `iostat` shows only 8-10 MB/s per disk, instead of the ~100 MB/s most HDDs can easily deliver. Changing scrubbing settings does not help (see below). Environment: * 6 active+clean+scrubbing+deep * Ceph version 16.2.7. * BlueStore * My cluster has many small objects ("402.32M objects, 38 TiB" from "ceph status") due to small files (4 - 32 KiB) on CephFS. `iostat -x 5` with default settings: Device r/s rkB/s rrqm/s %rrqm r_await rareq-sz w/s wkB/s wrqm/s %wrqm w_await wareq-sz d/s dkB/s drqm/s %drqm d_await dareq-sz f/s f_await aqu-sz %util dm-0 198.60 6878.40 0.00 0.00 12.78 34.63 51.80 2612.80 0.00 0.00 14.82 50.44 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 3.30 91.24 dm-1 0.80 3.20 0.00 0.00 11.50 4.00 52.60 2582.40 0.00 0.00 13.69 49.10 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.73 3.78 dm-10 11.20 71.20 0.00 0.00 0.09 6.36 145.80 583.20 0.00 0.00 0.14 4.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.02 2.62 dm-11 192.60 6737.60 0.00 0.00 10.74 34.98 34.80 1684.80 0.00 0.00 11.47 48.41 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 2.47 91.40 dm-12 245.40 10194.40 0.00 0.00 9.43 41.54 21.20 575.20 0.00 0.00 3.94 27.13 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 2.40 87.92 dm-13 30.80 1772.80 0.00 0.00 11.61 57.56 78.80 4507.20 0.00 0.00 19.78 57.20 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1.92 9.54 dm-14 3.20 24.80 0.00 0.00 0.12 7.75 125.20 500.80 0.00 0.00 0.12 4.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01 2.18 dm-15 2.80 19.20 0.00 0.00 0.14 6.86 105.40 421.60 0.00 0.00 0.05 4.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01 1.76 dm-16 0.80 6.40 0.00 0.00 0.00 8.00 111.00 444.00 0.00 0.00 0.10 4.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01 1.82 dm-17 10.80 76.80 0.00 0.00 0.09 7.11 151.40 605.60 0.00 0.00 0.08 4.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01 2.92 dm-18 10.20 67.20 0.00 0.00 0.08 6.59 115.60 462.40 0.00 0.00 0.04 4.00 0.00 0.00 0.00 0.00 0.00 0.00 
0.00 0.00 0.01 2.16 dm-19 10.20 56.80 0.00 0.00 0.10 5.57 109.00 436.00 0.00 0.00 0.07 4.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01 2.34 dm-2 4.80 435.20 0.00 0.00 0.12 90.67 751.80 6292.80 0.00 0.00 0.07 8.37 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.05 16.14 dm-20 0.40 2.40 0.00 0.00 0.00 6.00 265.00 2459.20 0.00 0.00 0.10 9.28 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.03 5.36 dm-21 191.00 6105.60 0.00 0.00 6.34 31.97 67.80 3748.00 0.00 0.00 19.56 55.28 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 2.54 42.22 dm-3 1.00 8.80 0.00 0.00 0.00 8.80 91.00 364.00 0.00 0.00 0.04 4.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1.54 dm-4 167.60 4973.60 0.00 0.00 10.15 29.68 49.20 2511.20 0.00 0.00 11.39 51.04 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 2.26 89.18 dm-5 11.20 73.60 0.00 0.00 0.12 6.57 124.40 497.60 0.00 0.00 0.07 4.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01 2.16 dm-6 27.20 1644.80 0.00 0.00 12.22 60.47 57.20 3316.80 0.00 0.00 15.93 57.99 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1.24 6.78 dm-7 217.40 8032.80 0.00 0.00 12.04 36.95 64.40 3654.40 0.00 0.00 23.69 56.75 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4.14 97.28 dm-8 10.80 70.40 0.00 0.00 0.15 6.52 111.80 447.20 0.00 0.00 0.04 4.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01 2.32 dm-9 1.60 8.00 0.00 0.00 13.25 5.00 46.60 2563.20 0.00 0.00 9.08 55.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.44 3.70 md127 1.60 107.20 0.00 0.00 0.00 67.00 142.00 1856.00 0.00 0.00 0.05 13.07 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01 2.80 nvme0n1 42.00 772.00 0.00 0.00 0.10 18.38 1001.00 10503.50 451.20 31.07 0.03 10.49 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.04 25.20 nvme1n1 37.20 248.00 0.00 0.00 0.09 6.67 609.80 6723.50 369.00 37.70 0.04 11.03 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.03 15.80 sda 0.80 3.20 0.00 0.00 11.50 4.00 33.60 2582.40 19.00 36.12 8.43 76.86 0.00 0.00 0.00 0.00 0.00 0.00 3.20 0.25 0.29 3.78 sdb 117.80 10195.20 127.80 52.04 11.53 86.55 18.80 575.20 2.40 11.32 3.93 30.60 0.00 0.00 0.00 0.00 0.00 
0.00 2.80 3.93 1.44 87.90 sdc 4.60 1644.80 22.60 83.09 11.74 357.57 23.60 3316.80 33.60 58.74 8.53 140.54 0.00 0.00 0.00 0.00 0.00 0.00 1.80 4.56 0.26 6.74 sdd 109.40 4975.20 58.40 34.80 12.19 45.48 22.00 2511.20 27.20 55.28 5.75 114.15 0.00 0.00 0.00 0.00 0.00 0.00 2.80 6.07 1.48 89.16 sde 115.40 6563.20 77.00 40.02 12.21 56.87 26.20 1684.80 8.60 24.71 7.82 64.31 0.00 0.00 0.00 0.00 0.00 0.00 1.80 6.78 1.63 91.36 sdf 6.40 1772.80 24.40 79.22 14.84 277.00 39.20 4507.20 39.60 50.25 13.93 114.98 0.00 0.00 0.00 0.00 0.00 0.00 3.20 2.25 0.65 9.54 sdg 121.60 8033.60 95.80 44.07 12.39 66.07 30.60 3654.40 33.80 52.48 12.47 119.42 0.00 0.00 0.00 0.00 0.00 0.00 2.60 8.77 1.91 97.24 sdh 122.00 6105.60 69.00 36.13 5.47 50.05 33.00 3748.00 34.80 51.33 16.94 113.58 0.00 0.00 0.00 0.00 0.00 0.00 2.20 5.00 1.24 42.20 sdi 117.00 6856.80 81.60 41.09 12.25 58.61 32.00 2612.80 19.80 38.22 10.80 81.65 0.00 0.00 0.00 0.00 0.00 0.00 2.60 8.46 1.80 91.18 sdj 1.60 8.00 0.00 0.00 13.12 5.00 31.20 2563.20 15.40 33.05 5.06 82.15 0.00 0.00 0.00 0.00 0.00 0.00 2.00 1.70 0.18 3.70 With settings osd_deep_scrub_stride = 4194304 osd_scrub_load_threshold = 20 osd_scrub_chunk_min = 15 osd_scrub_chunk_max = 75 osd_max_scrubs = 3 I get a slight improvement only: Device r/s rkB/s rrqm/s %rrqm r_await rareq-sz w/s wkB/s wrqm/s %wrqm w_await wareq-sz d/s dkB/s drqm/s %drqm d_await dareq-sz f/s f_await aqu-sz %util dm-0 400.60 14686.40 0.00 0.00 19.93 36.66 25.20 1197.60 0.00 0.00 27.44 47.52 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 8.67 91.32 dm-1 362.60 10583.20 0.00 0.00 18.58 29.19 30.40 1742.40 0.00 0.00 23.32 57.32 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.45 98.14 dm-10 6.40 64.00 0.00 0.00 0.03 10.00 37.60 150.40 0.00 0.00 0.06 4.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1.02 dm-11 76.60 1939.20 0.00 0.00 10.61 25.32 35.20 1885.60 0.00 0.00 22.32 53.57 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1.60 29.06 dm-12 93.00 4178.40 0.00 0.00 16.04 44.93 4.40 96.00 0.00 0.00 0.68 21.82 0.00 0.00 0.00 
0.00 0.00 0.00 0.00 0.00 1.50 19.04 dm-13 376.80 12716.00 0.00 0.00 15.90 33.75 27.20 1444.80 0.00 0.00 23.76 53.12 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6.64 97.76 dm-14 11.80 312.80 0.00 0.00 0.12 26.51 33.40 133.60 0.00 0.00 0.00 4.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.96 dm-15 7.00 430.40 0.00 0.00 0.17 61.49 46.20 184.80 0.00 0.00 0.06 4.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.94 dm-16 11.80 229.60 0.00 0.00 0.10 19.46 49.20 196.80 0.00 0.00 0.09 4.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01 1.20 dm-17 4.00 39.20 0.00 0.00 0.10 9.80 29.60 118.40 0.00 0.00 0.00 4.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.72 dm-18 9.00 94.40 0.00 0.00 0.07 10.49 37.20 148.80 0.00 0.00 0.30 4.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01 0.92 dm-19 7.40 70.40 0.00 0.00 0.11 9.51 56.80 227.20 0.00 0.00 0.02 4.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1.04 dm-2 0.00 0.00 0.00 0.00 0.00 0.00 62.00 482.40 0.00 0.00 0.07 7.78 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1.16 dm-20 12.40 2375.20 0.00 0.00 0.23 191.55 314.20 1524.80 0.00 0.00 0.03 4.85 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01 7.90 dm-21 321.00 9468.00 0.00 0.00 7.86 29.50 31.20 1630.40 0.00 0.00 11.69 52.26 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 2.89 42.84 dm-3 12.40 125.60 0.00 0.00 0.08 10.13 37.00 148.00 0.00 0.00 0.02 4.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.84 dm-4 374.00 13150.40 0.00 0.00 18.31 35.16 23.60 1100.80 0.00 0.00 12.42 46.64 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.14 96.70 dm-5 8.00 84.00 0.00 0.00 0.10 10.50 38.00 152.00 0.00 0.00 0.03 4.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 1.00 dm-6 201.60 6619.20 0.00 0.00 11.56 32.83 3.40 108.80 0.00 0.00 2.29 32.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 2.34 58.24 dm-7 414.40 14476.00 0.00 0.00 21.99 34.93 10.80 235.20 0.00 0.00 5.19 21.78 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 9.17 98.00 dm-8 0.60 14.40 0.00 0.00 0.00 24.00 40.80 163.20 0.00 0.00 0.04 4.00 0.00 0.00 0.00 0.00 0.00 0.00 
0.00 0.00 0.00 0.70 dm-9 478.00 17300.80 0.00 0.00 23.06 36.19 4.40 92.00 0.00 0.00 5.00 20.91 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 11.05 98.00 md127 5.00 64.80 0.00 0.00 0.04 12.96 133.80 2014.40 0.00 0.00 0.08 15.06 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01 2.32 nvme0n1 33.60 822.40 0.00 0.00 0.09 24.48 252.40 3388.10 139.40 35.58 0.04 13.42 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01 7.24 nvme1n1 62.60 3082.40 0.00 0.00 0.10 49.24 427.40 4272.90 177.20 29.31 0.03 10.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.02 13.70 sda 236.20 10613.60 126.20 34.82 15.19 44.93 11.80 1742.40 18.60 61.18 13.83 147.66 0.00 0.00 0.00 0.00 0.00 0.00 1.40 20.29 3.78 98.14 sdb 35.80 4166.40 56.80 61.34 10.03 116.38 4.20 96.00 0.20 4.55 0.81 22.86 0.00 0.00 0.00 0.00 0.00 0.00 0.80 0.25 0.36 19.04 sdc 118.20 6612.80 83.00 41.25 9.15 55.95 3.00 108.80 0.40 11.76 2.07 36.27 0.00 0.00 0.00 0.00 0.00 0.00 0.60 3.67 1.09 58.20 sdd 207.00 13152.00 166.60 44.59 13.92 63.54 15.80 1100.80 7.80 33.05 6.90 69.67 0.00 0.00 0.00 0.00 0.00 0.00 1.80 9.67 3.01 96.66 sde 55.80 1939.20 20.80 27.15 9.06 34.75 17.40 1885.60 17.80 50.57 7.51 108.37 0.00 0.00 0.00 0.00 0.00 0.00 1.80 1.44 0.64 29.04 sdf 221.40 12720.00 155.20 41.21 14.34 57.45 14.60 1444.80 12.60 46.32 10.51 98.96 0.00 0.00 0.00 0.00 0.00 0.00 1.40 11.71 3.35 97.70 sdg 234.60 14490.40 179.80 43.39 15.66 61.77 9.40 235.20 1.20 11.32 5.36 25.02 0.00 0.00 0.00 0.00 0.00 0.00 2.00 11.10 3.75 97.98 sdh 212.20 9470.40 109.20 33.98 4.05 44.63 19.40 1630.40 11.80 37.82 6.41 84.04 0.00 0.00 0.00 0.00 0.00 0.00 2.00 4.10 0.99 42.78 sdi 214.00 14684.00 186.60 46.58 14.46 68.62 10.00 1197.60 15.20 60.32 11.14 119.76 0.00 0.00 0.00 0.00 0.00 0.00 2.40 13.67 3.24 91.28 sdj 250.60 17296.80 227.00 47.53 18.76 69.02 3.60 92.00 0.80 18.18 5.72 25.56 0.00 0.00 0.00 0.00 0.00 0.00 0.80 16.25 4.73 98.00 `osd_max_scrubs` creates more reads per second (~120 -> ~220), but does not proportionately increase read throughput (as is expected on a spinning 
disk). Overall, this looks like scrub operations are seek-bound. Looking at `strace -fyp 640105 -e io_submit` output, for lines that involve my HDD `/dev/dm-12`: io_submit(0x7fd5f6668000, 2, [{aio_data=0, aio_lio_opcode=IOCB_CMD_PREADV, aio_fildes=30</dev/dm-12>, aio_buf=[{iov_base=0x7fd55baa3000, iov_len=4096}], aio_offset=1654046691328}, {aio_data=0, aio_lio_opcode=IOCB_CMD_PREADV, aio_fildes=30</dev/dm-12>, aio_buf=[{iov_base=0x7fd54f378000, iov_len=24576}], aio_offset=1654046666752}]) = 2 io_submit(0x7fd5f6668000, 2, [{aio_data=0, aio_lio_opcode=IOCB_CMD_PREADV, aio_fildes=30</dev/dm-12>, aio_buf=[{iov_base=0x7fd54f262000, iov_len=4096}], aio_offset=1934563819520}, {aio_data=0, aio_lio_opcode=IOCB_CMD_PREADV, aio_fildes=30</dev/dm-12>, aio_buf=[{iov_base=0x7fd54f28b000, iov_len=4096}], aio_offset=1934563823616}]) = 2 io_submit(0x7fd5f6668000, 2, [{aio_data=0, aio_lio_opcode=IOCB_CMD_PREADV, aio_fildes=30</dev/dm-12>, aio_buf=[{iov_base=0x7fd54f3c7000, iov_len=28672}], aio_offset=2871307956224}, {aio_data=0, aio_lio_opcode=IOCB_CMD_PREADV, aio_fildes=30</dev/dm-12>, aio_buf=[{iov_base=0x7fd54f2d2000, iov_len=4096}], aio_offset=2871307984896}]) = 2 io_submit(0x7fd5f6668000, 2, [{aio_data=0, aio_lio_opcode=IOCB_CMD_PREADV, aio_fildes=30</dev/dm-12>, aio_buf=[{iov_base=0x7fd54f34a000, iov_len=8192}], aio_offset=4056494669824}, {aio_data=0, aio_lio_opcode=IOCB_CMD_PREADV, aio_fildes=30</dev/dm-12>, aio_buf=[{iov_base=0x7fd54f35f000, iov_len=4096}], aio_offset=4056494678016}]) = 2 io_submit(0x7fd5f6668000, 2, [{aio_data=0, aio_lio_opcode=IOCB_CMD_PREADV, aio_fildes=30</dev/dm-12>, aio_buf=[{iov_base=0x7fd54f3e3000, iov_len=53248}], aio_offset=1233895051264}, {aio_data=0, aio_lio_opcode=IOCB_CMD_PREADV, aio_fildes=30</dev/dm-12>, aio_buf=[{iov_base=0x7fd54f0c6000, iov_len=4096}], aio_offset=1233895104512}]) = 2 io_submit(0x7fd5f6668000, 2, [{aio_data=0, aio_lio_opcode=IOCB_CMD_PREADV, aio_fildes=30</dev/dm-12>, aio_buf=[{iov_base=0x7fd54f1e2000, iov_len=4096}], 
aio_offset=2155792314368}, {aio_data=0, aio_lio_opcode=IOCB_CMD_PREADV, aio_fildes=30</dev/dm-12>, aio_buf=[{iov_base=0x7fd54f303000, iov_len=24576}], aio_offset=2155792289792}]) = 2 This looks like it is reading all over the HDD with small reads (compare the widely scattered `aio_offset` values and the small `iov_len` sizes). Does Ceph scrubbing perform an HDD seek for every object? I had imagined that scrubbing might be able to read linearly through the disk (perhaps skipping larger gaps). Is that wrong? Thanks! _______________________________________________ ceph-users mailing list -- ceph-users@xxxxxxx To unsubscribe send an email to ceph-users-leave@xxxxxxx