> First of all I'd like to clarify what exact command are you using to > assess the fragmentation. There are two options: "bluestore allocator > score" and "bluestore allocator fragmentation" I am using this one: "ceph daemon osd.$i bluestore allocator score block" > Both are not very accurate though but it would be interesting to have > both numbers for the case with presumably high fragmentation. Here are the numbers from a single server, to keep the email shorter; the scores on the other 2 nodes are almost identical. I recreated OSDs 0 and 1 46h ago and they were perfect; now they are already extra slow and fragmented: for i in 5 9 0 1 ; do echo $i ; ceph daemon osd.$i bluestore allocator score block ; done > > 5 > { > "fragmentation_rating": 0.29451514185657074 > } > 9 > { > "fragmentation_rating": 0.29940778224909959 > } > 0 > { > "fragmentation_rating": 0.84247390671066713 > } > 1 > { > "fragmentation_rating": 0.78098161172652247 > } for i in 5 9 0 1 ; do echo $i ; ceph daemon osd.$i bluestore allocator fragmentation block ; done > 5 > { > "fragmentation_rating": 0.0055253213950322861 > } > 9 > { > "fragmentation_rating": 0.0053455960516075665 > } > 0 > { > "fragmentation_rating": 0.014439265895713198 > } > 1 > { > "fragmentation_rating": 0.013245320572893494 > } > In this respect could you please try to switch bluestore and bluefs > allocators to bitmap and run some smoke benchmarking again. Can I change this on a live server (is there any possibility of losing data, etc.)? Can you please share the correct procedure? > Additionally you might want to upgrade to 15.2.16 which includes a bunch > of improvements for Avl/Hybrid allocators tail latency numbers as per > the ticket above. At the moment we use the PVE repository, where 15.2.15 is the latest; I will need to either wait for 15.2.16 from them or create a second cluster without Proxmox, but I would like to test on the existing one. Is there any difference between PVE Ceph and regular Ceph, so that I could change the repo and install over the existing installation? 
> And finally it would be great to get bluestore performance counters for > both good and bad benchmarks. This can be obtained via: ceph tell osd.N > perf dump bluestore > > but please reset the counters before each benchmarking with: ceph tell > osd.N perf reset all DATEBENCH=$(date +"%Y-%m-%d-%H-%M-%S") && ceph tell osd.0 perf reset all && ceph tell osd.0 bench >> /root/ceph_osd_bench_results/$DATEBENCH-perf-dump-bluestore-osd-0-and-bench-fragmented.log && ceph tell osd.0 perf dump bluestore >> /root/ceph_osd_bench_results/$DATEBENCH-perf-dump-bluestore-osd-0-and-bench-fragmented.log { > "bytes_written": 1073741824, > "blocksize": 4194304, > "elapsed_sec": 1.1402847469999999, > "bytes_per_sec": 941643591.06348729, > "iops": 224.50532700144942 > } > { > "bluestore": { > "kv_flush_lat": { > "avgcount": 142, > "sum": 0.000820554, > "avgtime": 0.000005778 > }, > "kv_commit_lat": { > "avgcount": 142, > "sum": 1.208369108, > "avgtime": 0.008509641 > }, > "kv_sync_lat": { > "avgcount": 142, > "sum": 1.209189662, > "avgtime": 0.008515420 > }, > "kv_final_lat": { > "avgcount": 141, > "sum": 0.044558120, > "avgtime": 0.000316015 > }, > "state_prepare_lat": { > "avgcount": 407, > "sum": 1.443276139, > "avgtime": 0.003546133 > }, > "state_aio_wait_lat": { > "avgcount": 407, > "sum": 12.148961431, > "avgtime": 0.029850028 > }, > "state_io_done_lat": { > "avgcount": 407, > "sum": 0.009644771, > "avgtime": 0.000023697 > }, > "state_kv_queued_lat": { > "avgcount": 407, > "sum": 5.441919173, > "avgtime": 0.013370808 > }, > "state_kv_commiting_lat": { > "avgcount": 407, > "sum": 8.541078753, > "avgtime": 0.020985451 > }, > "state_kv_done_lat": { > "avgcount": 407, > "sum": 0.000117127, > "avgtime": 0.000000287 > }, > "state_deferred_queued_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "state_deferred_aio_wait_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "state_deferred_cleanup_lat": { > "avgcount": 0, > "sum": 
0.000000000, > "avgtime": 0.000000000 > }, > "state_finishing_lat": { > "avgcount": 407, > "sum": 0.000041350, > "avgtime": 0.000000101 > }, > "state_done_lat": { > "avgcount": 407, > "sum": 0.033037493, > "avgtime": 0.000081173 > }, > "throttle_lat": { > "avgcount": 407, > "sum": 1.196686027, > "avgtime": 0.002940260 > }, > "submit_lat": { > "avgcount": 407, > "sum": 1.550149893, > "avgtime": 0.003808722 > }, > "commit_lat": { > "avgcount": 407, > "sum": 27.584937706, > "avgtime": 0.067776259 > }, > "read_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "read_onode_meta_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "read_wait_aio_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "compress_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "decompress_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "csum_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "compress_success_count": 0, > "compress_rejected_count": 0, > "write_pad_bytes": 0, > "deferred_write_ops": 0, > "deferred_write_bytes": 0, > "write_penalty_read_ops": 0, > "bluestore_allocated": 325989441536, > "bluestore_stored": 325985609973, > "bluestore_compressed": 0, > "bluestore_compressed_allocated": 0, > "bluestore_compressed_original": 0, > "bluestore_onodes": 39628, > "bluestore_pinned_onodes": 2, > "bluestore_onode_hits": 556, > "bluestore_onode_misses": 256, > "bluestore_onode_shard_hits": 2971, > "bluestore_onode_shard_misses": 0, > "bluestore_extents": 1014903, > "bluestore_blobs": 1014603, > "bluestore_buffers": 10984, > "bluestore_buffer_bytes": 121667584, > "bluestore_buffer_hit_bytes": 0, > "bluestore_buffer_miss_bytes": 0, > "bluestore_write_big": 406, > "bluestore_write_big_bytes": 1079267328, > "bluestore_write_big_blobs": 16606, > "bluestore_write_small": 0, > "bluestore_write_small_bytes": 0, > 
"bluestore_write_small_unused": 0, > "bluestore_write_small_deferred": 0, > "bluestore_write_small_pre_read": 0, > "bluestore_write_small_new": 16606, > "bluestore_txc": 407, > "bluestore_onode_reshard": 256, > "bluestore_blob_split": 0, > "bluestore_extent_compress": 269, > "bluestore_gc_merged": 0, > "bluestore_read_eio": 0, > "bluestore_reads_with_retries": 0, > "bluestore_fragmentation_micros": 14, > "omap_seek_to_first_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "omap_upper_bound_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "omap_lower_bound_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "omap_next_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "omap_get_keys_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "omap_get_values_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "clist_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "remove_lat": { > "avgcount": 256, > "sum": 0.042811794, > "avgtime": 0.000167233 > } > } > } > I recreated now OSD's 0,1 : for i in 5 9 0 1 ; do echo $i ; ceph daemon osd.$i bluestore allocator score block ; done > 5 > { > "fragmentation_rating": 0.29450522022006459 > } > 9 > { > "fragmentation_rating": 0.29937167485432309 > } > 0 > { > "fragmentation_rating": 0.080095423473327848 > } > 1 > { > "fragmentation_rating": 0.080977251877914366 > } for i in 5 9 0 1 ; do echo $i ; ceph daemon osd.$i bluestore allocator fragmentation block ; done > 5 > { > "fragmentation_rating": 0.0055241045678238445 > } > 9 > { > "fragmentation_rating": 0.0053710095717623045 > } > 0 > { > "fragmentation_rating": 0.00056595674886284169 > } > 1 > { > "fragmentation_rating": 0.00034820633661578006 > } DATEBENCH=$(date +"%Y-%m-%d-%H-%M-%S") && ceph tell osd.0 perf reset all && ceph tell osd.0 bench >> 
/root/ceph_osd_bench_results/$DATEBENCH-perf-dump-bluestore-osd-0-and-bench-fresh-synced.log && ceph tell osd.0 perf dump bluestore >> /root/ceph_osd_bench_results/$DATEBENCH-perf-dump-bluestore-osd-0-and-bench-fresh-synced.log > { > "bytes_written": 1073741824, > "blocksize": 4194304, > "elapsed_sec": 0.419697298, > "bytes_per_sec": 2558372019.8265371, > "iops": 609.96342178023747 > } > { > "bluestore": { > "kv_flush_lat": { > "avgcount": 57, > "sum": 0.000104419, > "avgtime": 0.000001831 > }, > "kv_commit_lat": { > "avgcount": 57, > "sum": 0.462118443, > "avgtime": 0.008107341 > }, > "kv_sync_lat": { > "avgcount": 57, > "sum": 0.462222862, > "avgtime": 0.008109173 > }, > "kv_final_lat": { > "avgcount": 57, > "sum": 0.012920913, > "avgtime": 0.000226682 > }, > "state_prepare_lat": { > "avgcount": 278, > "sum": 0.373769054, > "avgtime": 0.001344493 > }, > "state_aio_wait_lat": { > "avgcount": 278, > "sum": 3.695556734, > "avgtime": 0.013293369 > }, > "state_io_done_lat": { > "avgcount": 278, > "sum": 0.001566844, > "avgtime": 0.000005636 > }, > "state_kv_queued_lat": { > "avgcount": 278, > "sum": 1.681350076, > "avgtime": 0.006048021 > }, > "state_kv_commiting_lat": { > "avgcount": 278, > "sum": 2.849535426, > "avgtime": 0.010250127 > }, > "state_kv_done_lat": { > "avgcount": 278, > "sum": 0.000068180, > "avgtime": 0.000000245 > }, > "state_deferred_queued_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "state_deferred_aio_wait_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "state_deferred_cleanup_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "state_finishing_lat": { > "avgcount": 278, > "sum": 0.000027149, > "avgtime": 0.000000097 > }, > "state_done_lat": { > "avgcount": 278, > "sum": 0.011008736, > "avgtime": 0.000039599 > }, > "throttle_lat": { > "avgcount": 278, > "sum": 0.238060391, > "avgtime": 0.000856332 > }, > "submit_lat": { > "avgcount": 278, > "sum": 
0.462810481, > "avgtime": 0.001664785 > }, > "commit_lat": { > "avgcount": 278, > "sum": 8.601807474, > "avgtime": 0.030941753 > }, > "read_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "read_onode_meta_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "read_wait_aio_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "compress_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "decompress_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "csum_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "compress_success_count": 0, > "compress_rejected_count": 0, > "write_pad_bytes": 0, > "deferred_write_ops": 0, > "deferred_write_bytes": 0, > "write_penalty_read_ops": 0, > "bluestore_allocated": 326101950464, > "bluestore_stored": 326097944081, > "bluestore_compressed": 0, > "bluestore_compressed_allocated": 0, > "bluestore_compressed_original": 0, > "bluestore_onodes": 38650, > "bluestore_pinned_onodes": 0, > "bluestore_onode_hits": 297, > "bluestore_onode_misses": 257, > "bluestore_onode_shard_hits": 2838, > "bluestore_onode_shard_misses": 1, > "bluestore_extents": 1998863, > "bluestore_blobs": 1998541, > "bluestore_buffers": 2828, > "bluestore_buffer_bytes": 19533824, > "bluestore_buffer_hit_bytes": 0, > "bluestore_buffer_miss_bytes": 0, > "bluestore_write_big": 277, > "bluestore_write_big_bytes": 1074913280, > "bluestore_write_big_blobs": 16420, > "bluestore_write_small": 0, > "bluestore_write_small_bytes": 0, > "bluestore_write_small_unused": 0, > "bluestore_write_small_deferred": 0, > "bluestore_write_small_pre_read": 0, > "bluestore_write_small_new": 16420, > "bluestore_txc": 278, > "bluestore_onode_reshard": 256, > "bluestore_blob_split": 0, > "bluestore_extent_compress": 36, > "bluestore_gc_merged": 0, > "bluestore_read_eio": 0, > "bluestore_reads_with_retries": 0, > 
"bluestore_fragmentation_micros": 0, > "omap_seek_to_first_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "omap_upper_bound_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "omap_lower_bound_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "omap_next_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "omap_get_keys_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "omap_get_values_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "clist_lat": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "remove_lat": { > "avgcount": 256, > "sum": 0.017437900, > "avgtime": 0.000068116 > } > } > } _______________________________________________ ceph-users mailing list -- ceph-users@xxxxxxx To unsubscribe send an email to ceph-users-leave@xxxxxxx