Hi Janne, Thank you for your response. I used the `ceph pg deep-scrub <pg_num>` command, and all of the results point to osd.166. I checked the SMART data and syslog for osd.166, and the disk looks fine. The number of PGs with late deep-scrubs is now lower; however, it has been 5 days since my last post. I have attached the perf dump for that OSD. There are tons of values; do you have any suggestions on how to read and analyze the values in the report? Are there any key values worth noticing in the dump result below? ceph daemon osd.166 perf dump { "AsyncMessenger::Worker-0": { "msgr_recv_messages": 19341158, "msgr_send_messages": 18796191, "msgr_recv_bytes": 349806218697, "msgr_send_bytes": 280579883194, "msgr_created_connections": 45848, "msgr_active_connections": 371, "msgr_running_total_time": 3472.846508390, "msgr_running_send_time": 1303.229556187, "msgr_running_recv_time": 2666.103910588, "msgr_running_fast_dispatch_time": 279.453905222, "msgr_send_messages_queue_lat": { "avgcount": 18796198, "sum": 2186.559714335, "avgtime": 0.000116329 }, "msgr_handle_ack_lat": { "avgcount": 17717151, "sum": 8.286058140, "avgtime": 0.000000467 } }, "AsyncMessenger::Worker-1": { "msgr_recv_messages": 15162337, "msgr_send_messages": 14791166, "msgr_recv_bytes": 200258563166, "msgr_send_bytes": 133207293543, "msgr_created_connections": 44152, "msgr_active_connections": 377, "msgr_running_total_time": 2560.268490568, "msgr_running_send_time": 952.458957466, "msgr_running_recv_time": 1217.955146530, "msgr_running_fast_dispatch_time": 232.240019379, "msgr_send_messages_queue_lat": { "avgcount": 14791125, "sum": 1897.607826992, "avgtime": 0.000128293 }, "msgr_handle_ack_lat": { "avgcount": 15269688, "sum": 7.098990800, "avgtime": 0.000000464 } }, "AsyncMessenger::Worker-2": { "msgr_recv_messages": 15677023, "msgr_send_messages": 15358783, "msgr_recv_bytes": 228508634479, "msgr_send_bytes": 211006631139, "msgr_created_connections": 45406, "msgr_active_connections": 383, "msgr_running_total_time": 2759.930104879, 
"msgr_running_send_time": 1053.707455802, "msgr_running_recv_time": 4334.833363876, "msgr_running_fast_dispatch_time": 239.419774153, "msgr_send_messages_queue_lat": { "avgcount": 15358747, "sum": 2015.660278745, "avgtime": 0.000131238 }, "msgr_handle_ack_lat": { "avgcount": 16139329, "sum": 7.595875666, "avgtime": 0.000000470 } }, "bluefs": { "gift_bytes": 0, "reclaim_bytes": 0, "db_total_bytes": 307090153472, "db_used_bytes": 7901011968, "wal_total_bytes": 0, "wal_used_bytes": 0, "slow_total_bytes": 400033120256, "slow_used_bytes": 0, "num_files": 124, "log_bytes": 14528512, "log_compactions": 12, "logged_bytes": 190578688, "files_written_wal": 2, "files_written_sst": 2852, "bytes_written_wal": 295528103936, "bytes_written_sst": 121776099328, "bytes_written_slow": 0, "max_bytes_wal": 0, "max_bytes_db": 9753845760, "max_bytes_slow": 0, "read_random_count": 2269991, "read_random_bytes": 135270214328, "read_random_disk_count": 1190220, "read_random_disk_bytes": 130232185489, "read_random_buffer_count": 1086502, "read_random_buffer_bytes": 5038028839, "read_count": 333292, "read_bytes": 13432447950, "read_prefetch_count": 330425, "read_prefetch_bytes": 13419652916, "read_zeros_candidate": 0, "read_zeros_errors": 0 }, "bluestore": { "kv_flush_lat": { "avgcount": 13561020, "sum": 44.317007002, "avgtime": 0.000003267 }, "kv_commit_lat": { "avgcount": 13561020, "sum": 8141.086529679, "avgtime": 0.000600329 }, "kv_sync_lat": { "avgcount": 13561020, "sum": 8185.403536681, "avgtime": 0.000603597 }, "kv_final_lat": { "avgcount": 13548150, "sum": 852.970428455, "avgtime": 0.000062958 }, "state_prepare_lat": { "avgcount": 18292548, "sum": 243976.346193841, "avgtime": 0.013337471 }, "state_aio_wait_lat": { "avgcount": 18292543, "sum": 878753.135015570, "avgtime": 0.048038872 }, "state_io_done_lat": { "avgcount": 18292538, "sum": 581096.203930563, "avgtime": 0.031766844 }, "state_kv_queued_lat": { "avgcount": 18292538, "sum": 12959.712261286, "avgtime": 0.000708469 }, 
"state_kv_commiting_lat": { "avgcount": 18292538, "sum": 16689.326675859, "avgtime": 0.000912357 }, "state_kv_done_lat": { "avgcount": 18292538, "sum": 13.643905948, "avgtime": 0.000000745 }, "state_deferred_queued_lat": { "avgcount": 17503783, "sum": 12081096.232394725, "avgtime": 0.690199154 }, "state_deferred_aio_wait_lat": { "avgcount": 17503624, "sum": 21399482.504029255, "avgtime": 1.222574393 }, "state_deferred_cleanup_lat": { "avgcount": 17503624, "sum": 589771.572651375, "avgtime": 0.033694255 }, "state_finishing_lat": { "avgcount": 18292294, "sum": 4.005002683, "avgtime": 0.000000218 }, "state_done_lat": { "avgcount": 18292293, "sum": 477572.150410700, "avgtime": 0.026107834 }, "throttle_lat": { "avgcount": 18292548, "sum": 138008.807746243, "avgtime": 0.007544537 }, "submit_lat": { "avgcount": 18292548, "sum": 244153.544954022, "avgtime": 0.013347158 }, "commit_lat": { "avgcount": 18292538, "sum": 1733481.218359303, "avgtime": 0.094764390 }, "read_lat": { "avgcount": 7533690, "sum": 294110.821093444, "avgtime": 0.039039411 }, "read_onode_meta_lat": { "avgcount": 15240437, "sum": 406.469177723, "avgtime": 0.000026670 }, "read_wait_aio_lat": { "avgcount": 7706742, "sum": 396217.763022442, "avgtime": 0.051411836 }, "compress_lat": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "decompress_lat": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "csum_lat": { "avgcount": 6681264, "sum": 367.861656298, "avgtime": 0.000055058 }, "compress_success_count": 0, "compress_rejected_count": 0, "write_pad_bytes": 2066645730, "deferred_write_ops": 16301044, "deferred_write_bytes": 180197400576, "write_penalty_read_ops": 527186, "bluestore_allocated": 3828392198144, "bluestore_stored": 3729074195303, "bluestore_compressed": 0, "bluestore_compressed_allocated": 0, "bluestore_compressed_original": 0, "bluestore_onodes": 103204, "bluestore_pinned_onodes": 129, "bluestore_onode_hits": 53721789, "bluestore_onode_misses": 1375575, 
"bluestore_onode_shard_hits": 85413794, "bluestore_onode_shard_misses": 6341623, "bluestore_extents": 1681388, "bluestore_blobs": 735275, "bluestore_buffers": 3640, "bluestore_buffer_bytes": 109752320, "bluestore_buffer_hit_bytes": 208205259098, "bluestore_buffer_miss_bytes": 1591921113076, "bluestore_write_big": 1163116, "bluestore_write_big_bytes": 520215527424, "bluestore_write_big_blobs": 1866660, "bluestore_write_small": 19180172, "bluestore_write_small_bytes": 193721555312, "bluestore_write_small_unused": 565984, "bluestore_write_small_deferred": 18784709, "bluestore_write_small_pre_read": 18307775, "bluestore_write_small_new": 2123046, "bluestore_txc": 18292548, "bluestore_onode_reshard": 1054803, "bluestore_blob_split": 23858, "bluestore_extent_compress": 30660729, "bluestore_gc_merged": 0, "bluestore_read_eio": 0, "bluestore_reads_with_retries": 0, "bluestore_fragmentation_micros": 10, "omap_seek_to_first_lat": { "avgcount": 380178, "sum": 8.970751793, "avgtime": 0.000023596 }, "omap_upper_bound_lat": { "avgcount": 60, "sum": 0.003847889, "avgtime": 0.000064131 }, "omap_lower_bound_lat": { "avgcount": 14, "sum": 0.000914658, "avgtime": 0.000065332 }, "omap_next_lat": { "avgcount": 1040575, "sum": 4.025507055, "avgtime": 0.000003868 }, "omap_get_keys_lat": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "omap_get_values_lat": { "avgcount": 1846, "sum": 0.325166297, "avgtime": 0.000176146 }, "clist_lat": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "remove_lat": { "avgcount": 197142, "sum": 307.438827367, "avgtime": 0.001559479 } }, "cct": { "total_workers": 5, "unhealthy_workers": 0 }, "finisher-commit_finisher": { "queue_len": 0, "complete_latency": { "avgcount": 736479, "sum": 79.932474500, "avgtime": 0.000108533 } }, "finisher-objecter-finisher-0": { "queue_len": 0, "complete_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "mempool": { "bloom_filter_bytes": 0, "bloom_filter_items": 0, 
"bluestore_alloc_bytes": 86280872, "bluestore_alloc_items": 3234244, "bluestore_cache_data_bytes": 113959032, "bluestore_cache_data_items": 4256, "bluestore_cache_onode_bytes": 63573048, "bluestore_cache_onode_items": 103203, "bluestore_cache_meta_bytes": 93275140, "bluestore_cache_meta_items": 9669738, "bluestore_cache_other_bytes": 372789148, "bluestore_cache_other_items": 4198719, "bluestore_Buffer_bytes": 457920, "bluestore_Buffer_items": 4770, "bluestore_Extent_bytes": 80706192, "bluestore_Extent_items": 1681379, "bluestore_Blob_bytes": 76468080, "bluestore_Blob_items": 735270, "bluestore_SharedBlob_bytes": 82349456, "bluestore_SharedBlob_items": 735263, "bluestore_inline_bl_bytes": 416070, "bluestore_inline_bl_items": 639, "bluestore_fsck_bytes": 0, "bluestore_fsck_items": 0, "bluestore_txc_bytes": 167400, "bluestore_txc_items": 225, "bluestore_writing_deferred_bytes": 4101716, "bluestore_writing_deferred_items": 419, "bluestore_writing_bytes": 1490484, "bluestore_writing_items": 13, "bluefs_bytes": 47008, "bluefs_items": 1205, "bluefs_file_reader_bytes": 6310272, "bluefs_file_reader_items": 228, "bluefs_file_writer_bytes": 672, "bluefs_file_writer_items": 3, "buffer_anon_bytes": 43279309, "buffer_anon_items": 128450, "buffer_meta_bytes": 8452928, "buffer_meta_items": 96056, "osd_bytes": 1254856, "osd_items": 98, "osd_mapbl_bytes": 0, "osd_mapbl_items": 0, "osd_pglog_bytes": 541003528, "osd_pglog_items": 1092365, "osdmap_bytes": 944376, "osdmap_items": 46434, "osdmap_mapping_bytes": 0, "osdmap_mapping_items": 0, "pgmap_bytes": 0, "pgmap_items": 0, "mds_co_bytes": 0, "mds_co_items": 0, "unittest_1_bytes": 0, "unittest_1_items": 0, "unittest_2_bytes": 0, "unittest_2_items": 0 }, "objecter": { "op_active": 0, "op_laggy": 0, "op_send": 0, "op_send_bytes": 0, "op_resend": 0, "op_reply": 0, "op": 0, "op_r": 0, "op_w": 0, "op_rmw": 0, "op_pg": 0, "osdop_stat": 0, "osdop_create": 0, "osdop_read": 0, "osdop_write": 0, "osdop_writefull": 0, "osdop_writesame": 0, 
"osdop_append": 0, "osdop_zero": 0, "osdop_truncate": 0, "osdop_delete": 0, "osdop_mapext": 0, "osdop_sparse_read": 0, "osdop_clonerange": 0, "osdop_getxattr": 0, "osdop_setxattr": 0, "osdop_cmpxattr": 0, "osdop_rmxattr": 0, "osdop_resetxattrs": 0, "osdop_call": 0, "osdop_watch": 0, "osdop_notify": 0, "osdop_src_cmpxattr": 0, "osdop_pgls": 0, "osdop_pgls_filter": 0, "osdop_other": 0, "linger_active": 0, "linger_send": 0, "linger_resend": 0, "linger_ping": 0, "poolop_active": 0, "poolop_send": 0, "poolop_resend": 0, "poolstat_active": 0, "poolstat_send": 0, "poolstat_resend": 0, "statfs_active": 0, "statfs_send": 0, "statfs_resend": 0, "command_active": 0, "command_send": 0, "command_resend": 0, "map_epoch": 216370, "map_full": 1, "map_inc": 523, "osd_sessions": 0, "osd_session_open": 0, "osd_session_close": 0, "osd_laggy": 0, "omap_wr": 0, "omap_rd": 0, "omap_del": 0 }, "osd": { "op_wip": 3, "op": 8135509, "op_in_bytes": 154235642660, "op_out_bytes": 265025548947, "op_latency": { "avgcount": 8135509, "sum": 6634718.072418675, "avgtime": 0.815525872 }, "op_process_latency": { "avgcount": 8135509, "sum": 980195.656783940, "avgtime": 0.120483630 }, "op_prepare_latency": { "avgcount": 8135717, "sum": 258387.700795098, "avgtime": 0.031759671 }, "op_r": 4594289, "op_r_out_bytes": 265025548947, "op_r_latency": { "avgcount": 4594289, "sum": 1709240.972851229, "avgtime": 0.372036015 }, "op_r_process_latency": { "avgcount": 4594289, "sum": 197804.473172139, "avgtime": 0.043054425 }, "op_r_prepare_latency": { "avgcount": 4594378, "sum": 197987.522605423, "avgtime": 0.043093433 }, "op_w": 2968297, "op_w_in_bytes": 135487463954, "op_w_latency": { "avgcount": 2968297, "sum": 4152375.367140214, "avgtime": 1.398908319 }, "op_w_process_latency": { "avgcount": 2968297, "sum": 682819.077193301, "avgtime": 0.230037316 }, "op_w_prepare_latency": { "avgcount": 2968311, "sum": 49659.932809889, "avgtime": 0.016730030 }, "op_rw": 572923, "op_rw_in_bytes": 18748178706, "op_rw_out_bytes": 0, 
"op_rw_latency": { "avgcount": 572923, "sum": 773101.732427232, "avgtime": 1.349399015 }, "op_rw_process_latency": { "avgcount": 572923, "sum": 99572.106418500, "avgtime": 0.173796664 }, "op_rw_prepare_latency": { "avgcount": 573028, "sum": 10740.245379786, "avgtime": 0.018742967 }, "op_before_queue_op_lat": { "avgcount": 31713035, "sum": 388.827268064, "avgtime": 0.000012260 }, "op_before_dequeue_op_lat": { "avgcount": 32804530, "sum": 20625888.364228222, "avgtime": 0.628751223 }, "subop": 14720372, "subop_in_bytes": 569180095447, "subop_latency": { "avgcount": 14720372, "sum": 14049383.405657689, "avgtime": 0.954417687 }, "subop_w": 14720372, "subop_w_in_bytes": 569180095447, "subop_w_latency": { "avgcount": 14720372, "sum": 14049383.405657689, "avgtime": 0.954417687 }, "subop_pull": 0, "subop_pull_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "subop_push": 0, "subop_push_in_bytes": 0, "subop_push_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "pull": 0, "push": 39, "push_out_bytes": 9663488, "recovery_ops": 69, "recovery_bytes": 81627171, "loadavg": 495, "cached_crc": 0, "cached_crc_adjusted": 0, "missed_crc": 0, "numpg": 97, "numpg_primary": 20, "numpg_replica": 77, "numpg_stray": 0, "numpg_removing": 0, "heartbeat_to_peers": 45, "map_messages": 16833, "map_message_epochs": 17208, "map_message_epoch_dups": 16720, "messages_delayed_for_map": 0, "osd_map_cache_hit": 51874, "osd_map_cache_miss": 0, "osd_map_cache_miss_low": 0, "osd_map_cache_miss_low_avg": { "avgcount": 0, "sum": 0 }, "osd_map_bl_cache_hit": 6245, "osd_map_bl_cache_miss": 1058, "stat_bytes": 10307917307904, "stat_bytes_used": 4136558845952, "stat_bytes_avail": 6171358461952, "copyfrom": 0, "tier_promote": 0, "tier_flush": 0, "tier_flush_fail": 0, "tier_try_flush": 0, "tier_try_flush_fail": 0, "tier_evict": 0, "tier_whiteout": 0, "tier_dirty": 13145, "tier_clean": 0, "tier_delay": 0, "tier_proxy_read": 0, "tier_proxy_write": 0, "agent_wake": 0, 
"agent_skip": 0, "agent_flush": 0, "agent_evict": 0, "object_ctx_cache_hit": 8982759, "object_ctx_cache_total": 9249574, "op_cache_hit": 0, "osd_tier_flush_lat": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "osd_tier_promote_lat": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "osd_tier_r_lat": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "osd_pg_info": 18291893, "osd_pg_fastinfo": 17884975, "osd_pg_biginfo": 850 }, "prioritycache": { "target_bytes": 4294967296, "mapped_bytes": 4156628992, "unmapped_bytes": 696713216, "heap_bytes": 4853342208, "cache_bytes": 2840015699 }, "prioritycache:data": { "pri0_bytes": 0, "pri1_bytes": 0, "pri2_bytes": 0, "pri3_bytes": 0, "pri4_bytes": 0, "pri5_bytes": 0, "pri6_bytes": 0, "pri7_bytes": 0, "pri8_bytes": 0, "pri9_bytes": 0, "pri10_bytes": 0, "pri11_bytes": 62092539, "reserved_bytes": 273451781, "committed_bytes": 335544320 }, "prioritycache:kv": { "pri0_bytes": 143620256, "pri1_bytes": 760082368, "pri2_bytes": 0, "pri3_bytes": 0, "pri4_bytes": 0, "pri5_bytes": 0, "pri6_bytes": 0, "pri7_bytes": 0, "pri8_bytes": 0, "pri9_bytes": 0, "pri10_bytes": 0, "pri11_bytes": 124185078, "reserved_bytes": 280735146, "committed_bytes": 1308622848 }, "prioritycache:meta": { "pri0_bytes": 0, "pri1_bytes": 770032640, "pri2_bytes": 0, "pri3_bytes": 0, "pri4_bytes": 0, "pri5_bytes": 0, "pri6_bytes": 0, "pri7_bytes": 0, "pri8_bytes": 0, "pri9_bytes": 0, "pri10_bytes": 0, "pri11_bytes": 124185078, "reserved_bytes": 280187402, "committed_bytes": 1174405120 }, "recoverystate_perf": { "initial_latency": { "avgcount": 97, "sum": 21.074621061, "avgtime": 0.217264134 }, "started_latency": { "avgcount": 259, "sum": 4521309.880731710, "avgtime": 17456.794906300 }, "reset_latency": { "avgcount": 356, "sum": 1125.771917464, "avgtime": 3.162280667 }, "start_latency": { "avgcount": 356, "sum": 0.027642377, "avgtime": 0.000077647 }, "primary_latency": { "avgcount": 41, "sum": 1080466.472787249, "avgtime": 
26352.840799689 }, "peering_latency": { "avgcount": 61, "sum": 82.099913849, "avgtime": 1.345900227 }, "backfilling_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "waitremotebackfillreserved_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "waitlocalbackfillreserved_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "notbackfilling_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "repnotrecovering_latency": { "avgcount": 206, "sum": 3440045.167580858, "avgtime": 16699.248386314 }, "repwaitrecoveryreserved_latency": { "avgcount": 107, "sum": 2019.499114397, "avgtime": 18.873823499 }, "repwaitbackfillreserved_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "reprecovering_latency": { "avgcount": 107, "sum": 125.628611271, "avgtime": 1.174099170 }, "activating_latency": { "avgcount": 55, "sum": 18.614803717, "avgtime": 0.338450976 }, "waitlocalrecoveryreserved_latency": { "avgcount": 24, "sum": 893.960436261, "avgtime": 37.248351510 }, "waitremoterecoveryreserved_latency": { "avgcount": 24, "sum": 130.550360380, "avgtime": 5.439598349 }, "recovering_latency": { "avgcount": 24, "sum": 119.410081526, "avgtime": 4.975420063 }, "recovered_latency": { "avgcount": 55, "sum": 0.003922731, "avgtime": 0.000071322 }, "clean_latency": { "avgcount": 35, "sum": 1079893.321498499, "avgtime": 30854.094899957 }, "active_latency": { "avgcount": 35, "sum": 1080422.236261448, "avgtime": 30869.206750327 }, "replicaactive_latency": { "avgcount": 99, "sum": 3440679.339242395, "avgtime": 34754.336760024 }, "stray_latency": { "avgcount": 295, "sum": 306.115521089, "avgtime": 1.037679732 }, "getinfo_latency": { "avgcount": 61, "sum": 16.124036017, "avgtime": 0.264328459 }, "getlog_latency": { "avgcount": 61, "sum": 0.644097014, "avgtime": 0.010558967 }, "waitactingchange_latency": { "avgcount": 6, "sum": 6.078793861, "avgtime": 1.013132310 }, "incomplete_latency": { 
"avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "down_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "getmissing_latency": { "avgcount": 55, "sum": 22.396866885, "avgtime": 0.407215761 }, "waitupthru_latency": { "avgcount": 46, "sum": 42.930127429, "avgtime": 0.933263639 }, "notrecovering_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "rocksdb": { "get": 7655559, "submit_transaction": 18292538, "submit_transaction_sync": 13561020, "get_latency": { "avgcount": 7655559, "sum": 739.658957592, "avgtime": 0.000096617 }, "submit_latency": { "avgcount": 18292538, "sum": 5067.560589693, "avgtime": 0.000277028 }, "submit_sync_latency": { "avgcount": 13561020, "sum": 2848.409807758, "avgtime": 0.000210043 }, "compact": 0, "compact_range": 0, "compact_queue_merge": 0, "compact_queue_len": 0, "rocksdb_write_wal_time": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "rocksdb_write_memtable_time": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "rocksdb_write_delay_time": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "rocksdb_write_pre_and_post_time": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "throttle-bluestore_throttle_bytes": { "val": 14143544, "max": 67108864, "get_started": 18292548, "get": 18292548, "get_sum": 14437937592005, "get_or_fail_fail": 0, "get_or_fail_success": 0, "take": 0, "take_sum": 0, "put": 12322127, "put_sum": 14437923448461, "wait": { "avgcount": 171841, "sum": 14343.120586852, "avgtime": 0.083467394 } }, "throttle-bluestore_throttle_deferred_bytes": { "val": 180374894, "max": 201326592, "get_started": 1887317, "get": 17503877, "get_sum": 13029610543822, "get_or_fail_fail": 1887317, "get_or_fail_success": 15616560, "take": 0, "take_sum": 0, "put": 6827565, "put_sum": 13029430168928, "wait": { "avgcount": 1887144, "sum": 122958.521042204, "avgtime": 0.065155876 } }, "throttle-msgr_dispatch_throttler-client": { "val": 0, 
"max": 104857600, "get_started": 0, "get": 8148556, "get_sum": 156467375000, "get_or_fail_fail": 0, "get_or_fail_success": 8148556, "take": 0, "take_sum": 0, "put": 8148556, "put_sum": 156467375000, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "throttle-msgr_dispatch_throttler-cluster": { "val": 0, "max": 104857600, "get_started": 0, "get": 26883644, "get_sum": 587774146622, "get_or_fail_fail": 0, "get_or_fail_success": 26883644, "take": 0, "take_sum": 0, "put": 26883644, "put_sum": 587774146622, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "throttle-msgr_dispatch_throttler-hb_back_client": { "val": 0, "max": 104857600, "get_started": 0, "get": 3100962, "get_sum": 6304255746, "get_or_fail_fail": 0, "get_or_fail_success": 3100962, "take": 0, "take_sum": 0, "put": 3100962, "put_sum": 6304255746, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "throttle-msgr_dispatch_throttler-hb_back_server": { "val": 0, "max": 104857600, "get_started": 0, "get": 4233388, "get_sum": 8606477796, "get_or_fail_fail": 0, "get_or_fail_success": 4233388, "take": 0, "take_sum": 0, "put": 4233388, "put_sum": 8606477796, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "throttle-msgr_dispatch_throttler-hb_front_client": { "val": 0, "max": 104857600, "get_started": 0, "get": 3100960, "get_sum": 6304251680, "get_or_fail_fail": 0, "get_or_fail_success": 3100960, "take": 0, "take_sum": 0, "put": 3100960, "put_sum": 6304251680, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "throttle-msgr_dispatch_throttler-hb_front_server": { "val": 0, "max": 104857600, "get_started": 0, "get": 4233388, "get_sum": 8606477796, "get_or_fail_fail": 0, "get_or_fail_success": 4233388, "take": 0, "take_sum": 0, "put": 4233388, "put_sum": 8606477796, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "throttle-msgr_dispatch_throttler-ms_objecter": { "val": 0, "max": 
104857600, "get_started": 0, "get": 0, "get_sum": 0, "get_or_fail_fail": 0, "get_or_fail_success": 0, "take": 0, "take_sum": 0, "put": 0, "put_sum": 0, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "throttle-objecter_bytes": { "val": 0, "max": 104857600, "get_started": 0, "get": 0, "get_sum": 0, "get_or_fail_fail": 0, "get_or_fail_success": 0, "take": 0, "take_sum": 0, "put": 0, "put_sum": 0, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "throttle-objecter_ops": { "val": 0, "max": 1024, "get_started": 0, "get": 0, "get_sum": 0, "get_or_fail_fail": 0, "get_or_fail_success": 0, "take": 0, "take_sum": 0, "put": 0, "put_sum": 0, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "throttle-osd_client_bytes": { "val": 131570, "max": 524288000, "get_started": 0, "get": 8138218, "get_sum": 156466612850, "get_or_fail_fail": 0, "get_or_fail_success": 8138218, "take": 0, "take_sum": 0, "put": 11648776, "put_sum": 156466481280, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "throttle-osd_client_messages": { "val": 6, "max": 256, "get_started": 0, "get": 8617830, "get_sum": 8617830, "get_or_fail_fail": 5295257, "get_or_fail_success": 8617830, "take": 0, "take_sum": 0, "put": 8617824, "put_sum": 8617824, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } } } -----Original Message----- From: Janne Johansson <icepic.dz@xxxxxxxxx> Sent: Thursday, May 4, 2023 10:57 PM To: Peter <petersun@xxxxxxxxxxxx> Cc: ceph-users@xxxxxxx Subject: Re: pg deep-scrub issue >undergo deepscrub and regular scrub cannot be completed in a timely manner. I have noticed that these PGs appear to be concentrated on a single OSD. I am seeking your guidance on how to address this issue and would appreciate any insights or suggestions you may have. 
> The usual "see if there are SMART errors on the drive", "check dmesg for this drive" and see if this OSD has lots larger latencies* than the other similar drives and if any of these are true, take it out of the cluster and replace it with a new working drive. *) Perhaps with iostat, checking the service time and utilization%, perhaps with "# ceph daemon osd.X perf dump" on the host running this OSD, "ceph osd perf" and see if this one OSD is an outlier in terms of latencies -- May the most significant bit of your life be positive. _______________________________________________ ceph-users mailing list -- ceph-users@xxxxxxx To unsubscribe send an email to ceph-users-leave@xxxxxxx