Hi Varada, On Mon, Dec 14, 2015 at 03:23:20AM +0000, Varada Kari wrote: > Can get the details of > > 1. ceph health detail > 2. ceph pg query <pg-num> > > of any one PG stuck peering > > > Varada The full health detail is over 9000 lines, but here's a summary: # ceph health detail | head HEALTH_WARN 3072 pgs peering; 3072 pgs stuck inactive; 3072 pgs stuck unclean; 1570 requests are blocked > 32 sec; 25 osds have slow requests; noout flag(s) set pg 3.1ae is stuck inactive for 23264.342056, current state peering, last acting [16,4,8] pg 2.1af is stuck inactive for 23621.565024, current state peering, last acting [6,0] pg 6.1ab is stuck inactive for 22843.875498, current state peering, last acting [27,18,54] pg 3.1af is stuck inactive for 23315.971276, current state peering, last acting [17,16,24] pg 2.1ae is stuck inactive for 19278.004657, current state peering, last acting [7,1] pg 6.1aa is stuck inactive for 19321.668092, current state peering, last acting [31,39,56] pg 3.1a8 is stuck inactive for 22897.969982, current state peering, last acting [16,17,24] pg 2.1a9 is stuck inactive for 23516.554757, current state peering, last acting [14,7] pg 6.1ad is stuck inactive for 23105.915508, current state peering, last acting [33,47,20] # ceph health detail | grep -v peering 34 ops are blocked > 16777.2 sec 1289 ops are blocked > 8388.61 sec 50 ops are blocked > 4194.3 sec 34 ops are blocked > 2097.15 sec 68 ops are blocked > 1048.58 sec 13 ops are blocked > 524.288 sec 11 ops are blocked > 16777.2 sec on osd.0 4 ops are blocked > 8388.61 sec on osd.0 5 ops are blocked > 8388.61 sec on osd.1 100 ops are blocked > 8388.61 sec on osd.2 100 ops are blocked > 8388.61 sec on osd.3 100 ops are blocked > 8388.61 sec on osd.4 80 ops are blocked > 8388.61 sec on osd.5 34 ops are blocked > 8388.61 sec on osd.6 27 ops are blocked > 4194.3 sec on osd.6 15 ops are blocked > 2097.15 sec on osd.6 6 ops are blocked > 1048.58 sec on osd.6 9 ops are blocked > 524.288 sec on osd.6 2 ops are blocked > 16777.2 sec on osd.7 20 ops are blocked > 4194.3 sec on osd.7 16 ops are blocked > 2097.15 sec on osd.7 62 ops are blocked > 1048.58 sec on osd.7 85 ops are blocked > 8388.61 sec on osd.8 80 ops are blocked > 8388.61 sec on osd.9 13 ops are blocked > 16777.2 sec on osd.10 3 ops are blocked > 8388.61 sec on osd.10 1 ops are blocked > 4194.3 sec on osd.10 1 ops are blocked > 2097.15 sec on osd.10 6 ops are blocked > 8388.61 sec on osd.11 5 ops are blocked > 8388.61 sec on osd.12 4 ops are blocked > 8388.61 sec on osd.13 2 ops are blocked > 8388.61 sec on osd.14 4 ops are blocked > 524.288 sec on osd.14 7 ops are blocked > 16777.2 sec on osd.15 12 ops are blocked > 8388.61 sec on osd.15 2 ops are blocked > 4194.3 sec on osd.15 2 ops are blocked > 2097.15 sec on osd.15 100 ops are blocked > 8388.61 sec on osd.16 82 ops are blocked > 8388.61 sec on osd.17 1 ops are blocked > 16777.2 sec on osd.18 100 ops are blocked > 8388.61 sec on osd.21 86 ops are blocked > 8388.61 sec on osd.24 100 ops are blocked > 8388.61 sec on osd.38 100 ops are blocked > 8388.61 sec on osd.42 100 ops are blocked > 8388.61 sec on osd.44 1 ops are blocked > 8388.61 sec on osd.51 25 osds have slow requests noout flag(s) set # ceph pg 3.1ae query <<< hung, until ^c >>> # ceph pg 2.1af query { "state": "peering", "snap_trimq": "[]", "epoch": 357236, "up": [ 6, 0 ], "acting": [ 6, 0 ], "info": { "pgid": "2.1af", "last_update": "356361'1923761", "last_complete": "356361'1923761", "log_tail": "341349'1920757", "last_user_version": 1923761, "last_backfill": "MAX", "purged_snaps": "[1~34,38~1b,55~2,59~2a,84~68,ee~62]", "history": { "epoch_created": 1, "last_epoch_started": 356496, "last_epoch_clean": 356496, "last_epoch_split": 0, "same_up_since": 357218, "same_interval_since": 357218, "same_primary_since": 357218, "last_scrub": "356347'1923757", "last_scrub_stamp": "2015-12-12 12:18:54.719534", "last_deep_scrub": "356347'1923757", "last_deep_scrub_stamp": "2015-12-12 12:18:54.719534", "last_clean_scrub_stamp": "2015-12-12 12:18:54.719534" }, "stats": { "version": "356361'1923761", "reported_seq": "37552607", "reported_epoch": "357218", "state": "peering", "last_fresh": "2015-12-14 12:54:41.084804", "last_change": "2015-12-14 12:54:41.084804", "last_active": "2015-12-14 07:53:05.850772", "last_peered": "2015-12-14 07:53:05.850772", "last_clean": "2015-12-14 07:53:05.850772", "last_became_active": "2013-09-11 09:13:39.309600", "last_became_peered": "2013-09-11 09:13:39.309600", "last_unstale": "2015-12-14 12:54:41.084804", "last_undegraded": "2015-12-14 12:54:41.084804", "last_fullsized": "2015-12-14 12:54:41.084804", "mapping_epoch": 357168, "log_start": "341349'1920757", "ondisk_log_start": "341349'1920757", "created": 1, "last_epoch_clean": 356496, "parent": "0.0", "parent_split_bits": 0, "last_scrub": "356347'1923757", "last_scrub_stamp": "2015-12-12 12:18:54.719534", "last_deep_scrub": "356347'1923757", "last_deep_scrub_stamp": "2015-12-12 12:18:54.719534", "last_clean_scrub_stamp": "2015-12-12 12:18:54.719534", "log_size": 3004, "ondisk_log_size": 3004, "stats_invalid": "0", "stat_sum": { "num_bytes": 7360028160, "num_objects": 2107, "num_object_clones": 642, "num_object_copies": 4214, "num_objects_missing_on_primary": 0, "num_objects_degraded": 0, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 569, "num_whiteouts": 0, "num_read": 726240, "num_read_kb": 31291910, "num_write": 127250, "num_write_kb": 13514083, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, "num_objects_recovered": 2187, "num_bytes_recovered": 9137582592, "num_keys_recovered": 0, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0 }, "up": [ 6, 0 ], "acting": [ 6, 0 ], "blocked_by": [ 0 ], "up_primary": 6, "acting_primary": 6 }, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 356496, "hit_set_history": { "current_last_update": "0'0", "current_last_stamp": "0.000000", "current_info": { "begin": "0.000000", "end": "0.000000", "version": "0'0" }, "history": [] } }, "peer_info": [], "recovery_state": [ { "name": "Started\/Primary\/Peering\/GetInfo", "enter_time": "2015-12-14 12:54:41.084784", "requested_info_from": [ { "osd": "0" } ] }, { "name": "Started\/Primary\/Peering", "enter_time": "2015-12-14 12:54:41.084773", "past_intervals": [ { "first": 356495, "last": 356560, "maybe_went_rw": 1, "up": [ 6, 0 ], "acting": [ 6, 0 ], "primary": 6, "up_primary": 6 }, { "first": 356561, "last": 356608, "maybe_went_rw": 1, "up": [ 0 ], "acting": [ 0 ], "primary": 0, "up_primary": 0 }, { "first": 356609, "last": 356655, "maybe_went_rw": 1, "up": [ 6, 0 ], "acting": [ 6, 0 ], "primary": 6, "up_primary": 6 }, { "first": 356656, "last": 356670, "maybe_went_rw": 1, "up": [ 6 ], "acting": [ 6 ], "primary": 6, "up_primary": 6 }, { "first": 356671, "last": 356681, "maybe_went_rw": 1, "up": [ 6, 0 ], "acting": [ 6, 0 ], "primary": 6, "up_primary": 6 }, { "first": 356682, "last": 356722, "maybe_went_rw": 1, "up": [ 0 ], "acting": [ 0 ], "primary": 0, "up_primary": 0 }, { "first": 356723, "last": 356723, "maybe_went_rw": 0, "up": [], "acting": [], "primary": -1, "up_primary": -1 }, { "first": 356724, "last": 356824, "maybe_went_rw": 1, "up": [ 0 ], "acting": [ 0 ], "primary": 0, "up_primary": 0 }, { "first": 356825, "last": 356876, "maybe_went_rw": 1, "up": [ 6, 0 ], "acting": [ 6, 0 ], "primary": 6, "up_primary": 6 }, { "first": 356877, "last": 356920, "maybe_went_rw": 1, "up": [ 0 ], "acting": [ 0 ], "primary": 0, "up_primary": 0 }, { "first": 356921, "last": 356921, "maybe_went_rw": 0, "up": [], "acting": [], "primary": -1, "up_primary": -1 }, { "first": 356922, "last": 356958, "maybe_went_rw": 1, "up": [ 0 ], "acting": [ 0 ], "primary": 0, "up_primary": 0 }, { "first": 356959, "last": 356963, "maybe_went_rw": 1, "up": [ 6, 0 ], "acting": [ 6, 0 ], "primary": 6, "up_primary": 6 }, { "first": 356964, "last": 357025, "maybe_went_rw": 1, "up": [ 0 ], "acting": [ 0 ], "primary": 0, "up_primary": 0 }, { "first": 357026, "last": 357026, "maybe_went_rw": 0, "up": [], "acting": [], "primary": -1, "up_primary": -1 }, { "first": 357027, "last": 357041, "maybe_went_rw": 1, "up": [ 0 ], "acting": [ 0 ], "primary": 0, "up_primary": 0 }, { "first": 357042, "last": 357081, "maybe_went_rw": 1, "up": [ 6, 0 ], "acting": [ 6, 0 ], "primary": 6, "up_primary": 6 }, { "first": 357082, "last": 357082, "maybe_went_rw": 0, "up": [ 6 ], "acting": [ 6 ], "primary": 6, "up_primary": 6 }, { "first": 357083, "last": 357088, "maybe_went_rw": 0, "up": [ 6, 0 ], "acting": [ 6, 0 ], "primary": 6, "up_primary": 6 }, { "first": 357089, "last": 357089, "maybe_went_rw": 0, "up": [ 0 ], "acting": [ 0 ], "primary": 0, "up_primary": 0 }, { "first": 357090, "last": 357167, "maybe_went_rw": 1, "up": [ 6, 0 ], "acting": [ 6, 0 ], "primary": 6, "up_primary": 6 }, { "first": 357168, "last": 357217, "maybe_went_rw": 1, "up": [ 0 ], "acting": [ 0 ], "primary": 0, "up_primary": 0 } ], "probing_osds": [ "0", "6" ], "down_osds_we_would_probe": [], "peering_blocked_by": [] }, { "name": "Started", "enter_time": "2015-12-14 12:54:41.084717" } ], "agent_state": {} } Chris _______________________________________________ ceph-users mailing list ceph-users@xxxxxxxxxxxxxx http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com