PG inconsistent, "pg repair" not working

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hello,
During normal operation our cluster suddenly threw an error, and since then we have had 1 inconsistent PG, and one of the clients sharing the CephFS mount has started to occasionally log "ceph: Failed to find inode X".
"ceph pg repair" deep-scrubs the PG and fails with the same error in the log.
Can anyone advise how to fix this?


log entry:
2018-09-20 06:48:23.081 7f0b2efd9700 -1 log_channel(cluster) log [ERR] : 1.92 soid 1:496296a8:::1000f44d0f4.00000018:head: failed to pick suitable object info
2018-09-20 06:48:23.081 7f0b2efd9700 -1 log_channel(cluster) log [ERR] : scrub 1.92 1:496296a8:::1000f44d0f4.00000018:head on disk size (3751936) does not match object info size (0) adjusted for ondisk to (0)
2018-09-20 06:50:36.925 7f0b2efd9700 -1 log_channel(cluster) log [ERR] : 1.92 scrub 3 errors

# ceph -v
ceph version 13.2.1 (5533ecdc0fda920179d7ad84e0aa65a127b20d77) mimic (stable)


# ceph health detail
HEALTH_ERR 3 scrub errors; Possible data damage: 1 pg inconsistent
OSD_SCRUB_ERRORS 3 scrub errors
PG_DAMAGED Possible data damage: 1 pg inconsistent
pg 1.92 is active+clean+inconsistent, acting [4,9]


# rados list-inconsistent-obj 1.92
{"epoch":519,"inconsistents":[]}


# ceph pg 1.92 query
{
"state": "active+clean+inconsistent",
"snap_trimq": "[]",
"snap_trimq_len": 0,
"epoch": 520,
"up": [
4,
9
],
"acting": [
4,
9
],
"acting_recovery_backfill": [
"4",
"9"
],
"info": {
"pgid": "1.92",
"last_update": "520'2456340",
"last_complete": "520'2456340",
"log_tail": "520'2453330",
"last_user_version": 7914566,
"last_backfill": "MAX",
"last_backfill_bitwise": 0,
"purged_snaps": [],
"history": {
"epoch_created": 63,
"epoch_pool_created": 63,
"last_epoch_started": 520,
"last_interval_started": 519,
"last_epoch_clean": 520,
"last_interval_clean": 519,
"last_epoch_split": 0,
"last_epoch_marked_full": 0,
"same_up_since": 519,
"same_interval_since": 519,
"same_primary_since": 514,
"last_scrub": "520'2456105",
"last_scrub_stamp": "2018-09-25 02:17:35.631365",
"last_deep_scrub": "520'2456105",
"last_deep_scrub_stamp": "2018-09-25 02:17:35.631365",
"last_clean_scrub_stamp": "2018-09-19 02:27:22.656268"
},
"stats": {
"version": "520'2456340",
"reported_seq": "6115579",
"reported_epoch": "520",
"state": "active+clean+inconsistent",
"last_fresh": "2018-09-25 03:02:34.338256",
"last_change": "2018-09-25 02:17:35.631476",
"last_active": "2018-09-25 03:02:34.338256",
"last_peered": "2018-09-25 03:02:34.338256",
"last_clean": "2018-09-25 03:02:34.338256",
"last_became_active": "2018-09-24 15:25:30.238044",
"last_became_peered": "2018-09-24 15:25:30.238044",
"last_unstale": "2018-09-25 03:02:34.338256",
"last_undegraded": "2018-09-25 03:02:34.338256",
"last_fullsized": "2018-09-25 03:02:34.338256",
"mapping_epoch": 519,
"log_start": "520'2453330",
"ondisk_log_start": "520'2453330",
"created": 63,
"last_epoch_clean": 520,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "520'2456105",
"last_scrub_stamp": "2018-09-25 02:17:35.631365",
"last_deep_scrub": "520'2456105",
"last_deep_scrub_stamp": "2018-09-25 02:17:35.631365",
"last_clean_scrub_stamp": "2018-09-19 02:27:22.656268",
"log_size": 3010,
"ondisk_log_size": 3010,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"manifest_stats_invalid": false,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 23138366490,
"num_objects": 479532,
"num_object_clones": 0,
"num_object_copies": 959064,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 479532,
"num_whiteouts": 0,
"num_read": 3295720,
"num_read_kb": 63508374,
"num_write": 2495519,
"num_write_kb": 81795199,
"num_scrub_errors": 3,
"num_shallow_scrub_errors": 3,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 550,
"num_bytes_recovered": 15760916,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0,
"num_objects_manifest": 0
},
"up": [
4,
9
],
"acting": [
4,
9
],
"blocked_by": [],
"up_primary": 4,
"acting_primary": 4,
"purged_snaps": []
},
"empty": 0,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 520,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
},
"peer_info": [
{
"peer": "9",
"pgid": "1.92",
"last_update": "520'2456340",
"last_complete": "515'2438936",
"log_tail": "511'2435926",
"last_user_version": 7902301,
"last_backfill": "MAX",
"last_backfill_bitwise": 0,
"purged_snaps": [],
"history": {
"epoch_created": 63,
"epoch_pool_created": 63,
"last_epoch_started": 520,
"last_interval_started": 519,
"last_epoch_clean": 520,
"last_interval_clean": 519,
"last_epoch_split": 0,
"last_epoch_marked_full": 0,
"same_up_since": 519,
"same_interval_since": 519,
"same_primary_since": 514,
"last_scrub": "520'2456105",
"last_scrub_stamp": "2018-09-25 02:17:35.631365",
"last_deep_scrub": "520'2456105",
"last_deep_scrub_stamp": "2018-09-25 02:17:35.631365",
"last_clean_scrub_stamp": "2018-09-19 02:27:22.656268"
},
"stats": {
"version": "515'2438932",
"reported_seq": "6063632",
"reported_epoch": "515",
"state": "active+clean+inconsistent",
"last_fresh": "2018-09-24 15:24:38.783419",
"last_change": "2018-09-24 15:24:01.437118",
"last_active": "2018-09-24 15:24:38.783419",
"last_peered": "2018-09-24 15:24:38.783419",
"last_clean": "2018-09-24 15:24:38.783419",
"last_became_active": "2018-09-24 15:23:38.017006",
"last_became_peered": "2018-09-24 15:23:38.017006",
"last_unstale": "2018-09-24 15:24:38.783419",
"last_undegraded": "2018-09-24 15:24:38.783419",
"last_fullsized": "2018-09-24 15:24:38.783419",
"mapping_epoch": 519,
"log_start": "511'2435926",
"ondisk_log_start": "511'2435926",
"created": 63,
"last_epoch_clean": 515,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "511'2422729",
"last_scrub_stamp": "2018-09-23 21:04:22.513960",
"last_deep_scrub": "511'2422729",
"last_deep_scrub_stamp": "2018-09-23 21:04:22.513960",
"last_clean_scrub_stamp": "2018-09-19 02:27:22.656268",
"log_size": 3006,
"ondisk_log_size": 3006,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"manifest_stats_invalid": false,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 23120223817,
"num_objects": 477960,
"num_object_clones": 0,
"num_object_copies": 955920,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 477960,
"num_whiteouts": 0,
"num_read": 3261518,
"num_read_kb": 62830694,
"num_write": 2479763,
"num_write_kb": 80991966,
"num_scrub_errors": 3,
"num_shallow_scrub_errors": 3,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 540,
"num_bytes_recovered": 15681499,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0,
"num_objects_manifest": 0
},
"up": [
4,
9
],
"acting": [
4,
9
],
"blocked_by": [],
"up_primary": 4,
"acting_primary": 4,
"purged_snaps": []
},
"empty": 0,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 520,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
}
],
"recovery_state": [
{
"name": "Started/Primary/Active",
"enter_time": "2018-09-24 15:25:29.632727",
"might_have_unfound": [
{
"osd": "9",
"status": "already probed"
}
],
"recovery_progress": {
"backfill_targets": [],
"waiting_on_backfill": [],
"last_backfill_started": "MIN",
"backfill_info": {
"begin": "MIN",
"end": "MIN",
"objects": []
},
"peer_backfill_info": [],
"backfills_in_flight": [],
"recovering": [],
"pg_backend": {
"pull_from_peer": [],
"pushing": []
}
},
"scrub": {
"scrubber.epoch_start": "519",
"scrubber.active": false,
"scrubber.state": "INACTIVE",
"scrubber.start": "MIN",
"scrubber.end": "MIN",
"scrubber.max_end": "MIN",
"scrubber.subset_last_update": "0'0",
"scrubber.deep": false,
"scrubber.waiting_on_whom": []
}
},
{
"name": "Started",
"enter_time": "2018-09-24 15:25:28.357659"
}
],
"agent_state": {}
}
_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com

[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Ceph Dev]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux