active+recovery_unfound+degraded in Pacific

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hello,
Last week I upgraded my production cluster to Pacific. The cluster was
healthy until a few hours ago.
A scrub that ran 4 hours ago left the cluster in an inconsistent state. I then
issued the command ceph pg repair 7.182 to try to repair it, but the PG
ended up in active+recovery_unfound+degraded.

All OSDs are up and all are running BlueStore, with a replication size of 3 and a
minimum size of 2. I have restarted all OSDs, but that did not help.

Any recommendations on how to recover the cluster safely?

I have attached the output of ceph pg 7.182 query.

 ceph health detail
HEALTH_ERR 1/2459601 objects unfound (0.000%); Possible data damage: 1 pg
recovery_unfound; Degraded data redundancy: 3/7045706 objects degraded
(0.000%), 1 pg degraded
[WRN] OBJECT_UNFOUND: 1/2459601 objects unfound (0.000%)
    pg 7.182 has 1 unfound objects
[ERR] PG_DAMAGED: Possible data damage: 1 pg recovery_unfound
    pg 7.182 is active+recovery_unfound+degraded, acting [15,1,11], 1
unfound
[WRN] PG_DEGRADED: Degraded data redundancy: 3/7045706 objects degraded
(0.000%), 1 pg degraded
    pg 7.182 is active+recovery_unfound+degraded, acting [15,1,11], 1
unfound



ceph -w
  cluster:
    id:     4b9f6959-fead-4ada-ac58-de5d7b149286
    health: HEALTH_ERR
            1/2459586 objects unfound (0.000%)
            Possible data damage: 1 pg recovery_unfound
            Degraded data redundancy: 3/7045661 objects degraded (0.000%),
1 pg degraded

  services:
    mon: 3 daemons, quorum mon-a,mon-b,mon-c (age 38m)
    mgr: mon-a(active, since 38m)
    osd: 46 osds: 46 up (since 25m), 46 in (since 3w)

  data:
    pools:   4 pools, 705 pgs
    objects: 2.46M objects, 9.1 TiB
    usage:   24 TiB used, 95 TiB / 119 TiB avail
    pgs:     3/7045661 objects degraded (0.000%)
             1/2459586 objects unfound (0.000%)
             701 active+clean
             3   active+clean+scrubbing+deep
             1   active+recovery_unfound+degraded

ceph pg 7.182 list_unfound
{
    "num_missing": 1,
    "num_unfound": 1,
    "objects": [
        {
            "oid": {
                "oid": "rbd_data.2f18f2a67fad72.000000000002021a",
                "key": "",
                "snapid": -2,
                "hash": 3951004034,
                "max": 0,
                "pool": 7,
                "namespace": ""
            },
            "need": "184249'118613008",
            "have": "0'0",
            "flags": "none",
            "clean_regions": "clean_offsets: [], clean_omap: 0, new_object: 1",
            "locations": []
        }
    ],
    "state": "NotRecovering",
    "available_might_have_unfound": true,
    "might_have_unfound": [],
    "more": false
}
ceph pg 7.182 query
{
    "snap_trimq": "[]",
    "snap_trimq_len": 0,
    "state": "active+recovery_unfound+degraded",
    "epoch": 184487,
    "up": [
        15,
        1,
        11
    ],
    "acting": [
        15,
        1,
        11
    ],
    "acting_recovery_backfill": [
        "1",
        "11",
        "15"
    ],
    "info": {
        "pgid": "7.182",
        "last_update": "184487'118622945",
        "last_complete": "0'0",
        "log_tail": "184260'118615934",
        "last_user_version": 174805058,
        "last_backfill": "MAX",
        "purged_snaps": [],
        "history": {
            "epoch_created": 80613,
            "epoch_pool_created": 826,
            "last_epoch_started": 184402,
            "last_interval_started": 184401,
            "last_epoch_clean": 184066,
            "last_interval_clean": 184056,
            "last_epoch_split": 80613,
            "last_epoch_marked_full": 0,
            "same_up_since": 184401,
            "same_interval_since": 184401,
            "same_primary_since": 184401,
            "last_scrub": "184250'118615197",
            "last_scrub_stamp": "2021-04-28T21:24:42.693619+0300",
            "last_deep_scrub": "184250'118615197",
            "last_deep_scrub_stamp": "2021-04-28T21:24:42.693619+0300",
            "last_clean_scrub_stamp": "2021-04-28T21:24:42.693619+0300",
            "prior_readable_until_ub": 12.742730181000001
        },
        "stats": {
            "version": "184487'118622945",
            "reported_seq": "126997747",
            "reported_epoch": "184487",
            "state": "active+recovery_unfound+degraded",
            "last_fresh": "2021-04-29T00:17:31.577010+0300",
            "last_change": "2021-04-28T23:40:16.308380+0300",
            "last_active": "2021-04-29T00:17:31.577010+0300",
            "last_peered": "2021-04-29T00:17:31.577010+0300",
            "last_clean": "2021-04-28T21:24:38.946369+0300",
            "last_became_active": "2021-04-28T23:40:03.565550+0300",
            "last_became_peered": "2021-04-28T23:40:03.565550+0300",
            "last_unstale": "2021-04-29T00:17:31.577010+0300",
            "last_undegraded": "2021-04-28T23:40:03.531480+0300",
            "last_fullsized": "2021-04-29T00:17:31.577010+0300",
            "mapping_epoch": 184401,
            "log_start": "184260'118615934",
            "ondisk_log_start": "184260'118615934",
            "created": 80613,
            "last_epoch_clean": 184066,
            "parent": "0.0",
            "parent_split_bits": 9,
            "last_scrub": "184250'118615197",
            "last_scrub_stamp": "2021-04-28T21:24:42.693619+0300",
            "last_deep_scrub": "184250'118615197",
            "last_deep_scrub_stamp": "2021-04-28T21:24:42.693619+0300",
            "last_clean_scrub_stamp": "2021-04-28T21:24:42.693619+0300",
            "log_size": 7011,
            "ondisk_log_size": 7011,
            "stats_invalid": false,
            "dirty_stats_invalid": false,
            "omap_stats_invalid": false,
            "hitset_stats_invalid": false,
            "hitset_bytes_stats_invalid": false,
            "pin_stats_invalid": false,
            "manifest_stats_invalid": true,
            "snaptrimq_len": 0,
            "stat_sum": {
                "num_bytes": 16702544896,
                "num_objects": 4153,
                "num_object_clones": 147,
                "num_object_copies": 12459,
                "num_objects_missing_on_primary": 1,
                "num_objects_missing": 1,
                "num_objects_degraded": 3,
                "num_objects_misplaced": 0,
                "num_objects_unfound": 1,
                "num_objects_dirty": 4153,
                "num_whiteouts": 37,
                "num_read": 10708443,
                "num_read_kb": 271924814,
                "num_write": 112160012,
                "num_write_kb": 1709025860,
                "num_scrub_errors": 0,
                "num_shallow_scrub_errors": 0,
                "num_deep_scrub_errors": 0,
                "num_objects_recovered": 16180,
                "num_bytes_recovered": 39993889280,
                "num_keys_recovered": 0,
                "num_objects_omap": 0,
                "num_objects_hit_set_archive": 0,
                "num_bytes_hit_set_archive": 0,
                "num_flush": 0,
                "num_flush_kb": 0,
                "num_evict": 0,
                "num_evict_kb": 0,
                "num_promote": 0,
                "num_flush_mode_high": 0,
                "num_flush_mode_low": 0,
                "num_evict_mode_some": 0,
                "num_evict_mode_full": 0,
                "num_objects_pinned": 0,
                "num_legacy_snapsets": 0,
                "num_large_omap_objects": 0,
                "num_objects_manifest": 0,
                "num_omap_bytes": 0,
                "num_omap_keys": 0,
                "num_objects_repaired": 0
            },
            "up": [
                15,
                1,
                11
            ],
            "acting": [
                15,
                1,
                11
            ],
            "avail_no_missing": [],
            "object_location_counts": [
                {
                    "shards": "",
                    "objects": 1
                },
                {
                    "shards": "1,11,15",
                    "objects": 4152
                }
            ],
            "blocked_by": [],
            "up_primary": 15,
            "acting_primary": 15,
            "purged_snaps": []
        },
        "empty": 0,
        "dne": 0,
        "incomplete": 0,
        "last_epoch_started": 184402,
        "hit_set_history": {
            "current_last_update": "0'0",
            "history": []
        }
    },
    "peer_info": [
        {
            "peer": "1",
            "pgid": "7.182",
            "last_update": "184487'118622945",
            "last_complete": "184487'118622945",
            "log_tail": "184250'118614019",
            "last_user_version": 174803123,
            "last_backfill": "MAX",
            "purged_snaps": [],
            "history": {
                "epoch_created": 80613,
                "epoch_pool_created": 826,
                "last_epoch_started": 184402,
                "last_interval_started": 184401,
                "last_epoch_clean": 184066,
                "last_interval_clean": 184056,
                "last_epoch_split": 80613,
                "last_epoch_marked_full": 0,
                "same_up_since": 184401,
                "same_interval_since": 184401,
                "same_primary_since": 184401,
                "last_scrub": "184250'118615197",
                "last_scrub_stamp": "2021-04-28T21:24:42.693619+0300",
                "last_deep_scrub": "184250'118615197",
                "last_deep_scrub_stamp": "2021-04-28T21:24:42.693619+0300",
                "last_clean_scrub_stamp": "2021-04-28T21:24:42.693619+0300",
                "prior_readable_until_ub": 12.742730181000001
            },
            "stats": {
                "version": "184387'118621010",
                "reported_seq": "126995610",
                "reported_epoch": "184401",
                "state": "active+undersized+degraded",
                "last_fresh": "2021-04-28T23:40:01.886305+0300",
                "last_change": "2021-04-28T23:39:04.307035+0300",
                "last_active": "2021-04-28T23:40:01.886305+0300",
                "last_peered": "2021-04-28T23:40:01.886305+0300",
                "last_clean": "2021-04-28T21:24:38.946369+0300",
                "last_became_active": "2021-04-28T23:39:04.307035+0300",
                "last_became_peered": "2021-04-28T23:39:04.307035+0300",
                "last_unstale": "2021-04-28T23:40:01.886305+0300",
                "last_undegraded": "2021-04-28T23:39:04.305618+0300",
                "last_fullsized": "2021-04-28T23:39:04.305487+0300",
                "mapping_epoch": 184401,
                "log_start": "184250'118614019",
                "ondisk_log_start": "184250'118614019",
                "created": 80613,
                "last_epoch_clean": 184066,
                "parent": "0.0",
                "parent_split_bits": 9,
                "last_scrub": "184250'118615197",
                "last_scrub_stamp": "2021-04-28T21:24:42.693619+0300",
                "last_deep_scrub": "184250'118615197",
                "last_deep_scrub_stamp": "2021-04-28T21:24:42.693619+0300",
                "last_clean_scrub_stamp": "2021-04-28T21:24:42.693619+0300",
                "log_size": 6991,
                "ondisk_log_size": 6991,
                "stats_invalid": false,
                "dirty_stats_invalid": false,
                "omap_stats_invalid": false,
                "hitset_stats_invalid": false,
                "hitset_bytes_stats_invalid": false,
                "pin_stats_invalid": false,
                "manifest_stats_invalid": true,
                "snaptrimq_len": 0,
                "stat_sum": {
                    "num_bytes": 16701889536,
                    "num_objects": 4153,
                    "num_object_clones": 147,
                    "num_object_copies": 12459,
                    "num_objects_missing_on_primary": 0,
                    "num_objects_missing": 1,
                    "num_objects_degraded": 4153,
                    "num_objects_misplaced": 0,
                    "num_objects_unfound": 0,
                    "num_objects_dirty": 4153,
                    "num_whiteouts": 37,
                    "num_read": 10708344,
                    "num_read_kb": 271918170,
                    "num_write": 112158077,
                    "num_write_kb": 1708980316,
                    "num_scrub_errors": 0,
                    "num_shallow_scrub_errors": 0,
                    "num_deep_scrub_errors": 0,
                    "num_objects_recovered": 16176,
                    "num_bytes_recovered": 39993823744,
                    "num_keys_recovered": 0,
                    "num_objects_omap": 0,
                    "num_objects_hit_set_archive": 0,
                    "num_bytes_hit_set_archive": 0,
                    "num_flush": 0,
                    "num_flush_kb": 0,
                    "num_evict": 0,
                    "num_evict_kb": 0,
                    "num_promote": 0,
                    "num_flush_mode_high": 0,
                    "num_flush_mode_low": 0,
                    "num_evict_mode_some": 0,
                    "num_evict_mode_full": 0,
                    "num_objects_pinned": 0,
                    "num_legacy_snapsets": 0,
                    "num_large_omap_objects": 0,
                    "num_objects_manifest": 0,
                    "num_omap_bytes": 0,
                    "num_omap_keys": 0,
                    "num_objects_repaired": 0
                },
                "up": [
                    15,
                    1,
                    11
                ],
                "acting": [
                    15,
                    1,
                    11
                ],
                "avail_no_missing": [
                    "1",
                    "11"
                ],
                "object_location_counts": [
                    {
                        "shards": "1,11",
                        "objects": 4153
                    }
                ],
                "blocked_by": [],
                "up_primary": 15,
                "acting_primary": 15,
                "purged_snaps": []
            },
            "empty": 0,
            "dne": 0,
            "incomplete": 0,
            "last_epoch_started": 184402,
            "hit_set_history": {
                "current_last_update": "0'0",
                "history": []
            }
        },
        {
            "peer": "11",
            "pgid": "7.182",
            "last_update": "184487'118622945",
            "last_complete": "184487'118622945",
            "log_tail": "184250'118614019",
            "last_user_version": 174803123,
            "last_backfill": "MAX",
            "purged_snaps": [],
            "history": {
                "epoch_created": 80613,
                "epoch_pool_created": 826,
                "last_epoch_started": 184402,
                "last_interval_started": 184401,
                "last_epoch_clean": 184066,
                "last_interval_clean": 184056,
                "last_epoch_split": 80613,
                "last_epoch_marked_full": 0,
                "same_up_since": 184401,
                "same_interval_since": 184401,
                "same_primary_since": 184401,
                "last_scrub": "184250'118615197",
                "last_scrub_stamp": "2021-04-28T21:24:42.693619+0300",
                "last_deep_scrub": "184250'118615197",
                "last_deep_scrub_stamp": "2021-04-28T21:24:42.693619+0300",
                "last_clean_scrub_stamp": "2021-04-28T21:24:42.693619+0300",
                "prior_readable_until_ub": 12.742730181000001
            },
            "stats": {
                "version": "184387'118621009",
                "reported_seq": "126995639",
                "reported_epoch": "184387",
                "state": "active+undersized+degraded",
                "last_fresh": "2021-04-28T23:39:43.291274+0300",
                "last_change": "2021-04-28T23:39:04.307035+0300",
                "last_active": "2021-04-28T23:39:43.291274+0300",
                "last_peered": "2021-04-28T23:39:43.291274+0300",
                "last_clean": "2021-04-28T21:24:38.946369+0300",
                "last_became_active": "2021-04-28T23:39:04.307035+0300",
                "last_became_peered": "2021-04-28T23:39:04.307035+0300",
                "last_unstale": "2021-04-28T23:39:43.291274+0300",
                "last_undegraded": "2021-04-28T23:39:04.305618+0300",
                "last_fullsized": "2021-04-28T23:39:04.305487+0300",
                "mapping_epoch": 184401,
                "log_start": "184250'118614019",
                "ondisk_log_start": "184250'118614019",
                "created": 80613,
                "last_epoch_clean": 184066,
                "parent": "0.0",
                "parent_split_bits": 9,
                "last_scrub": "184250'118615197",
                "last_scrub_stamp": "2021-04-28T21:24:42.693619+0300",
                "last_deep_scrub": "184250'118615197",
                "last_deep_scrub_stamp": "2021-04-28T21:24:42.693619+0300",
                "last_clean_scrub_stamp": "2021-04-28T21:24:42.693619+0300",
                "log_size": 6990,
                "ondisk_log_size": 6990,
                "stats_invalid": false,
                "dirty_stats_invalid": false,
                "omap_stats_invalid": false,
                "hitset_stats_invalid": false,
                "hitset_bytes_stats_invalid": false,
                "pin_stats_invalid": false,
                "manifest_stats_invalid": true,
                "snaptrimq_len": 0,
                "stat_sum": {
                    "num_bytes": 16701889536,
                    "num_objects": 4153,
                    "num_object_clones": 147,
                    "num_object_copies": 12459,
                    "num_objects_missing_on_primary": 0,
                    "num_objects_missing": 1,
                    "num_objects_degraded": 4153,
                    "num_objects_misplaced": 0,
                    "num_objects_unfound": 0,
                    "num_objects_dirty": 4153,
                    "num_whiteouts": 37,
                    "num_read": 10708344,
                    "num_read_kb": 271918170,
                    "num_write": 112158077,
                    "num_write_kb": 1708980316,
                    "num_scrub_errors": 0,
                    "num_shallow_scrub_errors": 0,
                    "num_deep_scrub_errors": 0,
                    "num_objects_recovered": 16176,
                    "num_bytes_recovered": 39993823744,
                    "num_keys_recovered": 0,
                    "num_objects_omap": 0,
                    "num_objects_hit_set_archive": 0,
                    "num_bytes_hit_set_archive": 0,
                    "num_flush": 0,
                    "num_flush_kb": 0,
                    "num_evict": 0,
                    "num_evict_kb": 0,
                    "num_promote": 0,
                    "num_flush_mode_high": 0,
                    "num_flush_mode_low": 0,
                    "num_evict_mode_some": 0,
                    "num_evict_mode_full": 0,
                    "num_objects_pinned": 0,
                    "num_legacy_snapsets": 0,
                    "num_large_omap_objects": 0,
                    "num_objects_manifest": 0,
                    "num_omap_bytes": 0,
                    "num_omap_keys": 0,
                    "num_objects_repaired": 0
                },
                "up": [
                    15,
                    1,
                    11
                ],
                "acting": [
                    15,
                    1,
                    11
                ],
                "avail_no_missing": [
                    "1",
                    "11"
                ],
                "object_location_counts": [
                    {
                        "shards": "1,11",
                        "objects": 4153
                    }
                ],
                "blocked_by": [],
                "up_primary": 15,
                "acting_primary": 15,
                "purged_snaps": []
            },
            "empty": 0,
            "dne": 0,
            "incomplete": 0,
            "last_epoch_started": 184402,
            "hit_set_history": {
                "current_last_update": "0'0",
                "history": []
            }
        }
    ],
    "recovery_state": [
        {
            "name": "Started/Primary/Active",
            "enter_time": "2021-04-28T23:40:03.531367+0300",
            "might_have_unfound": [
                {
                    "osd": "1",
                    "status": "already probed"
                },
                {
                    "osd": "11",
                    "status": "already probed"
                }
            ],
            "recovery_progress": {
                "backfill_targets": [],
                "waiting_on_backfill": [],
                "last_backfill_started": "MIN",
                "backfill_info": {
                    "begin": "MIN",
                    "end": "MIN",
                    "objects": []
                },
                "peer_backfill_info": [],
                "backfills_in_flight": [],
                "recovering": [],
                "pg_backend": {
                    "pull_from_peer": [],
                    "pushing": []
                }
            }
        },
        {
            "name": "Started",
            "enter_time": "2021-04-28T23:40:03.025845+0300"
        }
    ],
    "scrubber": {
        "epoch_start": "0",
        "active": false
    },
    "agent_state": {}
}

_______________________________________________
ceph-users mailing list -- ceph-users@xxxxxxx
To unsubscribe send an email to ceph-users-leave@xxxxxxx

[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Ceph Dev]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux