Re: incomplete pgs - cannot clear

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



I cut out a HUGE list of "purged_snaps" to keep this a little shorter...

$ cat 1.10e.txt
{
    "state": "incomplete",
    "snap_trimq": "[]",
    "snap_trimq_len": 0,
    "epoch": 465904,
    "up": [
        52,
        23,
        20
    ],
    "acting": [
        52,
        23,
        20
    ],
    "info": {
        "pgid": "1.10e",
        "last_update": "438490'293946",
        "last_complete": "438490'293946",
        "log_tail": "427182'292446",
        "last_user_version": 0,
        "last_backfill": "MIN",
        "last_backfill_bitwise": 1,
        "purged_snaps": [
            {
                "start": "2",
                "length": "12cd"
            },
            {
                "start": "12d0",
                "length": "1fca"
            },
... lots of snaps ...
        ],
        "history": {
            "epoch_created": 22654,
            "epoch_pool_created": 22654,
            "last_epoch_started": 447973,
            "last_interval_started": 447972,
            "last_epoch_clean": 438832,
            "last_interval_clean": 438831,
            "last_epoch_split": 0,
            "last_epoch_marked_full": 0,
            "same_up_since": 465900,
            "same_interval_since": 465901,
            "same_primary_since": 465901,
            "last_scrub": "438490'293946",
            "last_scrub_stamp": "2018-06-12 00:10:55.825562",
            "last_deep_scrub": "427203'293886",
            "last_deep_scrub_stamp": "2018-06-07 01:46:27.403211",
            "last_clean_scrub_stamp": "2018-06-12 00:10:55.825562"
        },
        "stats": {
            "version": "438490'293946",
            "reported_seq": "69672",
            "reported_epoch": "465904",
            "state": "incomplete",
            "last_fresh": "2018-06-14 11:51:52.770692",
            "last_change": "2018-06-14 11:51:52.770692",
            "last_active": "0.000000",
            "last_peered": "0.000000",
            "last_clean": "0.000000",
            "last_became_active": "0.000000",
            "last_became_peered": "0.000000",
            "last_unstale": "2018-06-14 11:51:52.770692",
            "last_undegraded": "2018-06-14 11:51:52.770692",
            "last_fullsized": "2018-06-14 11:51:52.770692",
            "mapping_epoch": 465901,
            "log_start": "427182'292446",
            "ondisk_log_start": "427182'292446",
            "created": 22654,
            "last_epoch_clean": 438832,
            "parent": "0.0",
            "parent_split_bits": 0,
            "last_scrub": "438490'293946",
            "last_scrub_stamp": "2018-06-12 00:10:55.825562",
            "last_deep_scrub": "427203'293886",
            "last_deep_scrub_stamp": "2018-06-07 01:46:27.403211",
            "last_clean_scrub_stamp": "2018-06-12 00:10:55.825562",
            "log_size": 1500,
            "ondisk_log_size": 1500,
            "stats_invalid": false,
            "dirty_stats_invalid": false,
            "omap_stats_invalid": false,
            "hitset_stats_invalid": false,
            "hitset_bytes_stats_invalid": false,
            "pin_stats_invalid": false,
            "snaptrimq_len": 0,
            "stat_sum": {
                "num_bytes": 0,
                "num_objects": 0,
                "num_object_clones": 0,
                "num_object_copies": 0,
                "num_objects_missing_on_primary": 0,
                "num_objects_missing": 0,
                "num_objects_degraded": 0,
                "num_objects_misplaced": 0,
                "num_objects_unfound": 0,
                "num_objects_dirty": 0,
                "num_whiteouts": 0,
                "num_read": 0,
                "num_read_kb": 0,
                "num_write": 0,
                "num_write_kb": 0,
                "num_scrub_errors": 0,
                "num_shallow_scrub_errors": 0,
                "num_deep_scrub_errors": 0,
                "num_objects_recovered": 0,
                "num_bytes_recovered": 0,
                "num_keys_recovered": 0,
                "num_objects_omap": 0,
                "num_objects_hit_set_archive": 0,
                "num_bytes_hit_set_archive": 0,
                "num_flush": 0,
                "num_flush_kb": 0,
                "num_evict": 0,
                "num_evict_kb": 0,
                "num_promote": 0,
                "num_flush_mode_high": 0,
                "num_flush_mode_low": 0,
                "num_evict_mode_some": 0,
                "num_evict_mode_full": 0,
                "num_objects_pinned": 0,
                "num_legacy_snapsets": 0
            },
            "up": [
                52,
                23,
                20
            ],
            "acting": [
                52,
                23,
                20
            ],
            "blocked_by": [
                65,
                100,
                101,
                107
            ],
            "up_primary": 52,
            "acting_primary": 52
        },
        "empty": 0,
        "dne": 0,
        "incomplete": 1,
        "last_epoch_started": 447973,
        "hit_set_history": {
            "current_last_update": "0'0",
            "history": []
        }
    },
    "peer_info": [
        {
            "peer": "5",
            "pgid": "1.10e",
            "last_update": "438490'293946",
            "last_complete": "438490'293946",
            "log_tail": "427182'292446",
            "last_user_version": 0,
            "last_backfill": "MIN",
            "last_backfill_bitwise": 1,
            "purged_snaps": [
                {
                    "start": "2",
                    "length": "12cd"
                },
                {
                    "start": "12d0",
                    "length": "1fca"
                },
... lots of snaps ...
            ],
            "history": {
                "epoch_created": 22654,
                "epoch_pool_created": 22654,
                "last_epoch_started": 447973,
                "last_interval_started": 447972,
                "last_epoch_clean": 438832,
                "last_interval_clean": 438831,
                "last_epoch_split": 0,
                "last_epoch_marked_full": 0,
                "same_up_since": 465900,
                "same_interval_since": 465901,
                "same_primary_since": 465901,
                "last_scrub": "438490'293946",
                "last_scrub_stamp": "2018-06-12 00:10:55.825562",
                "last_deep_scrub": "427203'293886",
                "last_deep_scrub_stamp": "2018-06-07 01:46:27.403211",
                "last_clean_scrub_stamp": "2018-06-12 00:10:55.825562"
            },
            "stats": {
                "version": "438490'293946",
                "reported_seq": "58224",
                "reported_epoch": "460636",
                "state": "peering",
                "last_fresh": "2018-06-14 09:07:43.914677",
                "last_change": "2018-06-14 09:07:43.580029",
                "last_active": "0.000000",
                "last_peered": "0.000000",
                "last_clean": "0.000000",
                "last_became_active": "0.000000",
                "last_became_peered": "0.000000",
                "last_unstale": "2018-06-14 09:07:43.914677",
                "last_undegraded": "2018-06-14 09:07:43.914677",
                "last_fullsized": "2018-06-14 09:07:43.914677",
                "mapping_epoch": 465901,
                "log_start": "427182'292446",
                "ondisk_log_start": "427182'292446",
                "created": 22654,
                "last_epoch_clean": 438832,
                "parent": "0.0",
                "parent_split_bits": 0,
                "last_scrub": "438490'293946",
                "last_scrub_stamp": "2018-06-12 00:10:55.825562",
                "last_deep_scrub": "427203'293886",
                "last_deep_scrub_stamp": "2018-06-07 01:46:27.403211",
                "last_clean_scrub_stamp": "2018-06-12 00:10:55.825562",
                "log_size": 1500,
                "ondisk_log_size": 1500,
                "stats_invalid": false,
                "dirty_stats_invalid": false,
                "omap_stats_invalid": false,
                "hitset_stats_invalid": false,
                "hitset_bytes_stats_invalid": false,
                "pin_stats_invalid": false,
                "snaptrimq_len": 0,
                "stat_sum": {
                    "num_bytes": 0,
                    "num_objects": 0,
                    "num_object_clones": 0,
                    "num_object_copies": 0,
                    "num_objects_missing_on_primary": 0,
                    "num_objects_missing": 0,
                    "num_objects_degraded": 0,
                    "num_objects_misplaced": 0,
                    "num_objects_unfound": 0,
                    "num_objects_dirty": 0,
                    "num_whiteouts": 0,
                    "num_read": 0,
                    "num_read_kb": 0,
                    "num_write": 0,
                    "num_write_kb": 0,
                    "num_scrub_errors": 0,
                    "num_shallow_scrub_errors": 0,
                    "num_deep_scrub_errors": 0,
                    "num_objects_recovered": 0,
                    "num_bytes_recovered": 0,
                    "num_keys_recovered": 0,
                    "num_objects_omap": 0,
                    "num_objects_hit_set_archive": 0,
                    "num_bytes_hit_set_archive": 0,
                    "num_flush": 0,
                    "num_flush_kb": 0,
                    "num_evict": 0,
                    "num_evict_kb": 0,
                    "num_promote": 0,
                    "num_flush_mode_high": 0,
                    "num_flush_mode_low": 0,
                    "num_evict_mode_some": 0,
                    "num_evict_mode_full": 0,
                    "num_objects_pinned": 0,
                    "num_legacy_snapsets": 0
                },
                "up": [
                    52,
                    23,
                    20
                ],
                "acting": [
                    52,
                    23,
                    20
                ],
                "blocked_by": [
                    20,
                    23,
                    30,
                    107
                ],
                "up_primary": 52,
                "acting_primary": 52
            },
            "empty": 0,
            "dne": 0,
            "incomplete": 1,
            "last_epoch_started": 447973,
            "hit_set_history": {
                "current_last_update": "0'0",
                "history": []
            }
        },
        {
            "peer": "10",
            "pgid": "1.10e",
            "last_update": "438490'293946",
            "last_complete": "438490'293946",
            "log_tail": "427182'292446",
            "last_user_version": 0,
            "last_backfill": "MIN",
            "last_backfill_bitwise": 1,
            "purged_snaps": [
                {
                    "start": "2",
                    "length": "12cd"
                },
                {
                    "start": "12d0",
                    "length": "1fca"
                },
... lots of snaps ...
            ],
            "history": {
                "epoch_created": 22654,
                "epoch_pool_created": 22654,
                "last_epoch_started": 447973,
                "last_interval_started": 447972,
                "last_epoch_clean": 438832,
                "last_interval_clean": 438831,
                "last_epoch_split": 0,
                "last_epoch_marked_full": 0,
                "same_up_since": 465900,
                "same_interval_since": 465901,
                "same_primary_since": 465901,
                "last_scrub": "438490'293946",
                "last_scrub_stamp": "2018-06-12 00:10:55.825562",
                "last_deep_scrub": "427203'293886",
                "last_deep_scrub_stamp": "2018-06-07 01:46:27.403211",
                "last_clean_scrub_stamp": "2018-06-12 00:10:55.825562"
            },
            "stats": {
                "version": "0'0",
                "reported_seq": "0",
                "reported_epoch": "0",
                "state": "unknown",
                "last_fresh": "0.000000",
                "last_change": "0.000000",
                "last_active": "0.000000",
                "last_peered": "0.000000",
                "last_clean": "0.000000",
                "last_became_active": "0.000000",
                "last_became_peered": "0.000000",
                "last_unstale": "0.000000",
                "last_undegraded": "0.000000",
                "last_fullsized": "0.000000",
                "mapping_epoch": 465901,
                "log_start": "0'0",
                "ondisk_log_start": "0'0",
                "created": 0,
                "last_epoch_clean": 0,
                "parent": "0.0",
                "parent_split_bits": 0,
                "last_scrub": "0'0",
                "last_scrub_stamp": "0.000000",
                "last_deep_scrub": "0'0",
                "last_deep_scrub_stamp": "0.000000",
                "last_clean_scrub_stamp": "0.000000",
                "log_size": 0,
                "ondisk_log_size": 0,
                "stats_invalid": false,
                "dirty_stats_invalid": false,
                "omap_stats_invalid": false,
                "hitset_stats_invalid": false,
                "hitset_bytes_stats_invalid": false,
                "pin_stats_invalid": false,
                "snaptrimq_len": 0,
                "stat_sum": {
                    "num_bytes": 0,
                    "num_objects": 0,
                    "num_object_clones": 0,
                    "num_object_copies": 0,
                    "num_objects_missing_on_primary": 0,
                    "num_objects_missing": 0,
                    "num_objects_degraded": 0,
                    "num_objects_misplaced": 0,
                    "num_objects_unfound": 0,
                    "num_objects_dirty": 0,
                    "num_whiteouts": 0,
                    "num_read": 0,
                    "num_read_kb": 0,
                    "num_write": 0,
                    "num_write_kb": 0,
                    "num_scrub_errors": 0,
                    "num_shallow_scrub_errors": 0,
                    "num_deep_scrub_errors": 0,
                    "num_objects_recovered": 0,
                    "num_bytes_recovered": 0,
                    "num_keys_recovered": 0,
                    "num_objects_omap": 0,
                    "num_objects_hit_set_archive": 0,
                    "num_bytes_hit_set_archive": 0,
                    "num_flush": 0,
                    "num_flush_kb": 0,
                    "num_evict": 0,
                    "num_evict_kb": 0,
                    "num_promote": 0,
                    "num_flush_mode_high": 0,
                    "num_flush_mode_low": 0,
                    "num_evict_mode_some": 0,
                    "num_evict_mode_full": 0,
                    "num_objects_pinned": 0,
                    "num_legacy_snapsets": 0
                },
                "up": [
                    52,
                    23,
                    20
                ],
                "acting": [
                    52,
                    23,
                    20
                ],
                "blocked_by": [],
                "up_primary": 52,
                "acting_primary": 52
            },
            "empty": 0,
            "dne": 0,
            "incomplete": 1,
            "last_epoch_started": 447587,
            "hit_set_history": {
                "current_last_update": "0'0",
                "history": []
            }
        }
    ],
    "recovery_state": [
        {
            "name": "Started/Primary/Peering/Incomplete",
            "enter_time": "2018-06-14 11:51:52.770682",
            "comment": "not enough complete instances of this PG"
        },
        {
            "name": "Started/Primary/Peering",
            "enter_time": "2018-06-14 11:51:52.745649",
            "past_intervals": [
                {
                    "first": "438831",
                    "last": "465900",
                    "all_participants": [
                        {
                            "osd": 20
                        },
                        {
                            "osd": 23
                        },
                        {
                            "osd": 30
                        },
                        {
                            "osd": 52
                        },
                        {
                            "osd": 65
                        },
                        {
                            "osd": 100
                        },
                        {
                            "osd": 101
                        },
                        {
                            "osd": 107
                        }
                    ],
                    "intervals": [
                        {
                            "first": "447972",
                            "last": "447978",
                            "acting": "100,101"
                        },
                        {
                            "first": "455481",
                            "last": "455502",
                            "acting": "52,107"
                        },
                        {
                            "first": "455748",
                            "last": "455905",
                            "acting": "23,107"
                        },
                        {
                            "first": "462150",
                            "last": "462151",
                            "acting": "20"
                        },
                        {
                            "first": "465118",
                            "last": "465119",
                            "acting": "23,52"
                        },
                        {
                            "first": "465815",
                            "last": "465816",
                            "acting": "20,23,52"
                        }
                    ]
                }
            ],
            "probing_osds": [
                "20",
                "23",
                "30",
                "52"
            ],
            "down_osds_we_would_probe": [
                65,
                100,
                101,
                107
            ],
            "peering_blocked_by": [],
            "peering_blocked_by_detail": [
                {
                    "detail": "peering_blocked_by_history_les_bound"
                }
            ]
        },
        {
            "name": "Started",
            "enter_time": "2018-06-14 11:51:52.745611"
        }
    ],
    "agent_state": {}
}

On Thu, Jun 14, 2018 at 11:53 AM, Sage Weil <sage@xxxxxxxxxxxx> wrote:
> On Thu, 14 Jun 2018, Wyllys Ingersoll wrote:
>> Yes, I did have the ignore_history_les option set for 2 of the running
>> osds, but I disabled and restarted the affected osds and this is where
>> it ends up:
>>
>>             "probing_osds": [
>>                 "20",
>>                 "23",
>>                 "30",
>>                 "52"
>>             ],
>>             "down_osds_we_would_probe": [
>>                 65,
>>                 100,
>>                 101,
>>                 107
>>             ],
>>             "peering_blocked_by": [],
>>             "peering_blocked_by_detail": [
>>                 {
>>                     "detail": "peering_blocked_by_history_les_bound"
>>                 }
>>             ]
>>
>>
>> The 'down_osds_we_would_probe' are all non-existent.  This is where I
>> started the day, still can't get past it.  And this is seen on all of
>> the incomplete pgs, this is just 1 example.
>
> Post the full query from this state?
>
> sage
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux