Re: The effect of changing an osd's class

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



All the problem PGs are on osd.39.  When I stop osd.39, it shows that 86 PGs would be offline.  However, no recovery happens.  It just stays there, with 86 PGs in undersized+remapped+peered.

I managed to pin down all the placement groups that are in this state by using:

ceph pg dump | grep active+clean+remapped

From there I queried the first pg on the list:

NodeC:~# ceph pg 28.42 query
{
    "snap_trimq": "[]",
    "snap_trimq_len": 0,
    "state": "active+clean+remapped",
    "epoch": 490547,
    "up": [],
    "acting": [
        39,
        1
    ],
    "acting_recovery_backfill": [
        "1",
        "39"
    ],
    "info": {
        "pgid": "28.42",
        "last_update": "489784'67",
        "last_complete": "489784'67",
        "log_tail": "0'0",
        "last_user_version": 67,
        "last_backfill": "MAX",
        "purged_snaps": [],
        "history": {
            "epoch_created": 487438,
            "epoch_pool_created": 487438,
            "last_epoch_started": 490544,
            "last_interval_started": 490543,
            "last_epoch_clean": 490544,
            "last_interval_clean": 490543,
            "last_epoch_split": 0,
            "last_epoch_marked_full": 0,
            "same_up_since": 489999,
            "same_interval_since": 490543,
            "same_primary_since": 490543,
            "last_scrub": "489784'67",
            "last_scrub_stamp": "2024-11-15T09:16:59.715671+0200",
            "last_deep_scrub": "0'0",
            "last_deep_scrub_stamp": "2024-11-12T16:55:34.967587+0200",
            "last_clean_scrub_stamp": "2024-11-15T09:16:59.715671+0200",
            "prior_readable_until_ub": 0
        },
        "stats": {
            "version": "489784'67",
            "reported_seq": 721,
            "reported_epoch": 490547,
            "state": "active+clean+remapped",
            "last_fresh": "2024-11-15T16:07:08.111992+0200",
            "last_change": "2024-11-15T15:01:12.081136+0200",
            "last_active": "2024-11-15T16:07:08.111992+0200",
            "last_peered": "2024-11-15T16:07:08.111992+0200",
            "last_clean": "2024-11-15T16:07:08.111992+0200",
            "last_became_active": "2024-11-15T15:01:12.080691+0200",
            "last_became_peered": "2024-11-15T15:01:12.080691+0200",
            "last_unstale": "2024-11-15T16:07:08.111992+0200",
            "last_undegraded": "2024-11-15T16:07:08.111992+0200",
            "last_fullsized": "2024-11-15T16:07:08.111992+0200",
            "mapping_epoch": 490543,
            "log_start": "0'0",
            "ondisk_log_start": "0'0",
            "created": 487438,
            "last_epoch_clean": 490544,
            "parent": "0.0",
            "parent_split_bits": 0,
            "last_scrub": "489784'67",
            "last_scrub_stamp": "2024-11-15T09:16:59.715671+0200",
            "last_deep_scrub": "0'0",
            "last_deep_scrub_stamp": "2024-11-12T16:55:34.967587+0200",
            "last_clean_scrub_stamp": "2024-11-15T09:16:59.715671+0200",
            "objects_scrubbed": 0,
            "log_size": 67,
            "ondisk_log_size": 67,
            "stats_invalid": false,
            "dirty_stats_invalid": false,
            "omap_stats_invalid": false,
            "hitset_stats_invalid": false,
            "hitset_bytes_stats_invalid": false,
            "pin_stats_invalid": false,
            "manifest_stats_invalid": false,
            "snaptrimq_len": 0,
            "last_scrub_duration": 1,
            "scrub_schedule": "periodic scrub scheduled @ 2024-11-16T17:19:20.646231+0000",
            "scrub_duration": 0.024065349,
            "objects_trimmed": 0,
            "snaptrim_duration": 0.090716250999999998,
            "stat_sum": {
                "num_bytes": 0,
                "num_objects": 0,
                "num_object_clones": 0,
                "num_object_copies": 0,
                "num_objects_missing_on_primary": 0,
                "num_objects_missing": 0,
                "num_objects_degraded": 0,
                "num_objects_misplaced": 0,
                "num_objects_unfound": 0,
                "num_objects_dirty": 0,
                "num_whiteouts": 0,
                "num_read": 68,
                "num_read_kb": 708,
                "num_write": 67,
                "num_write_kb": 19088,
                "num_scrub_errors": 0,
                "num_shallow_scrub_errors": 0,
                "num_deep_scrub_errors": 0,
                "num_objects_recovered": 12,
                "num_bytes_recovered": 26607616,
                "num_keys_recovered": 0,
                "num_objects_omap": 0,
                "num_objects_hit_set_archive": 0,
                "num_bytes_hit_set_archive": 0,
                "num_flush": 0,
                "num_flush_kb": 0,
                "num_evict": 0,
                "num_evict_kb": 0,
                "num_promote": 0,
                "num_flush_mode_high": 0,
                "num_flush_mode_low": 0,
                "num_evict_mode_some": 0,
                "num_evict_mode_full": 0,
                "num_objects_pinned": 0,
                "num_legacy_snapsets": 0,
                "num_large_omap_objects": 0,
                "num_objects_manifest": 0,
                "num_omap_bytes": 0,
                "num_omap_keys": 0,
                "num_objects_repaired": 0
            },
            "up": [],
            "acting": [
                39,
                1
            ],
            "avail_no_missing": [
                "39",
                "1"
            ],
            "object_location_counts": [],
            "blocked_by": [],
            "up_primary": -1,
            "acting_primary": 39,
            "purged_snaps": []
        },
        "empty": 0,
        "dne": 0,
        "incomplete": 0,
        "last_epoch_started": 490544,
        "hit_set_history": {
            "current_last_update": "0'0",
            "history": []
        }
    },
    "peer_info": [
        {
            "peer": "1",
            "pgid": "28.42",
            "last_update": "489784'67",
            "last_complete": "489784'67",
            "log_tail": "0'0",
            "last_user_version": 67,
            "last_backfill": "MAX",
            "purged_snaps": [],
            "history": {
                "epoch_created": 487438,
                "epoch_pool_created": 487438,
                "last_epoch_started": 490544,
                "last_interval_started": 490543,
                "last_epoch_clean": 490544,
                "last_interval_clean": 490543,
                "last_epoch_split": 0,
                "last_epoch_marked_full": 0,
                "same_up_since": 489999,
                "same_interval_since": 490543,
                "same_primary_since": 490543,
                "last_scrub": "489784'67",
                "last_scrub_stamp": "2024-11-15T09:16:59.715671+0200",
                "last_deep_scrub": "0'0",
                "last_deep_scrub_stamp": "2024-11-12T16:55:34.967587+0200",
                "last_clean_scrub_stamp": "2024-11-15T09:16:59.715671+0200",
                "prior_readable_until_ub": 0
            },
            "stats": {
                "version": "489784'67",
                "reported_seq": 430,
                "reported_epoch": 490542,
                "state": "undersized+remapped+peered",
                "last_fresh": "2024-11-15T15:01:10.046703+0200",
                "last_change": "2024-11-15T15:00:37.072082+0200",
                "last_active": "2024-11-14T15:41:06.074260+0200",
                "last_peered": "2024-11-15T15:01:10.046703+0200",
                "last_clean": "2024-11-14T15:41:03.945754+0200",
                "last_became_active": "2024-11-14T15:39:15.997707+0200",
                "last_became_peered": "2024-11-15T15:00:37.072082+0200",
                "last_unstale": "2024-11-15T15:01:10.046703+0200",
                "last_undegraded": "2024-11-15T15:01:10.046703+0200",
                "last_fullsized": "2024-11-15T15:00:37.069250+0200",
                "mapping_epoch": 490543,
                "log_start": "0'0",
                "ondisk_log_start": "0'0",
                "created": 487438,
                "last_epoch_clean": 490528,
                "parent": "0.0",
                "parent_split_bits": 0,
                "last_scrub": "489784'67",
                "last_scrub_stamp": "2024-11-15T09:16:59.715671+0200",
                "last_deep_scrub": "0'0",
                "last_deep_scrub_stamp": "2024-11-12T16:55:34.967587+0200",
                "last_clean_scrub_stamp": "2024-11-15T09:16:59.715671+0200",
                "objects_scrubbed": 0,
                "log_size": 67,
                "ondisk_log_size": 67,
                "stats_invalid": false,
                "dirty_stats_invalid": false,
                "omap_stats_invalid": false,
                "hitset_stats_invalid": false,
                "hitset_bytes_stats_invalid": false,
                "pin_stats_invalid": false,
                "manifest_stats_invalid": false,
                "snaptrimq_len": 0,
                "last_scrub_duration": 0,
                "scrub_schedule": "periodic scrub scheduled @ 2024-11-16T12:15:01.587848+0000",
                "scrub_duration": 0,
                "objects_trimmed": 0,
                "snaptrim_duration": 0.090716250999999998,
                "stat_sum": {
                    "num_bytes": 0,
                    "num_objects": 0,
                    "num_object_clones": 0,
                    "num_object_copies": 0,
                    "num_objects_missing_on_primary": 0,
                    "num_objects_missing": 0,
                    "num_objects_degraded": 0,
                    "num_objects_misplaced": 0,
                    "num_objects_unfound": 0,
                    "num_objects_dirty": 0,
                    "num_whiteouts": 0,
                    "num_read": 68,
                    "num_read_kb": 708,
                    "num_write": 67,
                    "num_write_kb": 19088,
                    "num_scrub_errors": 0,
                    "num_shallow_scrub_errors": 0,
                    "num_deep_scrub_errors": 0,
                    "num_objects_recovered": 7,
                    "num_bytes_recovered": 26607616,
                    "num_keys_recovered": 0,
                    "num_objects_omap": 0,
                    "num_objects_hit_set_archive": 0,
                    "num_bytes_hit_set_archive": 0,
                    "num_flush": 0,
                    "num_flush_kb": 0,
                    "num_evict": 0,
                    "num_evict_kb": 0,
                    "num_promote": 0,
                    "num_flush_mode_high": 0,
                    "num_flush_mode_low": 0,
                    "num_evict_mode_some": 0,
                    "num_evict_mode_full": 0,
                    "num_objects_pinned": 0,
                    "num_legacy_snapsets": 0,
                    "num_large_omap_objects": 0,
                    "num_objects_manifest": 0,
                    "num_omap_bytes": 0,
                    "num_omap_keys": 0,
                    "num_objects_repaired": 0
                },
                "up": [],
                "acting": [
                    39,
                    1
                ],
                "avail_no_missing": [
                    "1"
                ],
                "object_location_counts": [],
                "blocked_by": [],
                "up_primary": -1,
                "acting_primary": 39,
                "purged_snaps": []
            },
            "empty": 0,
            "dne": 0,
            "incomplete": 0,
            "last_epoch_started": 490544,
            "hit_set_history": {
                "current_last_update": "0'0",
                "history": []
            }
        }
    ],
    "recovery_state": [
        {
            "name": "Started/Primary/Active",
            "enter_time": "2024-11-15T15:01:12.073557+0200",
            "might_have_unfound": [],
            "recovery_progress": {
                "backfill_targets": [],
                "waiting_on_backfill": [],
                "last_backfill_started": "MIN",
                "backfill_info": {
                    "begin": "MIN",
                    "end": "MIN",
                    "objects": []
                },
                "peer_backfill_info": [],
                "backfills_in_flight": [],
                "recovering": [],
                "pg_backend": {
                    "pull_from_peer": [],
                    "pushing": []
                }
            }
        },
        {
            "name": "Started",
            "enter_time": "2024-11-15T15:01:11.069697+0200"
        }
    ],
    "scrubber": {
        "active": false,
        "must_scrub": false,
        "must_deep_scrub": false,
        "must_repair": false,
        "need_auto": false,
        "scrub_reg_stamp": "2024-11-16T19:19:20.646231+0200",
        "schedule": "scrub scheduled @ 2024-11-16T17:19:20.646231+0000"
    },
    "agent_state": {}
}

Can anyone see from this why this PG (and the other PGs) is stuck on this OSD?


On 2024/11/15 13:36, Roland Giesler wrote:
On 2024/11/15 13:00, Gregory Orange wrote:
On 15/11/24 17:11, Roland Giesler wrote:
How do I determine the primary osd?
ceph pg map $pg

ceph pg $pg query | jq .info.stats.acting_primary

You can jq and less to take a look at other values which might be
informative too.

Ah, of course :-)  Sorry, I was looking for the primary osd of a pool! No wonder I couldn't find anything.  LOL!


Greg.
_______________________________________________
ceph-users mailing list -- ceph-users@xxxxxxx
To unsubscribe send an email to ceph-users-leave@xxxxxxx
_______________________________________________
ceph-users mailing list -- ceph-users@xxxxxxx
To unsubscribe send an email to ceph-users-leave@xxxxxxx




[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Ceph Dev]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux