ceph pgs inconsistent, always the same checksum

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

I've got a Ceph cluster: 7 nodes and 168 OSDs, with 96G of RAM on each server.
Ceph has been instructed to use a memory target of 3G until we increase RAM
to 128G per node. Available memory tends to hover around 14G. I do see a
tiny bit (KB) of swap utilization per ceph-osd process, but there's no
obvious reason for it, so I'm unsure what that's about:

root@ceph02:~# cat /proc/14363/status |egrep 'Name|VmSwap'

Name:   ceph-osd

VmSwap:      464 kB

We're seeing repeated inconsistent PG warnings, generally on the order of
3-10 per week.

    pg 2.b9 is active+clean+inconsistent, acting [25,117,128,95,151,15]

PG query on that PG:

INFO:cephadm:Using recent ceph image docker.io/ceph/ceph:v15

{

    "snap_trimq": "[]",

    "snap_trimq_len": 0,

    "state": "active+clean+inconsistent",

    "epoch": 20278,

    "up": [

        25,

        117,

        128,

        95,

        151,

        15

    ],

    "acting": [

        25,

        117,

        128,

        95,

        151,

        15

    ],

    "acting_recovery_backfill": [

        "15(5)",

        "25(0)",

        "95(3)",

        "117(1)",

        "128(2)",

        "151(4)"

    ],

    "info": {

        "pgid": "2.b9s0",

        "last_update": "20278'445510",

        "last_complete": "20278'445510",

        "log_tail": "20278'438137",

        "last_user_version": 445510,

        "last_backfill": "MAX",

        "purged_snaps": [],

        "history": {

            "epoch_created": 573,

            "epoch_pool_created": 100,

            "last_epoch_started": 14679,

            "last_interval_started": 14678,

            "last_epoch_clean": 14716,

            "last_interval_clean": 14678,

            "last_epoch_split": 573,

            "last_epoch_marked_full": 0,

            "same_up_since": 14678,

            "same_interval_since": 14678,

            "same_primary_since": 14396,

            "last_scrub": "20278'444009",

            "last_scrub_stamp": "2020-09-08T16:57:22.430246+0000",

            "last_deep_scrub": "20278'444009",

            "last_deep_scrub_stamp": "2020-09-08T16:57:22.430246+0000",

            "last_clean_scrub_stamp": "2020-09-07T06:34:26.320796+0000",

            "prior_readable_until_ub": 0

        },

        "stats": {

            "version": "20278'445510",

            "reported_seq": "896803",

            "reported_epoch": "20278",

            "state": "active+clean+inconsistent",

            "last_fresh": "2020-09-08T18:06:45.463880+0000",

            "last_change": "2020-09-08T16:57:22.430293+0000",

            "last_active": "2020-09-08T18:06:45.463880+0000",

            "last_peered": "2020-09-08T18:06:45.463880+0000",

            "last_clean": "2020-09-08T18:06:45.463880+0000",

            "last_became_active": "2020-08-06T19:35:02.634999+0000",

            "last_became_peered": "2020-08-06T19:35:02.634999+0000",

            "last_unstale": "2020-09-08T18:06:45.463880+0000",

            "last_undegraded": "2020-09-08T18:06:45.463880+0000",

            "last_fullsized": "2020-09-08T18:06:45.463880+0000",

            "mapping_epoch": 14678,

            "log_start": "20278'438137",

            "ondisk_log_start": "20278'438137",

            "created": 573,

            "last_epoch_clean": 14716,

            "parent": "0.0",

            "parent_split_bits": 10,

            "last_scrub": "20278'444009",

            "last_scrub_stamp": "2020-09-08T16:57:22.430246+0000",

            "last_deep_scrub": "20278'444009",

            "last_deep_scrub_stamp": "2020-09-08T16:57:22.430246+0000",

            "last_clean_scrub_stamp": "2020-09-07T06:34:26.320796+0000",

            "log_size": 7373,

            "ondisk_log_size": 7373,

            "stats_invalid": false,

            "dirty_stats_invalid": false,

            "omap_stats_invalid": false,

            "hitset_stats_invalid": false,

            "hitset_bytes_stats_invalid": false,

            "pin_stats_invalid": false,

            "manifest_stats_invalid": false,

            "snaptrimq_len": 0,

            "stat_sum": {

                "num_bytes": 322985947136,

                "num_objects": 78724,

                "num_object_clones": 0,

                "num_object_copies": 472344,

                "num_objects_missing_on_primary": 0,

                "num_objects_missing": 0,

                "num_objects_degraded": 0,

                "num_objects_misplaced": 0,

                "num_objects_unfound": 0,

                "num_objects_dirty": 78724,

                "num_whiteouts": 0,

                "num_read": 430713,

                "num_read_kb": 121695928,

                "num_write": 445501,

                "num_write_kb": 405283436,

                "num_scrub_errors": 1,

                "num_shallow_scrub_errors": 0,

                "num_deep_scrub_errors": 1,

                "num_objects_recovered": 21,

                "num_bytes_recovered": 88080384,

                "num_keys_recovered": 0,

                "num_objects_omap": 0,

                "num_objects_hit_set_archive": 0,

                "num_bytes_hit_set_archive": 0,

                "num_flush": 0,

                "num_flush_kb": 0,

                "num_evict": 0,

                "num_evict_kb": 0,

                "num_promote": 0,

                "num_flush_mode_high": 0,

                "num_flush_mode_low": 0,

                "num_evict_mode_some": 0,

                "num_evict_mode_full": 0,

                "num_objects_pinned": 0,

                "num_legacy_snapsets": 0,

                "num_large_omap_objects": 0,

                "num_objects_manifest": 0,

                "num_omap_bytes": 0,

                "num_omap_keys": 0,

                "num_objects_repaired": 0

            },

            "up": [

                25,

                117,

                128,

                95,

                151,

                15

            ],

            "acting": [

                25,

                117,

                128,

                95,

                151,

                15

            ],

            "avail_no_missing": [],

            "object_location_counts": [],

            "blocked_by": [],

            "up_primary": 25,

            "acting_primary": 25,

            "purged_snaps": []

        },

        "empty": 0,

        "dne": 0,

        "incomplete": 0,

        "last_epoch_started": 14679,

        "hit_set_history": {

            "current_last_update": "0'0",

            "history": []

        }

    },

    "peer_info": [

        {

            "peer": "15(5)",

            "pgid": "2.b9s5",

            "last_update": "20278'445510",

            "last_complete": "18934'278187",

            "log_tail": "14173'104284",

            "last_user_version": 111692,

            "last_backfill": "MAX",

            "purged_snaps": [],

            "history": {

                "epoch_created": 573,

                "epoch_pool_created": 100,

                "last_epoch_started": 14679,

                "last_interval_started": 14678,

                "last_epoch_clean": 14716,

                "last_interval_clean": 14678,

                "last_epoch_split": 573,

                "last_epoch_marked_full": 0,

                "same_up_since": 14678,

                "same_interval_since": 14678,

                "same_primary_since": 14396,

                "last_scrub": "20278'444009",

                "last_scrub_stamp": "2020-09-08T16:57:22.430246+0000",

                "last_deep_scrub": "20278'444009",

                "last_deep_scrub_stamp": "2020-09-08T16:57:22.430246+0000",

                "last_clean_scrub_stamp": "2020-09-07T06:34:26.320796+0000",

                "prior_readable_until_ub": 0

            },

            "stats": {

                "version": "14674'111692",

                "reported_seq": "127502",

                "reported_epoch": "14674",

                "state": "active+undersized+degraded",

                "last_fresh": "2020-08-06T19:34:47.288930+0000",

                "last_change": "2020-08-06T19:31:58.919146+0000",

                "last_active": "2020-08-06T19:34:47.288930+0000",

                "last_peered": "2020-08-06T19:34:47.288930+0000",

                "last_clean": "2020-08-06T19:31:36.499168+0000",

                "last_became_active": "2020-08-06T19:31:58.919146+0000",

                "last_became_peered": "2020-08-06T19:31:58.919146+0000",

                "last_unstale": "2020-08-06T19:34:47.288930+0000",

                "last_undegraded": "2020-08-06T19:31:58.906847+0000",

                "last_fullsized": "2020-08-06T19:31:58.906728+0000",

                "mapping_epoch": 14678,

                "log_start": "14173'104284",

                "ondisk_log_start": "14173'104284",

                "created": 573,

                "last_epoch_clean": 14624,

                "parent": "0.0",

                "parent_split_bits": 10,

                "last_scrub": "14341'106257",

                "last_scrub_stamp": "2020-08-06T00:08:25.447555+0000",

                "last_deep_scrub": "14005'91363",

                "last_deep_scrub_stamp": "2020-08-04T13:36:30.857877+0000",

                "last_clean_scrub_stamp": "2020-08-06T00:08:25.447555+0000",

                "log_size": 7408,

                "ondisk_log_size": 7408,

                "stats_invalid": false,

                "dirty_stats_invalid": false,

                "omap_stats_invalid": false,

                "hitset_stats_invalid": false,

                "hitset_bytes_stats_invalid": false,

                "pin_stats_invalid": false,

                "manifest_stats_invalid": false,

                "snaptrimq_len": 0,

                "stat_sum": {

                    "num_bytes": 83721076736,

                    "num_objects": 19967,

                    "num_object_clones": 0,

                    "num_object_copies": 119802,

                    "num_objects_missing_on_primary": 0,

                    "num_objects_missing": 0,

                    "num_objects_degraded": 19967,

                    "num_objects_misplaced": 0,

                    "num_objects_unfound": 0,

                    "num_objects_dirty": 19967,

                    "num_whiteouts": 0,

                    "num_read": 938,

                    "num_read_kb": 288244,

                    "num_write": 111692,

                    "num_write_kb": 84295064,

                    "num_scrub_errors": 0,

                    "num_shallow_scrub_errors": 0,

                    "num_deep_scrub_errors": 0,

                    "num_objects_recovered": 17,

                    "num_bytes_recovered": 71303168,

                    "num_keys_recovered": 0,

                    "num_objects_omap": 0,

                    "num_objects_hit_set_archive": 0,

                    "num_bytes_hit_set_archive": 0,

                    "num_flush": 0,

                    "num_flush_kb": 0,

                    "num_evict": 0,

                    "num_evict_kb": 0,

                    "num_promote": 0,

                    "num_flush_mode_high": 0,

                    "num_flush_mode_low": 0,

                    "num_evict_mode_some": 0,

                    "num_evict_mode_full": 0,

                    "num_objects_pinned": 0,

                    "num_legacy_snapsets": 0,

                    "num_large_omap_objects": 0,

                    "num_objects_manifest": 0,

                    "num_omap_bytes": 0,

                    "num_omap_keys": 0,

                    "num_objects_repaired": 0

                },

                "up": [

                    25,

                    117,

                    128,

                    95,

                    151,

                    15

                ],

                "acting": [

                    25,

                    117,

                    128,

                    95,

                    151,

                    15

                ],

                "avail_no_missing": [

                    "25(0)",

                    "15(5)",

                    "95(3)",

                    "117(1)",

                    "128(2)"

                ],

                "object_location_counts": [

                    {

                        "shards": "15(5),25(0),95(3),117(1),128(2)",

                        "objects": 19967

                    }

                ],

                "blocked_by": [],

                "up_primary": 25,

                "acting_primary": 25,

                "purged_snaps": []

            },

            "empty": 0,

            "dne": 0,

            "incomplete": 0,

            "last_epoch_started": 14679,

            "hit_set_history": {

                "current_last_update": "0'0",

                "history": []

            }

        },

        {

            "peer": "95(3)",

            "pgid": "2.b9s3",

            "last_update": "20278'445510",

            "last_complete": "18934'278187",

            "log_tail": "14173'104284",

            "last_user_version": 111692,

            "last_backfill": "MAX",

            "purged_snaps": [],

            "history": {

                "epoch_created": 573,

                "epoch_pool_created": 100,

                "last_epoch_started": 14679,

                "last_interval_started": 14678,

                "last_epoch_clean": 14716,

                "last_interval_clean": 14678,

                "last_epoch_split": 573,

                "last_epoch_marked_full": 0,

                "same_up_since": 14678,

                "same_interval_since": 14678,

                "same_primary_since": 14396,

                "last_scrub": "20278'444009",

                "last_scrub_stamp": "2020-09-08T16:57:22.430246+0000",

                "last_deep_scrub": "20278'444009",

                "last_deep_scrub_stamp": "2020-09-08T16:57:22.430246+0000",

                "last_clean_scrub_stamp": "2020-09-07T06:34:26.320796+0000",

                "prior_readable_until_ub": 0

            },

            "stats": {

                "version": "14674'111692",

                "reported_seq": "127502",

                "reported_epoch": "14674",

                "state": "active+undersized+degraded",

                "last_fresh": "2020-08-06T19:34:47.288930+0000",

                "last_change": "2020-08-06T19:31:58.919146+0000",

                "last_active": "2020-08-06T19:34:47.288930+0000",

                "last_peered": "2020-08-06T19:34:47.288930+0000",

                "last_clean": "2020-08-06T19:31:36.499168+0000",

                "last_became_active": "2020-08-06T19:31:58.919146+0000",

                "last_became_peered": "2020-08-06T19:31:58.919146+0000",

                "last_unstale": "2020-08-06T19:34:47.288930+0000",

                "last_undegraded": "2020-08-06T19:31:58.906847+0000",

                "last_fullsized": "2020-08-06T19:31:58.906728+0000",

                "mapping_epoch": 14678,

                "log_start": "14173'104284",

                "ondisk_log_start": "14173'104284",

                "created": 573,

                "last_epoch_clean": 14624,

                "parent": "0.0",

                "parent_split_bits": 10,

                "last_scrub": "14341'106257",

                "last_scrub_stamp": "2020-08-06T00:08:25.447555+0000",

                "last_deep_scrub": "14005'91363",

                "last_deep_scrub_stamp": "2020-08-04T13:36:30.857877+0000",

                "last_clean_scrub_stamp": "2020-08-06T00:08:25.447555+0000",

                "log_size": 7408,

                "ondisk_log_size": 7408,

                "stats_invalid": false,

                "dirty_stats_invalid": false,

                "omap_stats_invalid": false,

                "hitset_stats_invalid": false,

                "hitset_bytes_stats_invalid": false,

                "pin_stats_invalid": false,

                "manifest_stats_invalid": false,

                "snaptrimq_len": 0,

                "stat_sum": {

                    "num_bytes": 83721076736,

                    "num_objects": 19967,

                    "num_object_clones": 0,

                    "num_object_copies": 119802,

                    "num_objects_missing_on_primary": 0,

                    "num_objects_missing": 0,

                    "num_objects_degraded": 19967,

                    "num_objects_misplaced": 0,

                    "num_objects_unfound": 0,

                    "num_objects_dirty": 19967,

                    "num_whiteouts": 0,

                    "num_read": 938,

                    "num_read_kb": 288244,

                    "num_write": 111692,

                    "num_write_kb": 84295064,

                    "num_scrub_errors": 0,

                    "num_shallow_scrub_errors": 0,

                    "num_deep_scrub_errors": 0,

                    "num_objects_recovered": 17,

                    "num_bytes_recovered": 71303168,

                    "num_keys_recovered": 0,

                    "num_objects_omap": 0,

                    "num_objects_hit_set_archive": 0,

                    "num_bytes_hit_set_archive": 0,

                    "num_flush": 0,

                    "num_flush_kb": 0,

                    "num_evict": 0,

                    "num_evict_kb": 0,

                    "num_promote": 0,

                    "num_flush_mode_high": 0,

                    "num_flush_mode_low": 0,

                    "num_evict_mode_some": 0,

                    "num_evict_mode_full": 0,

                    "num_objects_pinned": 0,

                    "num_legacy_snapsets": 0,

                    "num_large_omap_objects": 0,

                    "num_objects_manifest": 0,

                    "num_omap_bytes": 0,

                    "num_omap_keys": 0,

                    "num_objects_repaired": 0

                },

                "up": [

                    25,

                    117,

                    128,

                    95,

                    151,

                    15

                ],

                "acting": [

                    25,

                    117,

                    128,

                    95,

                    151,

                    15

                ],

                "avail_no_missing": [

                    "25(0)",

                    "15(5)",

                    "95(3)",

                    "117(1)",

                    "128(2)"

                ],

                "object_location_counts": [

                    {

                        "shards": "15(5),25(0),95(3),117(1),128(2)",

                        "objects": 19967

                    }

                ],

                "blocked_by": [],

                "up_primary": 25,

                "acting_primary": 25,

                "purged_snaps": []

            },

            "empty": 0,

            "dne": 0,

            "incomplete": 0,

            "last_epoch_started": 14679,

            "hit_set_history": {

                "current_last_update": "0'0",

                "history": []

            }

        },

        {

            "peer": "117(1)",

            "pgid": "2.b9s1",

            "last_update": "20278'445510",

            "last_complete": "18934'278187",

            "log_tail": "14173'104284",

            "last_user_version": 111692,

            "last_backfill": "MAX",

            "purged_snaps": [],

            "history": {

                "epoch_created": 573,

                "epoch_pool_created": 100,

                "last_epoch_started": 14679,

                "last_interval_started": 14678,

                "last_epoch_clean": 14716,

                "last_interval_clean": 14678,

                "last_epoch_split": 573,

                "last_epoch_marked_full": 0,

                "same_up_since": 14678,

                "same_interval_since": 14678,

                "same_primary_since": 14396,

                "last_scrub": "20278'444009",

                "last_scrub_stamp": "2020-09-08T16:57:22.430246+0000",

                "last_deep_scrub": "20278'444009",

                "last_deep_scrub_stamp": "2020-09-08T16:57:22.430246+0000",

                "last_clean_scrub_stamp": "2020-09-07T06:34:26.320796+0000",

                "prior_readable_until_ub": 0

            },

            "stats": {

                "version": "14674'111692",

                "reported_seq": "127502",

                "reported_epoch": "14674",

                "state": "active+undersized+degraded",

                "last_fresh": "2020-08-06T19:34:47.288930+0000",

                "last_change": "2020-08-06T19:31:58.919146+0000",

                "last_active": "2020-08-06T19:34:47.288930+0000",

                "last_peered": "2020-08-06T19:34:47.288930+0000",

                "last_clean": "2020-08-06T19:31:36.499168+0000",

                "last_became_active": "2020-08-06T19:31:58.919146+0000",

                "last_became_peered": "2020-08-06T19:31:58.919146+0000",

                "last_unstale": "2020-08-06T19:34:47.288930+0000",

                "last_undegraded": "2020-08-06T19:31:58.906847+0000",

                "last_fullsized": "2020-08-06T19:31:58.906728+0000",

                "mapping_epoch": 14678,

                "log_start": "14173'104284",

                "ondisk_log_start": "14173'104284",

                "created": 573,

                "last_epoch_clean": 14624,

                "parent": "0.0",

                "parent_split_bits": 10,

                "last_scrub": "14341'106257",

                "last_scrub_stamp": "2020-08-06T00:08:25.447555+0000",

                "last_deep_scrub": "14005'91363",

                "last_deep_scrub_stamp": "2020-08-04T13:36:30.857877+0000",

                "last_clean_scrub_stamp": "2020-08-06T00:08:25.447555+0000",

                "log_size": 7408,

                "ondisk_log_size": 7408,

                "stats_invalid": false,

                "dirty_stats_invalid": false,

                "omap_stats_invalid": false,

                "hitset_stats_invalid": false,

                "hitset_bytes_stats_invalid": false,

                "pin_stats_invalid": false,

                "manifest_stats_invalid": false,

                "snaptrimq_len": 0,

                "stat_sum": {

                    "num_bytes": 83721076736,

                    "num_objects": 19967,

                    "num_object_clones": 0,

                    "num_object_copies": 119802,

                    "num_objects_missing_on_primary": 0,

                    "num_objects_missing": 0,

                    "num_objects_degraded": 19967,

                    "num_objects_misplaced": 0,

                    "num_objects_unfound": 0,

                    "num_objects_dirty": 19967,

                    "num_whiteouts": 0,

                    "num_read": 938,

                    "num_read_kb": 288244,

                    "num_write": 111692,

                    "num_write_kb": 84295064,

                    "num_scrub_errors": 0,

                    "num_shallow_scrub_errors": 0,

                    "num_deep_scrub_errors": 0,

                    "num_objects_recovered": 17,

                    "num_bytes_recovered": 71303168,

                    "num_keys_recovered": 0,

                    "num_objects_omap": 0,

                    "num_objects_hit_set_archive": 0,

                    "num_bytes_hit_set_archive": 0,

                    "num_flush": 0,

                    "num_flush_kb": 0,

                    "num_evict": 0,

                    "num_evict_kb": 0,

                    "num_promote": 0,

                    "num_flush_mode_high": 0,

                    "num_flush_mode_low": 0,

                    "num_evict_mode_some": 0,

                    "num_evict_mode_full": 0,

                    "num_objects_pinned": 0,

                    "num_legacy_snapsets": 0,

                    "num_large_omap_objects": 0,

                    "num_objects_manifest": 0,

                    "num_omap_bytes": 0,

                    "num_omap_keys": 0,

                    "num_objects_repaired": 0

                },

                "up": [

                    25,

                    117,

                    128,

                    95,

                    151,

                    15

                ],

                "acting": [

                    25,

                    117,

                    128,

                    95,

                    151,

                    15

                ],

                "avail_no_missing": [

                    "25(0)",

                    "15(5)",

                    "95(3)",

                    "117(1)",

                    "128(2)"

                ],

                "object_location_counts": [

                    {

                        "shards": "15(5),25(0),95(3),117(1),128(2)",

                        "objects": 19967

                    }

                ],

                "blocked_by": [],

                "up_primary": 25,

                "acting_primary": 25,

                "purged_snaps": []

            },

            "empty": 0,

            "dne": 0,

            "incomplete": 0,

            "last_epoch_started": 14679,

            "hit_set_history": {

                "current_last_update": "0'0",

                "history": []

            }

        },

        {

            "peer": "128(2)",

            "pgid": "2.b9s2",

            "last_update": "20278'445510",

            "last_complete": "18934'278187",

            "log_tail": "14173'104284",

            "last_user_version": 111692,

            "last_backfill": "MAX",

            "purged_snaps": [],

            "history": {

                "epoch_created": 573,

                "epoch_pool_created": 100,

                "last_epoch_started": 14679,

                "last_interval_started": 14678,

                "last_epoch_clean": 14716,

                "last_interval_clean": 14678,

                "last_epoch_split": 573,

                "last_epoch_marked_full": 0,

                "same_up_since": 14678,

                "same_interval_since": 14678,

                "same_primary_since": 14396,

                "last_scrub": "20278'444009",

                "last_scrub_stamp": "2020-09-08T16:57:22.430246+0000",

                "last_deep_scrub": "20278'444009",

                "last_deep_scrub_stamp": "2020-09-08T16:57:22.430246+0000",

                "last_clean_scrub_stamp": "2020-09-07T06:34:26.320796+0000",

                "prior_readable_until_ub": 0

            },

            "stats": {

                "version": "14674'111692",

                "reported_seq": "127502",

                "reported_epoch": "14674",

                "state": "active+undersized+degraded",

                "last_fresh": "2020-08-06T19:34:47.288930+0000",

                "last_change": "2020-08-06T19:31:58.919146+0000",

                "last_active": "2020-08-06T19:34:47.288930+0000",

                "last_peered": "2020-08-06T19:34:47.288930+0000",

                "last_clean": "2020-08-06T19:31:36.499168+0000",

                "last_became_active": "2020-08-06T19:31:58.919146+0000",

                "last_became_peered": "2020-08-06T19:31:58.919146+0000",

                "last_unstale": "2020-08-06T19:34:47.288930+0000",

                "last_undegraded": "2020-08-06T19:31:58.906847+0000",

                "last_fullsized": "2020-08-06T19:31:58.906728+0000",

                "mapping_epoch": 14678,

                "log_start": "14173'104284",

                "ondisk_log_start": "14173'104284",

                "created": 573,

                "last_epoch_clean": 14624,

                "parent": "0.0",

                "parent_split_bits": 10,

                "last_scrub": "14341'106257",

                "last_scrub_stamp": "2020-08-06T00:08:25.447555+0000",

                "last_deep_scrub": "14005'91363",

                "last_deep_scrub_stamp": "2020-08-04T13:36:30.857877+0000",

                "last_clean_scrub_stamp": "2020-08-06T00:08:25.447555+0000",

                "log_size": 7408,

                "ondisk_log_size": 7408,

                "stats_invalid": false,

                "dirty_stats_invalid": false,

                "omap_stats_invalid": false,

                "hitset_stats_invalid": false,

                "hitset_bytes_stats_invalid": false,

                "pin_stats_invalid": false,

                "manifest_stats_invalid": false,

                "snaptrimq_len": 0,

                "stat_sum": {

                    "num_bytes": 83721076736,

                    "num_objects": 19967,

                    "num_object_clones": 0,

                    "num_object_copies": 119802,

                    "num_objects_missing_on_primary": 0,

                    "num_objects_missing": 0,

                    "num_objects_degraded": 19967,

                    "num_objects_misplaced": 0,

                    "num_objects_unfound": 0,

                    "num_objects_dirty": 19967,

                    "num_whiteouts": 0,

                    "num_read": 938,

                    "num_read_kb": 288244,

                    "num_write": 111692,

                    "num_write_kb": 84295064,

                    "num_scrub_errors": 0,

                    "num_shallow_scrub_errors": 0,

                    "num_deep_scrub_errors": 0,

                    "num_objects_recovered": 17,

                    "num_bytes_recovered": 71303168,

                    "num_keys_recovered": 0,

                    "num_objects_omap": 0,

                    "num_objects_hit_set_archive": 0,

                    "num_bytes_hit_set_archive": 0,

                    "num_flush": 0,

                    "num_flush_kb": 0,

                    "num_evict": 0,

                    "num_evict_kb": 0,

                    "num_promote": 0,

                    "num_flush_mode_high": 0,

                    "num_flush_mode_low": 0,

                    "num_evict_mode_some": 0,

                    "num_evict_mode_full": 0,

                    "num_objects_pinned": 0,

                    "num_legacy_snapsets": 0,

                    "num_large_omap_objects": 0,

                    "num_objects_manifest": 0,

                    "num_omap_bytes": 0,

                    "num_omap_keys": 0,

                    "num_objects_repaired": 0

                },

                "up": [

                    25,

                    117,

                    128,

                    95,

                    151,

                    15

                ],

                "acting": [

                    25,

                    117,

                    128,

                    95,

                    151,

                    15

                ],

                "avail_no_missing": [

                    "25(0)",

                    "15(5)",

                    "95(3)",

                    "117(1)",

                    "128(2)"

                ],

                "object_location_counts": [

                    {

                        "shards": "15(5),25(0),95(3),117(1),128(2)",

                        "objects": 19967

                    }

                ],

                "blocked_by": [],

                "up_primary": 25,

                "acting_primary": 25,

                "purged_snaps": []

            },

            "empty": 0,

            "dne": 0,

            "incomplete": 0,

            "last_epoch_started": 14679,

            "hit_set_history": {

                "current_last_update": "0'0",

                "history": []

            }

        },

        {

            "peer": "151(4)",

            "pgid": "2.b9s4",

            "last_update": "20278'445510",

            "last_complete": "14671'111684",

            "log_tail": "14173'104284",

            "last_user_version": 111684,

            "last_backfill": "MAX",

            "purged_snaps": [],

            "history": {

                "epoch_created": 573,

                "epoch_pool_created": 100,

                "last_epoch_started": 14679,

                "last_interval_started": 14678,

                "last_epoch_clean": 14716,

                "last_interval_clean": 14678,

                "last_epoch_split": 573,

                "last_epoch_marked_full": 0,

                "same_up_since": 14678,

                "same_interval_since": 14678,

                "same_primary_since": 14396,

                "last_scrub": "20278'444009",

                "last_scrub_stamp": "2020-09-08T16:57:22.430246+0000",

                "last_deep_scrub": "20278'444009",

                "last_deep_scrub_stamp": "2020-09-08T16:57:22.430246+0000",

                "last_clean_scrub_stamp": "2020-09-07T06:34:26.320796+0000",

                "prior_readable_until_ub": 0

            },

            "stats": {

                "version": "14671'111684",

                "reported_seq": "127482",

                "reported_epoch": "14671",

                "state": "active+clean",

                "last_fresh": "2020-08-06T19:31:36.499168+0000",

                "last_change": "2020-08-06T19:28:38.923454+0000",

                "last_active": "2020-08-06T19:31:36.499168+0000",

                "last_peered": "2020-08-06T19:31:36.499168+0000",

                "last_clean": "2020-08-06T19:31:36.499168+0000",

                "last_became_active": "2020-08-06T19:28:15.372420+0000",

                "last_became_peered": "2020-08-06T19:28:15.372420+0000",

                "last_unstale": "2020-08-06T19:31:36.499168+0000",

                "last_undegraded": "2020-08-06T19:31:36.499168+0000",

                "last_fullsized": "2020-08-06T19:31:36.499168+0000",

                "mapping_epoch": 14678,

                "log_start": "14173'104284",

                "ondisk_log_start": "14173'104284",

                "created": 573,

                "last_epoch_clean": 14624,

                "parent": "0.0",

                "parent_split_bits": 10,

                "last_scrub": "14341'106257",

                "last_scrub_stamp": "2020-08-06T00:08:25.447555+0000",

                "last_deep_scrub": "14005'91363",

                "last_deep_scrub_stamp": "2020-08-04T13:36:30.857877+0000",

                "last_clean_scrub_stamp": "2020-08-06T00:08:25.447555+0000",

                "log_size": 7400,

                "ondisk_log_size": 7400,

                "stats_invalid": false,

                "dirty_stats_invalid": false,

                "omap_stats_invalid": false,

                "hitset_stats_invalid": false,

                "hitset_bytes_stats_invalid": false,

                "pin_stats_invalid": false,

                "manifest_stats_invalid": false,

                "snaptrimq_len": 0,

                "stat_sum": {

                    "num_bytes": 83704299520,

                    "num_objects": 19963,

                    "num_object_clones": 0,

                    "num_object_copies": 119778,

                    "num_objects_missing_on_primary": 0,

                    "num_objects_missing": 0,

                    "num_objects_degraded": 0,

                    "num_objects_misplaced": 0,

                    "num_objects_unfound": 0,

                    "num_objects_dirty": 19963,

                    "num_whiteouts": 0,

                    "num_read": 938,

                    "num_read_kb": 288244,

                    "num_write": 111684,

                    "num_write_kb": 84278680,

                    "num_scrub_errors": 0,

                    "num_shallow_scrub_errors": 0,

                    "num_deep_scrub_errors": 0,

                    "num_objects_recovered": 17,

                    "num_bytes_recovered": 71303168,

                    "num_keys_recovered": 0,

                    "num_objects_omap": 0,

                    "num_objects_hit_set_archive": 0,

                    "num_bytes_hit_set_archive": 0,

                    "num_flush": 0,

                    "num_flush_kb": 0,

                    "num_evict": 0,

                    "num_evict_kb": 0,

                    "num_promote": 0,

                    "num_flush_mode_high": 0,

                    "num_flush_mode_low": 0,

                    "num_evict_mode_some": 0,

                    "num_evict_mode_full": 0,

                    "num_objects_pinned": 0,

                    "num_legacy_snapsets": 0,

                    "num_large_omap_objects": 0,

                    "num_objects_manifest": 0,

                    "num_omap_bytes": 0,

                    "num_omap_keys": 0,

                    "num_objects_repaired": 0

                },

                "up": [

                    25,

                    117,

                    128,

                    95,

                    151,

                    15

                ],

                "acting": [

                    25,

                    117,

                    128,

                    95,

                    151,

                    15

                ],

                "avail_no_missing": [],

                "object_location_counts": [],

                "blocked_by": [],

                "up_primary": 25,

                "acting_primary": 25,

                "purged_snaps": []

            },

            "empty": 0,

            "dne": 0,

            "incomplete": 0,

            "last_epoch_started": 14679,

            "hit_set_history": {

                "current_last_update": "0'0",

                "history": []

            }

        }

    ],

    "recovery_state": [

        {

            "name": "Started/Primary/Active",

            "enter_time": "2020-08-06T19:35:02.502760+0000",

            "might_have_unfound": [

                {

                    "osd": "15(5)",

                    "status": "already probed"

                },

                {

                    "osd": "95(3)",

                    "status": "already probed"

                },

                {

                    "osd": "117(1)",

                    "status": "already probed"

                },

                {

                    "osd": "128(2)",

                    "status": "already probed"

                },

                {

                    "osd": "151(4)",

                    "status": "already probed"

                }

            ],

            "recovery_progress": {

                "backfill_targets": [],

                "waiting_on_backfill": [],

                "last_backfill_started": "MIN",

                "backfill_info": {

                    "begin": "MIN",

                    "end": "MIN",

                    "objects": []

                },

                "peer_backfill_info": [],

                "backfills_in_flight": [],

                "recovering": [],

                "pg_backend": {

                    "recovery_ops": [],

                    "read_ops": []

                }

            }

        },

        {

            "name": "Started",

            "enter_time": "2020-08-06T19:35:01.477315+0000"

        },

        {

            "scrubber.epoch_start": "14678",

            "scrubber.active": false,

            "scrubber.state": "INACTIVE",

            "scrubber.start": "MIN",

            "scrubber.end": "MIN",

            "scrubber.max_end": "MIN",

            "scrubber.subset_last_update": "0'0",

            "scrubber.deep": false,

            "scrubber.waiting_on_whom": []

        }

    ],

    "agent_state": {}

}

Every time we look at the affected OSD logs, we see the same bad ("got") checksum (0x6706be76):

debug 2020-08-13T18:39:01.731+0000 7fbc037a7700 -1
bluestore(/var/lib/ceph/osd/ceph-25) _verify_csum bad crc32c/0x1000
checksum at blob offset 0x0, got 0x6706be76, expected 0x61f2021c, device
location [0x12b403c0000~1000], logical extent 0x0~1000, object
2#2:0f1a338f:::rbd_data.3.20d195d612942.0000000001db869b:head#


This looks a lot like: https://tracker.ceph.com/issues/22464

That said, we've got the following versions in play (cluster was created
with 15.2.3):

ceph version 15.2.4 (7447c15c6ff58d7fce91843b705a268a1917325c) octopus
(stable)


This is a containerized cephadm installation, in case it's relevant.
The distribution is Ubuntu 18.04.4, and the kernel is the HWE kernel:

Linux ceph02 5.4.0-42-generic #46~18.04.1-Ubuntu SMP Fri Jul 10 07:21:24
UTC 2020 x86_64 x86_64 x86_64 GNU/Linux

A repair operation 'fixes' it. These errors are occurring across many PGs, on
various servers, and we see no indication of any hardware-related
issues.

Any ideas what to do next?
_______________________________________________
ceph-users mailing list -- ceph-users@xxxxxxx
To unsubscribe send an email to ceph-users-leave@xxxxxxx



[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Ceph Dev]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux