1 pgs inconsistent 2 scrub errors

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hello,

 

We have been having problems with one PG since this morning; this is what we have found so far:

 

# ceph --version

ceph version 10.2.0 (3a9fba20ec743699b69bd0181dd6c54dc01c64b9)

 

# ceph -s

    cluster 2bf80721-fceb-4b63-89ee-1a5faa278493

     health HEALTH_ERR

            1 pgs inconsistent

            2 scrub errors

     monmap e1: 1 mons at {cephadm01=192.168.12.150:6789/0}

            election epoch 7, quorum 0 cephadm01

     osdmap e580: 9 osds: 9 up, 9 in

            flags sortbitwise

      pgmap v11430755: 664 pgs, 13 pools, 1010 GB data, 13894 kobjects

            2142 GB used, 2355 GB / 4497 GB avail

                 660 active+clean

                   3 active+clean+scrubbing

                   1 active+clean+inconsistent

 

# ceph health detail

HEALTH_ERR 1 pgs inconsistent; 2 scrub errors

pg 10.55 is active+clean+inconsistent, acting [3,4]

2 scrub errors

 

# ceph pg 10.55 query                                                                                                                                                     

{

    "state": "active+clean+inconsistent",

    "snap_trimq": "[]",

    "epoch": 580,

    "up": [

        3,

        4

    ],

    "acting": [

        3,

        4

    ],

    "actingbackfill": [

        "3",

        "4"

    ],

    "info": {

        "pgid": "10.55",

        "last_update": "580'40334",

        "last_complete": "580'40334",

        "log_tail": "448'37299",

        "last_user_version": 40334,

        "last_backfill": "MAX",

        "last_backfill_bitwise": 1,

        "purged_snaps": "[]",

        "history": {

            "epoch_created": 329,

            "last_epoch_started": 577,

            "last_epoch_clean": 577,

            "last_epoch_split": 0,

            "last_epoch_marked_full": 0,

            "same_up_since": 576,

            "same_interval_since": 576,

            "same_primary_since": 572,

            "last_scrub": "568'40333",

            "last_scrub_stamp": "2017-01-26 10:06:56.062870",

            "last_deep_scrub": "562'40329",

            "last_deep_scrub_stamp": "2017-01-26 06:19:55.708518",

            "last_clean_scrub_stamp": "2016-07-05 14:58:45.534218"

        },

        "stats": {

            "version": "580'40334",

            "reported_seq": "49407",

            "reported_epoch": "580",

            "state": "active+clean+inconsistent",

            "last_fresh": "2017-01-26 11:21:55.393989",

            "last_change": "2017-01-26 10:06:56.062930",

            "last_active": "2017-01-26 11:21:55.393989",

            "last_peered": "2017-01-26 11:21:55.393989",

            "last_clean": "2017-01-26 11:21:55.393989",

            "last_became_active": "2017-01-26 09:28:09.196447",

            "last_became_peered": "2017-01-26 09:28:09.196447",

            "last_unstale": "2017-01-26 11:21:55.393989",

            "last_undegraded": "2017-01-26 11:21:55.393989",

            "last_fullsized": "2017-01-26 11:21:55.393989",

            "mapping_epoch": 575,

            "log_start": "448'37299",

            "ondisk_log_start": "448'37299",

            "created": 329,

            "last_epoch_clean": 577,

            "parent": "0.0",

            "parent_split_bits": 8,

            "last_scrub": "568'40333",

            "last_scrub_stamp": "2017-01-26 10:06:56.062870",

            "last_deep_scrub": "562'40329",

            "last_deep_scrub_stamp": "2017-01-26 06:19:55.708518",

            "last_clean_scrub_stamp": "2016-07-05 14:58:45.534218",

            "log_size": 3035,

            "ondisk_log_size": 3035,

            "stats_invalid": false,

            "dirty_stats_invalid": false,

            "omap_stats_invalid": false,

            "hitset_stats_invalid": false,

            "hitset_bytes_stats_invalid": false,

            "pin_stats_invalid": false,

            "stat_sum": {

                "num_bytes": 2153869599,

                "num_objects": 28148,

                "num_object_clones": 0,

                "num_object_copies": 56296,

                "num_objects_missing_on_primary": 0,

                "num_objects_missing": 0,

                "num_objects_degraded": 0,

                "num_objects_misplaced": 0,

                "num_objects_unfound": 0,

                "num_objects_dirty": 28148,

                "num_whiteouts": 0,

                "num_read": 21,

                "num_read_kb": 696,

                "num_write": 50,

                "num_write_kb": 217,

                "num_scrub_errors": 2,

                "num_shallow_scrub_errors": 1,

                "num_deep_scrub_errors": 1,

                "num_objects_recovered": 0,

                "num_bytes_recovered": 0,

                "num_keys_recovered": 0,

                "num_objects_omap": 0,

                "num_objects_hit_set_archive": 0,

                "num_bytes_hit_set_archive": 0,

                "num_flush": 0,

                "num_flush_kb": 0,

                "num_evict": 0,

                "num_evict_kb": 0,

                "num_promote": 0,

                "num_flush_mode_high": 0,

                "num_flush_mode_low": 0,

                "num_evict_mode_some": 0,

                "num_evict_mode_full": 0,

                "num_objects_pinned": 0

            },

            "up": [

                3,

                4

            ],

            "acting": [

                3,

                4

            ],

            "blocked_by": [],

            "up_primary": 3,

            "acting_primary": 3

        },

        "empty": 0,

        "dne": 0,

        "incomplete": 0,

        "last_epoch_started": 577,

        "hit_set_history": {

            "current_last_update": "0'0",

            "history": []

        }

    },

    "peer_info": [

        {

            "peer": "4",

            "pgid": "10.55",

            "last_update": "580'40334",

            "last_complete": "580'40334",

            "log_tail": "448'37299",

            "last_user_version": 40333,

            "last_backfill": "MAX",

            "last_backfill_bitwise": 1,

            "purged_snaps": "[]",

            "history": {

                "epoch_created": 329,

                "last_epoch_started": 577,

                "last_epoch_clean": 577,

                "last_epoch_split": 0,

                "last_epoch_marked_full": 0,

                "same_up_since": 576,

                "same_interval_since": 576,

                "same_primary_since": 572,

                "last_scrub": "568'40333",

                "last_scrub_stamp": "2017-01-26 10:06:56.062870",

                "last_deep_scrub": "562'40329",

                "last_deep_scrub_stamp": "2017-01-26 06:19:55.708518",

                "last_clean_scrub_stamp": "2016-07-05 14:58:45.534218"

            },

            "stats": {

                "version": "568'40333",

                "reported_seq": "49386",

                "reported_epoch": "571",

                "state": "inconsistent+peering",

                "last_fresh": "2017-01-26 09:25:26.210512",

                "last_change": "2017-01-26 09:25:26.210512",

                "last_active": "2017-01-26 08:40:35.617481",

                "last_peered": "2017-01-26 08:40:35.617481",

                "last_clean": "2017-01-26 08:40:35.617481",

                "last_became_active": "2017-01-26 08:29:09.145329",

                "last_became_peered": "2017-01-26 08:29:09.145329",

                "last_unstale": "2017-01-26 09:25:26.210512",

                "last_undegraded": "2017-01-26 09:25:26.210512",

                "last_fullsized": "2017-01-26 09:25:26.210512",

                "mapping_epoch": 575,

                "log_start": "448'37299",

                "ondisk_log_start": "448'37299",

                "created": 329,

                "last_epoch_clean": 568,

                "parent": "0.0",

                "parent_split_bits": 8,

                "last_scrub": "562'40329",

                "last_scrub_stamp": "2017-01-26 06:19:55.708518",

                "last_deep_scrub": "562'40329",

                "last_deep_scrub_stamp": "2017-01-26 06:19:55.708518",

                "last_clean_scrub_stamp": "2016-07-05 14:58:45.534218",

                "log_size": 3034,

                "ondisk_log_size": 3034,

                "stats_invalid": false,

                "dirty_stats_invalid": false,

                "omap_stats_invalid": false,

                "hitset_stats_invalid": false,

                "hitset_bytes_stats_invalid": false,

                "pin_stats_invalid": false,

                "stat_sum": {

                    "num_bytes": 2153849873,

                    "num_objects": 28147,

                    "num_object_clones": 0,

                   "num_object_copies": 56294,

                    "num_objects_missing_on_primary": 0,

                    "num_objects_missing": 0,

                    "num_objects_degraded": 0,

                    "num_objects_misplaced": 0,

                    "num_objects_unfound": 0,

                    "num_objects_dirty": 28147,

                    "num_whiteouts": 0,

                    "num_read": 6,

                    "num_read_kb": 110,

                    "num_write": 40,

                    "num_write_kb": 197,

                    "num_scrub_errors": 1,

                    "num_shallow_scrub_errors": 0,

                    "num_deep_scrub_errors": 1,

                    "num_objects_recovered": 0,

                    "num_bytes_recovered": 0,

                    "num_keys_recovered": 0,

                    "num_objects_omap": 0,

                    "num_objects_hit_set_archive": 0,

                    "num_bytes_hit_set_archive": 0,

                    "num_flush": 0,

                    "num_flush_kb": 0,

                    "num_evict": 0,

                    "num_evict_kb": 0,

                    "num_promote": 0,

                    "num_flush_mode_high": 0,

                    "num_flush_mode_low": 0,

                    "num_evict_mode_some": 0,

                    "num_evict_mode_full": 0,

                    "num_objects_pinned": 0

                },

                "up": [

                    3,

                    4

                ],

                "acting": [

                    3,

                    4

                ],

                "blocked_by": [],

                "up_primary": 3,

                "acting_primary": 3

            },

            "empty": 0,

            "dne": 0,

            "incomplete": 0,

            "last_epoch_started": 577,

            "hit_set_history": {

                "current_last_update": "0'0",

                "history": []

            }

        }

    ],

    "recovery_state": [

        {

            "name": "Started\/Primary\/Active",

            "enter_time": "2017-01-26 09:28:09.159017",

            "might_have_unfound": [],

            "recovery_progress": {

                "backfill_targets": [],

                "waiting_on_backfill": [],

                "last_backfill_started": "MIN",

                "backfill_info": {

                    "begin": "MIN",

                    "end": "MIN",

                    "objects": []

                },

                "peer_backfill_info": [],

                "backfills_in_flight": [],

                "recovering": [],

                "pg_backend": {

                    "pull_from_peer": [],

                    "pushing": []

                }

            },

            "scrub": {

                "scrubber.epoch_start": "576",

                "scrubber.active": 0,

                "scrubber.state": "INACTIVE",

                "scrubber.start": "MIN",

                "scrubber.end": "MIN",

                "scrubber.subset_last_update": "0'0",

                "scrubber.deep": false,

                "scrubber.seed": 0,

                "scrubber.waiting_on": 0,

                "scrubber.waiting_on_whom": []

            }

        },

        {

            "name": "Started",

            "enter_time": "2017-01-26 09:28:08.166221"

        }

    ],

    "agent_state": {}

}

 

# grep -Hn 'ERR' /var/log/ceph/ceph-osd.3.log

/var/log/ceph/ceph-osd.3.log:47:2017-01-26 06:08:48.147129 7f3fda627700 -1 log_channel(cluster) log [ERR] : 10.55 shard 3: soid 10:aa0c6d9c:::ef4069bf-70fb-4414-a9d9-6bf5b32608fb.34127.33_nalazi%2f201607%2fLab_7bd28004-cc9d-4039-9567-7f5c597f6d88.pdf:head data_digest 0xc44df2ba != known data_digest 0xff59029 from auth shard 4

/var/log/ceph/ceph-osd.3.log:48:2017-01-26 06:19:55.708507 7f3fda627700 -1 log_channel(cluster) log [ERR] : 10.55 deep-scrub 0 missing, 1 inconsistent objects

/var/log/ceph/ceph-osd.3.log:49:2017-01-26 06:19:55.708513 7f3fda627700 -1 log_channel(cluster) log [ERR] : 10.55 deep-scrub 1 errors

/var/log/ceph/ceph-osd.3.log:7464:2017-01-26 10:00:48.267401 7fa6970c2700 -1 log_channel(cluster) log [ERR] : 10.55 shard 3 missing 10:aa0c6d9c:::ef4069bf-70fb-4414-a9d9-6bf5b32608fb.34127.33_nalazi%2f201607%2fLab_7bd28004-cc9d-4039-9567-7f5c597f6d88.pdf:head

/var/log/ceph/ceph-osd.3.log:7467:2017-01-26 10:06:56.062852 7fa6970c2700 -1 log_channel(cluster) log [ERR] : 10.55 scrub 1 missing, 0 inconsistent objects

/var/log/ceph/ceph-osd.3.log:7468:2017-01-26 10:06:56.062858 7fa6970c2700 -1 log_channel(cluster) log [ERR] : 10.55 scrub 1 errors ( 1 remaining deep scrub error(s) )

 

We have located the inconsistent PG on osd.3 and have tried the following:

Stopped osd.3.

Flushed the journal.

Moved the object to /tmp.

Started osd.3.

Ran "ceph pg repair 10.55", but without success.

 

Is there a Ceph tool we could use to copy the healthy object from osd.4 to osd.3?

 

Best regards!

 

_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com

[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux