Re: scrub errors

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi, thanks.  It's still using Hammer.  Here's the output from the pg query; the last command you gave doesn't work at all, so this release must be too old for it.


# ceph pg 10.2a query
{
    "state": "active+clean+inconsistent",
    "snap_trimq": "[]",
    "epoch": 23265,
    "up": [
        41,
        38,
        8
    ],
    "acting": [
        41,
        38,
        8
    ],
    "actingbackfill": [
        "8",
        "38",
        "41"
    ],
    "info": {
        "pgid": "10.2a",
        "last_update": "23265'20886859",
        "last_complete": "23265'20886859",
        "log_tail": "23265'20883809",
        "last_user_version": 20886859,
        "last_backfill": "MAX",
        "purged_snaps": "[]",
        "history": {
            "epoch_created": 8200,
            "last_epoch_started": 21481,
            "last_epoch_clean": 21487,
            "last_epoch_split": 0,
            "same_up_since": 21472,
            "same_interval_since": 21474,
            "same_primary_since": 8244,
            "last_scrub": "23265'20864209",
            "last_scrub_stamp": "2019-03-22 22:39:13.930673",
            "last_deep_scrub": "23265'20864209",
            "last_deep_scrub_stamp": "2019-03-22 22:39:13.930673",
            "last_clean_scrub_stamp": "2019-03-15 01:33:21.447438"
        },
        "stats": {
            "version": "23265'20886859",
            "reported_seq": "10109937",
            "reported_epoch": "23265",
            "state": "active+clean+inconsistent",
            "last_fresh": "2019-03-25 15:52:53.720768",
            "last_change": "2019-03-22 22:39:13.931038",
            "last_active": "2019-03-25 15:52:53.720768",
            "last_peered": "2019-03-25 15:52:53.720768",
            "last_clean": "2019-03-25 15:52:53.720768",
            "last_became_active": "0.000000",
            "last_became_peered": "0.000000",
            "last_unstale": "2019-03-25 15:52:53.720768",
            "last_undegraded": "2019-03-25 15:52:53.720768",
            "last_fullsized": "2019-03-25 15:52:53.720768",
            "mapping_epoch": 21472,
            "log_start": "23265'20883809",
            "ondisk_log_start": "23265'20883809",
            "created": 8200,
            "last_epoch_clean": 21487,
            "parent": "0.0",
            "parent_split_bits": 0,
            "last_scrub": "23265'20864209",
            "last_scrub_stamp": "2019-03-22 22:39:13.930673",
            "last_deep_scrub": "23265'20864209",
            "last_deep_scrub_stamp": "2019-03-22 22:39:13.930673",
            "last_clean_scrub_stamp": "2019-03-15 01:33:21.447438",
            "log_size": 3050,
            "ondisk_log_size": 3050,
            "stats_invalid": "0",
            "stat_sum": {
                "num_bytes": 8220278746,
                "num_objects": 345034,
                "num_object_clones": 0,
                "num_object_copies": 1035102,
                "num_objects_missing_on_primary": 0,
                "num_objects_degraded": 0,
                "num_objects_misplaced": 0,
                "num_objects_unfound": 0,
                "num_objects_dirty": 345034,
                "num_whiteouts": 0,
                "num_read": 7904350,
                "num_read_kb": 58116568,
                "num_write": 8753504,
                "num_write_kb": 85104263,
                "num_scrub_errors": 47,
                "num_shallow_scrub_errors": 47,
                "num_deep_scrub_errors": 0,
                "num_objects_recovered": 167138,
                "num_bytes_recovered": 5193543924,
                "num_keys_recovered": 0,
                "num_objects_omap": 0,
                "num_objects_hit_set_archive": 0,
                "num_bytes_hit_set_archive": 0
            },
            "up": [
                41,
                38,
                8
            ],
            "acting": [
                41,
                38,
                8
            ],
            "blocked_by": [],
            "up_primary": 41,
            "acting_primary": 41
        },
        "empty": 0,
        "dne": 0,
        "incomplete": 0,
        "last_epoch_started": 21481,
        "hit_set_history": {
            "current_last_update": "0'0",
            "current_last_stamp": "0.000000",
            "current_info": {
                "begin": "0.000000",
                "end": "0.000000",
                "version": "0'0",
                "using_gmt": "0"
            },
            "history": []
        }
    },
    "peer_info": [
        {
            "peer": "8",
            "pgid": "10.2a",
            "last_update": "23265'20886859",
            "last_complete": "23265'20886859",
            "log_tail": "21395'11840466",
            "last_user_version": 11843648,
            "last_backfill": "MAX",
            "purged_snaps": "[]",
            "history": {
                "epoch_created": 8200,
                "last_epoch_started": 21481,
                "last_epoch_clean": 21487,
                "last_epoch_split": 0,
                "same_up_since": 21472,
                "same_interval_since": 21474,
                "same_primary_since": 8244,
                "last_scrub": "23265'20864209",
                "last_scrub_stamp": "2019-03-22 22:39:13.930673",
                "last_deep_scrub": "23265'20864209",
                "last_deep_scrub_stamp": "2019-03-22 22:39:13.930673",
                "last_clean_scrub_stamp": "2019-03-15 01:33:21.447438"
            },
            "stats": {
                "version": "21471'11843647",
                "reported_seq": "7670875",
                "reported_epoch": "21471",
                "state": "active+undersized+degraded+remapped+wait_backfill",
                "last_fresh": "2018-09-22 07:07:23.061013",
                "last_change": "2018-09-22 06:39:32.487204",
                "last_active": "2018-09-22 07:07:23.061013",
                "last_peered": "2018-09-22 07:07:23.061013",
                "last_clean": "2018-09-22 06:33:47.246063",
                "last_became_active": "0.000000",
                "last_became_peered": "0.000000",
                "last_unstale": "2018-09-22 07:07:23.061013",
                "last_undegraded": "2018-09-22 06:39:13.626445",
                "last_fullsized": "2018-09-22 06:39:13.626445",
                "mapping_epoch": 21472,
                "log_start": "21395'11840466",
                "ondisk_log_start": "21395'11840466",
                "created": 8200,
                "last_epoch_clean": 21397,
                "parent": "0.0",
                "parent_split_bits": 0,
                "last_scrub": "21395'11835365",
                "last_scrub_stamp": "2018-09-21 12:11:47.230141",
                "last_deep_scrub": "21395'11835365",
                "last_deep_scrub_stamp": "2018-09-21 12:11:47.230141",
                "last_clean_scrub_stamp": "2018-09-21 12:11:47.230141",
                "log_size": 3181,
                "ondisk_log_size": 3181,
                "stats_invalid": "0",
                "stat_sum": {
                    "num_bytes": 6406027390,
                    "num_objects": 241710,
                    "num_object_clones": 0,
                    "num_object_copies": 966844,
                    "num_objects_missing_on_primary": 0,
                    "num_objects_degraded": 241715,
                    "num_objects_misplaced": 725133,
                    "num_objects_unfound": 0,
                    "num_objects_dirty": 241710,
                    "num_whiteouts": 0,
                    "num_read": 5638025,
                    "num_read_kb": 48736266,
                    "num_write": 6789818,
                    "num_write_kb": 67680335,
                    "num_scrub_errors": 0,
                    "num_shallow_scrub_errors": 0,
                    "num_deep_scrub_errors": 0,
                    "num_objects_recovered": 167079,
                    "num_bytes_recovered": 5191625476,
                    "num_keys_recovered": 0,
                    "num_objects_omap": 0,
                    "num_objects_hit_set_archive": 0,
                    "num_bytes_hit_set_archive": 0
                },
                "up": [
                    41,
                    38,
                    8
                ],
                "acting": [
                    41,
                    38,
                    8
                ],
                "blocked_by": [],
                "up_primary": 41,
                "acting_primary": 41
            },
            "empty": 0,
            "dne": 0,
            "incomplete": 0,
            "last_epoch_started": 21481,
            "hit_set_history": {
                "current_last_update": "0'0",
                "current_last_stamp": "0.000000",
                "current_info": {
                    "begin": "0.000000",
                    "end": "0.000000",
                    "version": "0'0",
                    "using_gmt": "0"
                },
                "history": []
            }
        },
        {
            "peer": "38",
            "pgid": "10.2a",
            "last_update": "23265'20886859",
            "last_complete": "21395'11843517",
            "log_tail": "21395'11840466",
            "last_user_version": 11843517,
            "last_backfill": "MAX",
            "purged_snaps": "[]",
            "history": {
                "epoch_created": 8200,
                "last_epoch_started": 21481,
                "last_epoch_clean": 21487,
                "last_epoch_split": 0,
                "same_up_since": 21472,
                "same_interval_since": 21474,
                "same_primary_since": 8244,
                "last_scrub": "23265'20864209",
                "last_scrub_stamp": "2019-03-22 22:39:13.930673",
                "last_deep_scrub": "23265'20864209",
                "last_deep_scrub_stamp": "2019-03-22 22:39:13.930673",
                "last_clean_scrub_stamp": "2019-03-15 01:33:21.447438"
            },
            "stats": {
                "version": "21395'11843516",
                "reported_seq": "7670719",
                "reported_epoch": "21395",
                "state": "active+clean",
                "last_fresh": "2018-09-22 06:33:14.791334",
                "last_change": "2018-09-21 12:11:47.230557",
                "last_active": "2018-09-22 06:33:14.791334",
                "last_peered": "2018-09-22 06:33:14.791334",
                "last_clean": "2018-09-22 06:33:14.791334",
                "last_became_active": "0.000000",
                "last_became_peered": "0.000000",
                "last_unstale": "2018-09-22 06:33:14.791334",
                "last_undegraded": "2018-09-22 06:33:14.791334",
                "last_fullsized": "2018-09-22 06:33:14.791334",
                "mapping_epoch": 21472,
                "log_start": "21395'11840466",
                "ondisk_log_start": "21395'11840466",
                "created": 8200,
                "last_epoch_clean": 20840,
                "parent": "0.0",
                "parent_split_bits": 0,
                "last_scrub": "21395'11835365",
                "last_scrub_stamp": "2018-09-21 12:11:47.230141",
                "last_deep_scrub": "21395'11835365",
                "last_deep_scrub_stamp": "2018-09-21 12:11:47.230141",
                "last_clean_scrub_stamp": "2018-09-21 12:11:47.230141",
                "log_size": 3050,
                "ondisk_log_size": 3050,
                "stats_invalid": "0",
                "stat_sum": {
                    "num_bytes": 6405126628,
                    "num_objects": 241711,
                    "num_object_clones": 0,
                    "num_object_copies": 725130,
                    "num_objects_missing_on_primary": 0,
                    "num_objects_degraded": 0,
                    "num_objects_misplaced": 0,
                    "num_objects_unfound": 0,
                    "num_objects_dirty": 241711,
                    "num_whiteouts": 0,
                    "num_read": 5637862,
                    "num_read_kb": 48735376,
                    "num_write": 6789687,
                    "num_write_kb": 67678402,
                    "num_scrub_errors": 0,
                    "num_shallow_scrub_errors": 0,
                    "num_deep_scrub_errors": 0,
                    "num_objects_recovered": 167079,
                    "num_bytes_recovered": 5191625476,
                    "num_keys_recovered": 0,
                    "num_objects_omap": 0,
                    "num_objects_hit_set_archive": 0,
                    "num_bytes_hit_set_archive": 0
                },
                "up": [
                    41,
                    38,
                    8
                ],
                "acting": [
                    41,
                    38,
                    8
                ],
                "blocked_by": [],
                "up_primary": 41,
                "acting_primary": 41
            },
            "empty": 0,
            "dne": 0,
            "incomplete": 0,
            "last_epoch_started": 21481,
            "hit_set_history": {
                "current_last_update": "0'0",
                "current_last_stamp": "0.000000",
                "current_info": {
                    "begin": "0.000000",
                    "end": "0.000000",
                    "version": "0'0",
                    "using_gmt": "0"
                },
                "history": []
            }
        }
    ],
    "recovery_state": [
        {
            "name": "Started\/Primary\/Active",
            "enter_time": "2018-09-22 07:07:48.637248",
            "might_have_unfound": [
                {
                    "osd": "7",
                    "status": "not queried"
                },
                {
                    "osd": "8",
                    "status": "already probed"
                },
                {
                    "osd": "17",
                    "status": "not queried"
                },
                {
                    "osd": "38",
                    "status": "already probed"
                }
            ],
            "recovery_progress": {
                "backfill_targets": [],
                "waiting_on_backfill": [],
                "last_backfill_started": "-1\/0\/\/0",
                "backfill_info": {
                    "begin": "-1\/0\/\/0",
                    "end": "-1\/0\/\/0",
                    "objects": []
                },
                "peer_backfill_info": [],
                "backfills_in_flight": [],
                "recovering": [],
                "pg_backend": {
                    "pull_from_peer": [],
                    "pushing": []
                }
            },
            "scrub": {
                "scrubber.epoch_start": "21474",
                "scrubber.active": 0,
                "scrubber.waiting_on": 0,
                "scrubber.waiting_on_whom": []
            }
        },
        {
            "name": "Started",
            "enter_time": "2018-09-22 07:07:42.138358"
        }
    ],
    "agent_state": {}
}


On Mon, Mar 25, 2019 at 3:46 PM Brad Hubbard <bhubbard@xxxxxxxxxx> wrote:
It would help to know what version you are running but, to begin with,
could you post the output of the following?

$ sudo ceph pg 10.2a query
$ sudo rados list-inconsistent-obj 10.2a --format=json-pretty

Also, have a read of
http://docs.ceph.com/docs/mimic/rados/troubleshooting/troubleshooting-pg/
(adjust the URL for your release).

On Tue, Mar 26, 2019 at 8:19 AM solarflow99 <solarflow99@xxxxxxxxx> wrote:
>
> I noticed my cluster has scrub errors but the deep-scrub command doesn't show any errors.  Is there any way to know what it takes to fix it?
>
>
>
> # ceph health detail
> HEALTH_ERR 1 pgs inconsistent; 47 scrub errors
> pg 10.2a is active+clean+inconsistent, acting [41,38,8]
> 47 scrub errors
>
> # zgrep 10.2a /var/log/ceph/ceph.log*
> /var/log/ceph/ceph.log-20190323.gz:2019-03-22 16:20:18.148299 osd.41 192.168.4.19:6809/30077 54885 : cluster [INF] 10.2a deep-scrub starts
> /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024040 osd.41 192.168.4.19:6809/30077 54886 : cluster [ERR] 10.2a shard 38 missing 10/24083d2a/ec50777d-cc99-46a8-8610-4492213f412f/head
> /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024049 osd.41 192.168.4.19:6809/30077 54887 : cluster [ERR] 10.2a shard 38 missing 10/ff183d2a/fce859b9-61a9-46cb-82f1-4b4af31c10db/head
> /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024074 osd.41 192.168.4.19:6809/30077 54888 : cluster [ERR] 10.2a shard 38 missing 10/34283d2a/4b7c96cb-c494-4637-8669-e42049bd0e1c/head
> /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024076 osd.41 192.168.4.19:6809/30077 54889 : cluster [ERR] 10.2a shard 38 missing 10/df283d2a/bbe61149-99f8-4b83-a42b-b208d18094a8/head
> /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024077 osd.41 192.168.4.19:6809/30077 54890 : cluster [ERR] 10.2a shard 38 missing 10/35383d2a/60e8ed9b-bd04-5a43-8917-6f29eba28a66:0014/head
> /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024078 osd.41 192.168.4.19:6809/30077 54891 : cluster [ERR] 10.2a shard 38 missing 10/d5383d2a/2bdeb186-561b-4151-b87e-fe7c2e217d41/head
> /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024080 osd.41 192.168.4.19:6809/30077 54892 : cluster [ERR] 10.2a shard 38 missing 10/a7383d2a/b6b9d21d-2f4f-4550-8928-52552349db7d/head
> /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024081 osd.41 192.168.4.19:6809/30077 54893 : cluster [ERR] 10.2a shard 38 missing 10/9c383d2a/5b552687-c709-4e87-b773-1cce5b262754/head
> /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024082 osd.41 192.168.4.19:6809/30077 54894 : cluster [ERR] 10.2a shard 38 missing 10/5d383d2a/cb1a2ea8-0872-4de9-8b93-5ea8d9d8e613/head
> /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024083 osd.41 192.168.4.19:6809/30077 54895 : cluster [ERR] 10.2a shard 38 missing 10/8f483d2a/74c7a2b9-f00a-4c89-afbd-c1b8439234ac/head
> /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024085 osd.41 192.168.4.19:6809/30077 54896 : cluster [ERR] 10.2a shard 38 missing 10/b1583d2a/b3f00768-82a2-4637-91d1-164f3a51312a/head
> /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024086 osd.41 192.168.4.19:6809/30077 54897 : cluster [ERR] 10.2a shard 38 missing 10/35583d2a/e347aff4-7b71-476e-863a-310e767e4160/head
> /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024088 osd.41 192.168.4.19:6809/30077 54898 : cluster [ERR] 10.2a shard 38 missing 10/69583d2a/0805d07a-49d1-44cb-87c7-3bd73a0ce692/head
> /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024122 osd.41 192.168.4.19:6809/30077 54899 : cluster [ERR] 10.2a shard 38 missing 10/1a583d2a/d65bcf6a-9457-46c3-8fbc-432ebbaad89a/head
> /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024123 osd.41 192.168.4.19:6809/30077 54900 : cluster [ERR] 10.2a shard 38 missing 10/6d583d2a/5592f7d6-a131-4eb2-a3dd-b2d96691dd7e/head
> /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024124 osd.41 192.168.4.19:6809/30077 54901 : cluster [ERR] 10.2a shard 38 missing 10/f0683d2a/81897399-4cb0-59b3-b9ae-bf043a272137:0003/head
>
>
>
> # ceph pg deep-scrub 10.2a
> instructing pg 10.2a on osd.41 to deep-scrub
>
>
> # ceph -w | grep 10.2a
>
>
> _______________________________________________
> ceph-users mailing list
> ceph-users@xxxxxxxxxxxxxx
> http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com



--
Cheers,
Brad
_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com

[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Ceph Dev]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux