Re: scrub errors

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hammer is no longer supported.

What's the status of osds 7 and 17?

On Tue, Mar 26, 2019 at 8:56 AM solarflow99 <solarflow99@xxxxxxxxx> wrote:
>
> hi, thanks.  It's still using Hammer.  Here's the output from the pg query; the last command you gave doesn't work at all, it must be too old.
>
>
> # ceph pg 10.2a query
> {
>     "state": "active+clean+inconsistent",
>     "snap_trimq": "[]",
>     "epoch": 23265,
>     "up": [
>         41,
>         38,
>         8
>     ],
>     "acting": [
>         41,
>         38,
>         8
>     ],
>     "actingbackfill": [
>         "8",
>         "38",
>         "41"
>     ],
>     "info": {
>         "pgid": "10.2a",
>         "last_update": "23265'20886859",
>         "last_complete": "23265'20886859",
>         "log_tail": "23265'20883809",
>         "last_user_version": 20886859,
>         "last_backfill": "MAX",
>         "purged_snaps": "[]",
>         "history": {
>             "epoch_created": 8200,
>             "last_epoch_started": 21481,
>             "last_epoch_clean": 21487,
>             "last_epoch_split": 0,
>             "same_up_since": 21472,
>             "same_interval_since": 21474,
>             "same_primary_since": 8244,
>             "last_scrub": "23265'20864209",
>             "last_scrub_stamp": "2019-03-22 22:39:13.930673",
>             "last_deep_scrub": "23265'20864209",
>             "last_deep_scrub_stamp": "2019-03-22 22:39:13.930673",
>             "last_clean_scrub_stamp": "2019-03-15 01:33:21.447438"
>         },
>         "stats": {
>             "version": "23265'20886859",
>             "reported_seq": "10109937",
>             "reported_epoch": "23265",
>             "state": "active+clean+inconsistent",
>             "last_fresh": "2019-03-25 15:52:53.720768",
>             "last_change": "2019-03-22 22:39:13.931038",
>             "last_active": "2019-03-25 15:52:53.720768",
>             "last_peered": "2019-03-25 15:52:53.720768",
>             "last_clean": "2019-03-25 15:52:53.720768",
>             "last_became_active": "0.000000",
>             "last_became_peered": "0.000000",
>             "last_unstale": "2019-03-25 15:52:53.720768",
>             "last_undegraded": "2019-03-25 15:52:53.720768",
>             "last_fullsized": "2019-03-25 15:52:53.720768",
>             "mapping_epoch": 21472,
>             "log_start": "23265'20883809",
>             "ondisk_log_start": "23265'20883809",
>             "created": 8200,
>             "last_epoch_clean": 21487,
>             "parent": "0.0",
>             "parent_split_bits": 0,
>             "last_scrub": "23265'20864209",
>             "last_scrub_stamp": "2019-03-22 22:39:13.930673",
>             "last_deep_scrub": "23265'20864209",
>             "last_deep_scrub_stamp": "2019-03-22 22:39:13.930673",
>             "last_clean_scrub_stamp": "2019-03-15 01:33:21.447438",
>             "log_size": 3050,
>             "ondisk_log_size": 3050,
>             "stats_invalid": "0",
>             "stat_sum": {
>                 "num_bytes": 8220278746,
>                 "num_objects": 345034,
>                 "num_object_clones": 0,
>                 "num_object_copies": 1035102,
>                 "num_objects_missing_on_primary": 0,
>                 "num_objects_degraded": 0,
>                 "num_objects_misplaced": 0,
>                 "num_objects_unfound": 0,
>                 "num_objects_dirty": 345034,
>                 "num_whiteouts": 0,
>                 "num_read": 7904350,
>                 "num_read_kb": 58116568,
>                 "num_write": 8753504,
>                 "num_write_kb": 85104263,
>                 "num_scrub_errors": 47,
>                 "num_shallow_scrub_errors": 47,
>                 "num_deep_scrub_errors": 0,
>                 "num_objects_recovered": 167138,
>                 "num_bytes_recovered": 5193543924,
>                 "num_keys_recovered": 0,
>                 "num_objects_omap": 0,
>                 "num_objects_hit_set_archive": 0,
>                 "num_bytes_hit_set_archive": 0
>             },
>             "up": [
>                 41,
>                 38,
>                 8
>             ],
>             "acting": [
>                 41,
>                 38,
>                 8
>             ],
>             "blocked_by": [],
>             "up_primary": 41,
>             "acting_primary": 41
>         },
>         "empty": 0,
>         "dne": 0,
>         "incomplete": 0,
>         "last_epoch_started": 21481,
>         "hit_set_history": {
>             "current_last_update": "0'0",
>             "current_last_stamp": "0.000000",
>             "current_info": {
>                 "begin": "0.000000",
>                 "end": "0.000000",
>                 "version": "0'0",
>                 "using_gmt": "0"
>             },
>             "history": []
>         }
>     },
>     "peer_info": [
>         {
>             "peer": "8",
>             "pgid": "10.2a",
>             "last_update": "23265'20886859",
>             "last_complete": "23265'20886859",
>             "log_tail": "21395'11840466",
>             "last_user_version": 11843648,
>             "last_backfill": "MAX",
>             "purged_snaps": "[]",
>             "history": {
>                 "epoch_created": 8200,
>                 "last_epoch_started": 21481,
>                 "last_epoch_clean": 21487,
>                 "last_epoch_split": 0,
>                 "same_up_since": 21472,
>                 "same_interval_since": 21474,
>                 "same_primary_since": 8244,
>                 "last_scrub": "23265'20864209",
>                 "last_scrub_stamp": "2019-03-22 22:39:13.930673",
>                 "last_deep_scrub": "23265'20864209",
>                 "last_deep_scrub_stamp": "2019-03-22 22:39:13.930673",
>                 "last_clean_scrub_stamp": "2019-03-15 01:33:21.447438"
>             },
>             "stats": {
>                 "version": "21471'11843647",
>                 "reported_seq": "7670875",
>                 "reported_epoch": "21471",
>                 "state": "active+undersized+degraded+remapped+wait_backfill",
>                 "last_fresh": "2018-09-22 07:07:23.061013",
>                 "last_change": "2018-09-22 06:39:32.487204",
>                 "last_active": "2018-09-22 07:07:23.061013",
>                 "last_peered": "2018-09-22 07:07:23.061013",
>                 "last_clean": "2018-09-22 06:33:47.246063",
>                 "last_became_active": "0.000000",
>                 "last_became_peered": "0.000000",
>                 "last_unstale": "2018-09-22 07:07:23.061013",
>                 "last_undegraded": "2018-09-22 06:39:13.626445",
>                 "last_fullsized": "2018-09-22 06:39:13.626445",
>                 "mapping_epoch": 21472,
>                 "log_start": "21395'11840466",
>                 "ondisk_log_start": "21395'11840466",
>                 "created": 8200,
>                 "last_epoch_clean": 21397,
>                 "parent": "0.0",
>                 "parent_split_bits": 0,
>                 "last_scrub": "21395'11835365",
>                 "last_scrub_stamp": "2018-09-21 12:11:47.230141",
>                 "last_deep_scrub": "21395'11835365",
>                 "last_deep_scrub_stamp": "2018-09-21 12:11:47.230141",
>                 "last_clean_scrub_stamp": "2018-09-21 12:11:47.230141",
>                 "log_size": 3181,
>                 "ondisk_log_size": 3181,
>                 "stats_invalid": "0",
>                 "stat_sum": {
>                     "num_bytes": 6406027390,
>                     "num_objects": 241710,
>                     "num_object_clones": 0,
>                     "num_object_copies": 966844,
>                     "num_objects_missing_on_primary": 0,
>                     "num_objects_degraded": 241715,
>                     "num_objects_misplaced": 725133,
>                     "num_objects_unfound": 0,
>                     "num_objects_dirty": 241710,
>                     "num_whiteouts": 0,
>                     "num_read": 5638025,
>                     "num_read_kb": 48736266,
>                     "num_write": 6789818,
>                     "num_write_kb": 67680335,
>                     "num_scrub_errors": 0,
>                     "num_shallow_scrub_errors": 0,
>                     "num_deep_scrub_errors": 0,
>                     "num_objects_recovered": 167079,
>                     "num_bytes_recovered": 5191625476,
>                     "num_keys_recovered": 0,
>                     "num_objects_omap": 0,
>                     "num_objects_hit_set_archive": 0,
>                     "num_bytes_hit_set_archive": 0
>                 },
>                 "up": [
>                     41,
>                     38,
>                     8
>                 ],
>                 "acting": [
>                     41,
>                     38,
>                     8
>                 ],
>                 "blocked_by": [],
>                 "up_primary": 41,
>                 "acting_primary": 41
>             },
>             "empty": 0,
>             "dne": 0,
>             "incomplete": 0,
>             "last_epoch_started": 21481,
>             "hit_set_history": {
>                 "current_last_update": "0'0",
>                 "current_last_stamp": "0.000000",
>                 "current_info": {
>                     "begin": "0.000000",
>                     "end": "0.000000",
>                     "version": "0'0",
>                     "using_gmt": "0"
>                 },
>                 "history": []
>             }
>         },
>         {
>             "peer": "38",
>             "pgid": "10.2a",
>             "last_update": "23265'20886859",
>             "last_complete": "21395'11843517",
>             "log_tail": "21395'11840466",
>             "last_user_version": 11843517,
>             "last_backfill": "MAX",
>             "purged_snaps": "[]",
>             "history": {
>                 "epoch_created": 8200,
>                 "last_epoch_started": 21481,
>                 "last_epoch_clean": 21487,
>                 "last_epoch_split": 0,
>                 "same_up_since": 21472,
>                 "same_interval_since": 21474,
>                 "same_primary_since": 8244,
>                 "last_scrub": "23265'20864209",
>                 "last_scrub_stamp": "2019-03-22 22:39:13.930673",
>                 "last_deep_scrub": "23265'20864209",
>                 "last_deep_scrub_stamp": "2019-03-22 22:39:13.930673",
>                 "last_clean_scrub_stamp": "2019-03-15 01:33:21.447438"
>             },
>             "stats": {
>                 "version": "21395'11843516",
>                 "reported_seq": "7670719",
>                 "reported_epoch": "21395",
>                 "state": "active+clean",
>                 "last_fresh": "2018-09-22 06:33:14.791334",
>                 "last_change": "2018-09-21 12:11:47.230557",
>                 "last_active": "2018-09-22 06:33:14.791334",
>                 "last_peered": "2018-09-22 06:33:14.791334",
>                 "last_clean": "2018-09-22 06:33:14.791334",
>                 "last_became_active": "0.000000",
>                 "last_became_peered": "0.000000",
>                 "last_unstale": "2018-09-22 06:33:14.791334",
>                 "last_undegraded": "2018-09-22 06:33:14.791334",
>                 "last_fullsized": "2018-09-22 06:33:14.791334",
>                 "mapping_epoch": 21472,
>                 "log_start": "21395'11840466",
>                 "ondisk_log_start": "21395'11840466",
>                 "created": 8200,
>                 "last_epoch_clean": 20840,
>                 "parent": "0.0",
>                 "parent_split_bits": 0,
>                 "last_scrub": "21395'11835365",
>                 "last_scrub_stamp": "2018-09-21 12:11:47.230141",
>                 "last_deep_scrub": "21395'11835365",
>                 "last_deep_scrub_stamp": "2018-09-21 12:11:47.230141",
>                 "last_clean_scrub_stamp": "2018-09-21 12:11:47.230141",
>                 "log_size": 3050,
>                 "ondisk_log_size": 3050,
>                 "stats_invalid": "0",
>                 "stat_sum": {
>                     "num_bytes": 6405126628,
>                     "num_objects": 241711,
>                     "num_object_clones": 0,
>                     "num_object_copies": 725130,
>                     "num_objects_missing_on_primary": 0,
>                     "num_objects_degraded": 0,
>                     "num_objects_misplaced": 0,
>                     "num_objects_unfound": 0,
>                     "num_objects_dirty": 241711,
>                     "num_whiteouts": 0,
>                     "num_read": 5637862,
>                     "num_read_kb": 48735376,
>                     "num_write": 6789687,
>                     "num_write_kb": 67678402,
>                     "num_scrub_errors": 0,
>                     "num_shallow_scrub_errors": 0,
>                     "num_deep_scrub_errors": 0,
>                     "num_objects_recovered": 167079,
>                     "num_bytes_recovered": 5191625476,
>                     "num_keys_recovered": 0,
>                     "num_objects_omap": 0,
>                     "num_objects_hit_set_archive": 0,
>                     "num_bytes_hit_set_archive": 0
>                 },
>                 "up": [
>                     41,
>                     38,
>                     8
>                 ],
>                 "acting": [
>                     41,
>                     38,
>                     8
>                 ],
>                 "blocked_by": [],
>                 "up_primary": 41,
>                 "acting_primary": 41
>             },
>             "empty": 0,
>             "dne": 0,
>             "incomplete": 0,
>             "last_epoch_started": 21481,
>             "hit_set_history": {
>                 "current_last_update": "0'0",
>                 "current_last_stamp": "0.000000",
>                 "current_info": {
>                     "begin": "0.000000",
>                     "end": "0.000000",
>                     "version": "0'0",
>                     "using_gmt": "0"
>                 },
>                 "history": []
>             }
>         }
>     ],
>     "recovery_state": [
>         {
>             "name": "Started\/Primary\/Active",
>             "enter_time": "2018-09-22 07:07:48.637248",
>             "might_have_unfound": [
>                 {
>                     "osd": "7",
>                     "status": "not queried"
>                 },
>                 {
>                     "osd": "8",
>                     "status": "already probed"
>                 },
>                 {
>                     "osd": "17",
>                     "status": "not queried"
>                 },
>                 {
>                     "osd": "38",
>                     "status": "already probed"
>                 }
>             ],
>             "recovery_progress": {
>                 "backfill_targets": [],
>                 "waiting_on_backfill": [],
>                 "last_backfill_started": "-1\/0\/\/0",
>                 "backfill_info": {
>                     "begin": "-1\/0\/\/0",
>                     "end": "-1\/0\/\/0",
>                     "objects": []
>                 },
>                 "peer_backfill_info": [],
>                 "backfills_in_flight": [],
>                 "recovering": [],
>                 "pg_backend": {
>                     "pull_from_peer": [],
>                     "pushing": []
>                 }
>             },
>             "scrub": {
>                 "scrubber.epoch_start": "21474",
>                 "scrubber.active": 0,
>                 "scrubber.waiting_on": 0,
>                 "scrubber.waiting_on_whom": []
>             }
>         },
>         {
>             "name": "Started",
>             "enter_time": "2018-09-22 07:07:42.138358"
>         }
>     ],
>     "agent_state": {}
> }
>
>
> On Mon, Mar 25, 2019 at 3:46 PM Brad Hubbard <bhubbard@xxxxxxxxxx> wrote:
>>
>> It would help to know what version you are running but, to begin with,
>> could you post the output of the following?
>>
>> $ sudo ceph pg 10.2a query
>> $ sudo rados list-inconsistent-obj 10.2a --format=json-pretty
>>
>> Also, have a read of
>> http://docs.ceph.com/docs/mimic/rados/troubleshooting/troubleshooting-pg/
>> (adjust the URL for your release).
>>
>> On Tue, Mar 26, 2019 at 8:19 AM solarflow99 <solarflow99@xxxxxxxxx> wrote:
>> >
>> > I noticed my cluster has scrub errors but the deep-scrub command doesn't show any errors.  Is there any way to know what it takes to fix it?
>> >
>> >
>> >
>> > # ceph health detail
>> > HEALTH_ERR 1 pgs inconsistent; 47 scrub errors
>> > pg 10.2a is active+clean+inconsistent, acting [41,38,8]
>> > 47 scrub errors
>> >
>> > # zgrep 10.2a /var/log/ceph/ceph.log*
>> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 16:20:18.148299 osd.41 192.168.4.19:6809/30077 54885 : cluster [INF] 10.2a deep-scrub starts
>> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024040 osd.41 192.168.4.19:6809/30077 54886 : cluster [ERR] 10.2a shard 38 missing 10/24083d2a/ec50777d-cc99-46a8-8610-4492213f412f/head
>> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024049 osd.41 192.168.4.19:6809/30077 54887 : cluster [ERR] 10.2a shard 38 missing 10/ff183d2a/fce859b9-61a9-46cb-82f1-4b4af31c10db/head
>> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024074 osd.41 192.168.4.19:6809/30077 54888 : cluster [ERR] 10.2a shard 38 missing 10/34283d2a/4b7c96cb-c494-4637-8669-e42049bd0e1c/head
>> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024076 osd.41 192.168.4.19:6809/30077 54889 : cluster [ERR] 10.2a shard 38 missing 10/df283d2a/bbe61149-99f8-4b83-a42b-b208d18094a8/head
>> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024077 osd.41 192.168.4.19:6809/30077 54890 : cluster [ERR] 10.2a shard 38 missing 10/35383d2a/60e8ed9b-bd04-5a43-8917-6f29eba28a66:0014/head
>> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024078 osd.41 192.168.4.19:6809/30077 54891 : cluster [ERR] 10.2a shard 38 missing 10/d5383d2a/2bdeb186-561b-4151-b87e-fe7c2e217d41/head
>> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024080 osd.41 192.168.4.19:6809/30077 54892 : cluster [ERR] 10.2a shard 38 missing 10/a7383d2a/b6b9d21d-2f4f-4550-8928-52552349db7d/head
>> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024081 osd.41 192.168.4.19:6809/30077 54893 : cluster [ERR] 10.2a shard 38 missing 10/9c383d2a/5b552687-c709-4e87-b773-1cce5b262754/head
>> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024082 osd.41 192.168.4.19:6809/30077 54894 : cluster [ERR] 10.2a shard 38 missing 10/5d383d2a/cb1a2ea8-0872-4de9-8b93-5ea8d9d8e613/head
>> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024083 osd.41 192.168.4.19:6809/30077 54895 : cluster [ERR] 10.2a shard 38 missing 10/8f483d2a/74c7a2b9-f00a-4c89-afbd-c1b8439234ac/head
>> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024085 osd.41 192.168.4.19:6809/30077 54896 : cluster [ERR] 10.2a shard 38 missing 10/b1583d2a/b3f00768-82a2-4637-91d1-164f3a51312a/head
>> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024086 osd.41 192.168.4.19:6809/30077 54897 : cluster [ERR] 10.2a shard 38 missing 10/35583d2a/e347aff4-7b71-476e-863a-310e767e4160/head
>> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024088 osd.41 192.168.4.19:6809/30077 54898 : cluster [ERR] 10.2a shard 38 missing 10/69583d2a/0805d07a-49d1-44cb-87c7-3bd73a0ce692/head
>> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024122 osd.41 192.168.4.19:6809/30077 54899 : cluster [ERR] 10.2a shard 38 missing 10/1a583d2a/d65bcf6a-9457-46c3-8fbc-432ebbaad89a/head
>> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024123 osd.41 192.168.4.19:6809/30077 54900 : cluster [ERR] 10.2a shard 38 missing 10/6d583d2a/5592f7d6-a131-4eb2-a3dd-b2d96691dd7e/head
>> > /var/log/ceph/ceph.log-20190323.gz:2019-03-22 18:29:02.024124 osd.41 192.168.4.19:6809/30077 54901 : cluster [ERR] 10.2a shard 38 missing 10/f0683d2a/81897399-4cb0-59b3-b9ae-bf043a272137:0003/head
>> >
>> >
>> >
>> > # ceph pg deep-scrub 10.2a
>> > instructing pg 10.2a on osd.41 to deep-scrub
>> >
>> >
>> > # ceph -w | grep 10.2a
>> >
>> >
>> > _______________________________________________
>> > ceph-users mailing list
>> > ceph-users@xxxxxxxxxxxxxx
>> > http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com
>>
>>
>>
>> --
>> Cheers,
>> Brad



-- 
Cheers,
Brad
_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com



[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Ceph Dev]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux