Re: Scrub and deep-scrub repeating over and over

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

In case someone hits the same problem, try the following:
stop scrubbing by setting the "noscrub" and "nodeep-scrub" flags
wait until the running scrubs finish
restart the monitors (one by one)
restart the OSD servers (I restarted all three of them because it was a small cluster, but restarting all of them may not be necessary on big clusters)
unset the "noscrub" and "nodeep-scrub" flags

From what I noticed, we ran into this problem after upgrading the cluster from Infernalis to Jewel. However, it happened on only one of the three upgraded clusters, so it does not seem to be a very common problem.

Br,
Arvydas

On Thu, Sep 8, 2016 at 10:26 AM, Arvydas Opulskis <zebediejus@xxxxxxxxx> wrote:
Hi Goncalo, there it is:

# ceph pg 11.34a query
{
    "state": "active+clean+scrubbing",
    "snap_trimq": "[]",
    "epoch": 6547,
    "up": [
        24,
        3
    ],
    "acting": [
        24,
        3
    ],
    "actingbackfill": [
        "3",
        "24"
    ],
    "info": {
        "pgid": "11.34a",
        "last_update": "6547'85045",
        "last_complete": "6547'85045",
        "log_tail": "6215'81998",
        "last_user_version": 85045,
        "last_backfill": "MAX",
        "last_backfill_bitwise": 0,
        "purged_snaps": "[]",
        "history": {
            "epoch_created": 5178,
            "last_epoch_started": 5241,
            "last_epoch_clean": 5241,
            "last_epoch_split": 0,
            "last_epoch_marked_full": 0,
            "same_up_since": 5184,
            "same_interval_since": 5240,
            "same_primary_since": 5096,
            "last_scrub": "6547'85045",
            "last_scrub_stamp": "2016-09-08 09:20:06.804646",
            "last_deep_scrub": "6547'85045",
            "last_deep_scrub_stamp": "2016-09-08 09:18:22.582767",
            "last_clean_scrub_stamp": "2016-09-08 09:20:06.804646"
        },
        "stats": {
            "version": "6547'85045",
            "reported_seq": "219744",
            "reported_epoch": "6547",
            "state": "active+clean+scrubbing",
            "last_fresh": "2016-09-08 09:20:13.712725",
            "last_change": "2016-09-08 09:20:13.712725",
            "last_active": "2016-09-08 09:20:13.712725",
            "last_peered": "2016-09-08 09:20:13.712725",
            "last_clean": "2016-09-08 09:20:13.712725",
            "last_became_active": "2016-07-27 18:46:25.926150",
            "last_became_peered": "2016-07-27 18:46:25.926150",
            "last_unstale": "2016-09-08 09:20:13.712725",
            "last_undegraded": "2016-09-08 09:20:13.712725",
            "last_fullsized": "2016-09-08 09:20:13.712725",
            "mapping_epoch": 5185,
            "log_start": "6215'81998",
            "ondisk_log_start": "6215'81998",
            "created": 5178,
            "last_epoch_clean": 5241,
            "parent": "0.0",
            "parent_split_bits": 10,
            "last_scrub": "6547'85045",
            "last_scrub_stamp": "2016-09-08 09:20:06.804646",
            "last_deep_scrub": "6547'85045",
            "last_deep_scrub_stamp": "2016-09-08 09:18:22.582767",
            "last_clean_scrub_stamp": "2016-09-08 09:20:06.804646",
            "log_size": 3047,
            "ondisk_log_size": 3047,
            "stats_invalid": false,
            "dirty_stats_invalid": false,
            "omap_stats_invalid": false,
            "hitset_stats_invalid": false,
            "hitset_bytes_stats_invalid": false,
            "pin_stats_invalid": true,
            "stat_sum": {
                "num_bytes": 6225173162,
                "num_objects": 2688,
                "num_object_clones": 0,
                "num_object_copies": 5376,
                "num_objects_missing_on_primary": 0,
                "num_objects_missing": 0,
                "num_objects_degraded": 0,
                "num_objects_misplaced": 0,
                "num_objects_unfound": 0,
                "num_objects_dirty": 2688,
                "num_whiteouts": 0,
                "num_read": 3416,
                "num_read_kb": 710270,
                "num_write": 16467,
                "num_write_kb": 2275320,
                "num_scrub_errors": 0,
                "num_shallow_scrub_errors": 0,
                "num_deep_scrub_errors": 0,
                "num_objects_recovered": 0,
                "num_bytes_recovered": 0,
                "num_keys_recovered": 0,
                "num_objects_omap": 0,
                "num_objects_hit_set_archive": 0,
                "num_bytes_hit_set_archive": 0,
                "num_flush": 0,
                "num_flush_kb": 0,
                "num_evict": 0,
                "num_evict_kb": 0,
                "num_promote": 0,
                "num_flush_mode_high": 0,
                "num_flush_mode_low": 0,
                "num_evict_mode_some": 0,
                "num_evict_mode_full": 0,
                "num_objects_pinned": 0
            },
            "up": [
                24,
                3
            ],
            "acting": [
                24,
                3
            ],
            "blocked_by": [],
            "up_primary": 24,
            "acting_primary": 24
        },
        "empty": 0,
        "dne": 0,
        "incomplete": 0,
        "last_epoch_started": 5241,
        "hit_set_history": {
            "current_last_update": "0'0",
            "history": []
        }
    },
    "peer_info": [
        {
            "peer": "3",
            "pgid": "11.34a",
            "last_update": "6547'85045",
            "last_complete": "6547'85045",
            "log_tail": "4988'75612",
            "last_user_version": 0,
            "last_backfill": "MAX",
            "last_backfill_bitwise": 1,
            "purged_snaps": "[]",
            "history": {
                "epoch_created": 5178,
                "last_epoch_started": 5241,
                "last_epoch_clean": 5241,
                "last_epoch_split": 0,
                "last_epoch_marked_full": 0,
                "same_up_since": 5184,
                "same_interval_since": 5240,
                "same_primary_since": 5096,
                "last_scrub": "6547'85045",
                "last_scrub_stamp": "2016-09-08 09:20:06.804646",
                "last_deep_scrub": "6547'85045",
                "last_deep_scrub_stamp": "2016-09-08 09:18:22.582767",
                "last_clean_scrub_stamp": "2016-09-08 09:20:06.804646"
            },
            "stats": {
                "version": "5174'78681",
                "reported_seq": "68548",
                "reported_epoch": "5239",
                "state": "active+remapped+backfilling",
                "last_fresh": "2016-07-27 18:46:23.904812",
                "last_change": "2016-07-27 18:39:52.227105",
                "last_active": "2016-07-27 18:46:23.904812",
                "last_peered": "2016-07-27 18:46:23.904812",
                "last_clean": "2016-07-27 18:32:30.929929",
                "last_became_active": "2016-07-27 18:34:25.035629",
                "last_became_peered": "2016-07-27 18:34:25.035629",
                "last_unstale": "2016-07-27 18:46:23.904812",
                "last_undegraded": "2016-07-27 18:46:23.904812",
                "last_fullsized": "2016-07-27 18:46:23.904812",
                "mapping_epoch": 5185,
                "log_start": "4988'75612",
                "ondisk_log_start": "4988'75612",
                "created": 5178,
                "last_epoch_clean": 5183,
                "parent": "0.0",
                "parent_split_bits": 10,
                "last_scrub": "5015'78540",
                "last_scrub_stamp": "2016-07-22 10:10:55.296356",
                "last_deep_scrub": "5015'78540",
                "last_deep_scrub_stamp": "2016-07-22 10:10:55.296356",
                "last_clean_scrub_stamp": "2016-07-22 10:10:55.296356",
                "log_size": 3069,
                "ondisk_log_size": 3069,
                "stats_invalid": true,
                "dirty_stats_invalid": false,
                "omap_stats_invalid": false,
                "hitset_stats_invalid": false,
                "hitset_bytes_stats_invalid": false,
                "pin_stats_invalid": true,
                "stat_sum": {
                    "num_bytes": 4469376265,
                    "num_objects": 1704,
                    "num_object_clones": 0,
                    "num_object_copies": 5112,
                    "num_objects_missing_on_primary": 0,
                    "num_objects_missing": 0,
                    "num_objects_degraded": 0,
                    "num_objects_misplaced": 1711,
                    "num_objects_unfound": 0,
                    "num_objects_dirty": 1704,
                    "num_whiteouts": 0,
                    "num_read": 9692,
                    "num_read_kb": 6474215,
                    "num_write": 43858,
                    "num_write_kb": 14418818,
                    "num_scrub_errors": 0,
                    "num_shallow_scrub_errors": 0,
                    "num_deep_scrub_errors": 0,
                    "num_objects_recovered": 4190,
                    "num_bytes_recovered": 10727412780,
                    "num_keys_recovered": 0,
                    "num_objects_omap": 0,
                    "num_objects_hit_set_archive": 0,
                    "num_bytes_hit_set_archive": 0,
                    "num_flush": 0,
                    "num_flush_kb": 0,
                    "num_evict": 0,
                    "num_evict_kb": 0,
                    "num_promote": 0,
                    "num_flush_mode_high": 0,
                    "num_flush_mode_low": 0,
                    "num_evict_mode_some": 0,
                    "num_evict_mode_full": 0,
                    "num_objects_pinned": 0
                },
                "up": [
                    24,
                    3
                ],
                "acting": [
                    24,
                    3
                ],
                "blocked_by": [],
                "up_primary": 24,
                "acting_primary": 24
            },
            "empty": 0,
            "dne": 0,
            "incomplete": 0,
            "last_epoch_started": 5241,
            "hit_set_history": {
                "current_last_update": "0'0",
                "history": []
            }
        }
    ],
    "recovery_state": [
        {
            "name": "Started\/Primary\/Active",
            "enter_time": "2016-07-27 18:46:25.890580",
            "might_have_unfound": [],
            "recovery_progress": {
                "backfill_targets": [],
                "waiting_on_backfill": [],
                "last_backfill_started": "MIN",
                "backfill_info": {
                    "begin": "MIN",
                    "end": "MIN",
                    "objects": []
                },
                "peer_backfill_info": [],
                "backfills_in_flight": [],
                "recovering": [],
                "pg_backend": {
                    "pull_from_peer": [],
                    "pushing": []
                }
            },
            "scrub": {
                "scrubber.epoch_start": "5240",
                "scrubber.active": 1,
                "scrubber.state": "WAIT_REPLICAS",
                "scrubber.start": "11:52c3e5be::::0",
                "scrubber.end": "11:52c45d0a::::0",
                "scrubber.subset_last_update": "6538'84947",
                "scrubber.deep": false,
                "scrubber.seed": 4294967295,
                "scrubber.waiting_on": 1,
                "scrubber.waiting_on_whom": [
                    "3"
                ]
            }
        },
        {
            "name": "Started",
            "enter_time": "2016-07-27 18:46:24.832320"
        }
    ],
    "agent_state": {}
}



On Thu, Sep 8, 2016 at 10:16 AM, Goncalo Borges <goncalo.borges@xxxxxxxxxxxxx> wrote:
Can you please share the result of

    ceph pg 11.34a query

?


On 09/08/2016 05:03 PM, Arvydas Opulskis wrote:
2016-09-08 08:45:01.441945 osd.24 [INF] 11.34a scrub starts
2016-09-08 08:45:03.585039 osd.24 [INF] 11.34a scrub ok

--
Goncalo Borges
Research Computing
ARC Centre of Excellence for Particle Physics at the Terascale
School of Physics A28 | University of Sydney, NSW  2006
T: +61 2 93511937

_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com


_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com

[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Ceph Dev]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux