From: Darrick J. Wong <djwong@xxxxxxxxxx> When we've finished making repairs to some domain of filesystem metadata (file, AG, etc.) to correct an inconsistency, we should recheck all the other metadata types within that domain to make sure that we neither made things worse nor introduced more cross-referencing problems. If we did, requeue the item to make the repairs. If the only changes we made were optimizations, don't bother. Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx> --- scrub/repair.c | 37 +++++++++++++++++++++++++++++++++++++ scrub/scrub.c | 5 +++-- scrub/scrub.h | 10 ++++++++++ scrub/scrub_private.h | 2 ++ 4 files changed, 52 insertions(+), 2 deletions(-) diff --git a/scrub/repair.c b/scrub/repair.c index dd608629f2a..cf17bbd8d0e 100644 --- a/scrub/repair.c +++ b/scrub/repair.c @@ -476,8 +476,10 @@ action_item_try_repair( { struct scrub_item *sri = &aitem->sri; unsigned int before, after; + unsigned int scrub_type; int ret; + BUILD_BUG_ON(sizeof(sri->sri_selected) * NBBY < XFS_SCRUB_TYPE_NR); before = repair_item_count_needsrepair(sri); ret = repair_item(ctx, sri, 0); @@ -498,6 +500,41 @@ action_item_try_repair( return 0; } + /* + * Nothing in this fs object was marked inconsistent. This means we + * were merely optimizing metadata and there is no revalidation work to + * be done. + */ + if (!sri->sri_inconsistent) { + *outcome = TR_REPAIRED; + return 0; + } + + /* + * We fixed inconsistent metadata, so reschedule the entire object for + * immediate revalidation to see if anything else went wrong. + */ + foreach_scrub_type(scrub_type) + if (sri->sri_selected & (1U << scrub_type)) + sri->sri_state[scrub_type] = SCRUB_ITEM_NEEDSCHECK; + sri->sri_inconsistent = false; + sri->sri_revalidate = true; + + ret = scrub_item_check(ctx, sri); + if (ret) + return ret; + + after = repair_item_count_needsrepair(sri); + if (after > 0) { + /* + * Uhoh, we found something else broken. Tell the caller that + * this item needs to be queued for more repairs. + */ + sri->sri_revalidate = false; + *outcome = TR_REQUEUE; + return 0; + } + /* Repairs complete. */ *outcome = TR_REPAIRED; return 0; diff --git a/scrub/scrub.c b/scrub/scrub.c index 6f9f182c629..c245e46afa7 100644 --- a/scrub/scrub.c +++ b/scrub/scrub.c @@ -117,11 +117,12 @@ xfs_check_metadata( dbg_printf("check %s flags %xh\n", descr_render(&dsc), meta.sm_flags); error = -xfrog_scrub_metadata(xfdp, &meta); - if (debug_tweak_on("XFS_SCRUB_FORCE_REPAIR") && !error) - meta.sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; switch (error) { case 0: /* No operational errors encountered. */ + if (!sri->sri_revalidate && + debug_tweak_on("XFS_SCRUB_FORCE_REPAIR")) + meta.sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; break; case ENOENT: /* Metadata not present, just skip it. */ diff --git a/scrub/scrub.h b/scrub/scrub.h index e1ce6ed2f59..53354099c81 100644 --- a/scrub/scrub.h +++ b/scrub/scrub.h @@ -59,11 +59,20 @@ struct scrub_item { __u32 sri_gen; __u32 sri_agno; + /* Bitmask of scrub types that were scheduled here. */ + __u32 sri_selected; + /* Scrub item state flags, one for each XFS_SCRUB_TYPE. */ __u8 sri_state[XFS_SCRUB_TYPE_NR]; /* Track scrub and repair call retries for each scrub type. */ __u8 sri_tries[XFS_SCRUB_TYPE_NR]; + + /* Were there any corruption repairs needed? */ + bool sri_inconsistent:1; + + /* Are we revalidating after repairs? */ + bool sri_revalidate:1; }; #define foreach_scrub_type(loopvar) \ @@ -103,6 +112,7 @@ static inline void scrub_item_schedule(struct scrub_item *sri, unsigned int scrub_type) { sri->sri_state[scrub_type] = SCRUB_ITEM_NEEDSCHECK; + sri->sri_selected |= (1U << scrub_type); } void scrub_item_schedule_group(struct scrub_item *sri, diff --git a/scrub/scrub_private.h b/scrub/scrub_private.h index 6380779c90b..c60ea555885 100644 --- a/scrub/scrub_private.h +++ b/scrub/scrub_private.h @@ -71,6 +71,8 @@ scrub_item_save_state( unsigned int scrub_flags) { sri->sri_state[scrub_type] = scrub_flags & SCRUB_ITEM_REPAIR_ANY; + if (scrub_flags & SCRUB_ITEM_NEEDSREPAIR) + sri->sri_inconsistent = true; } static inline void