From: Darrick J. Wong <djwong@xxxxxxxxxx> Certain types of metadata have dependencies that cross scrub groups. For example, after a repair the part of realtime bitmap corresponding to a realtime group, we potentially need to rebuild the realtime summary to reflect the new bitmap contents. The rtsummary is a separate scrub group (metafiles) from the rgbitmap (rtgroup), which means that the rtsummary repairs must be tracked by a separate scrub_item. Create the necessary dependency table and code to make these kinds of cross-group validations possible. Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx> --- scrub/phase4.c | 43 ++++++++++++++++++++ scrub/repair.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ scrub/repair.h | 5 ++ 3 files changed, 171 insertions(+) diff --git a/scrub/phase4.c b/scrub/phase4.c index 74fcc55b379..2d0a448e268 100644 --- a/scrub/phase4.c +++ b/scrub/phase4.c @@ -42,6 +42,47 @@ struct repair_list_schedule { bool made_progress; }; +/* + * After a successful repair, schedule any additional revalidations needed in + * other scrub groups. + */ +static int +revalidate_across_groups( + struct scrub_ctx *ctx, + const struct action_item *old_aitem, + struct repair_list_schedule *rls) +{ + struct action_list alist; + int error; + + action_list_init(&alist); + + error = action_item_schedule_revalidation(ctx, old_aitem, &alist); + if (error) { + rls->aborted = true; + return error; + } + + if (action_list_empty(&alist)) + return 0; + + pthread_mutex_unlock(&rls->lock); + error = action_list_revalidate(ctx, &alist); + pthread_mutex_lock(&rls->lock); + + if (error) + rls->aborted = true; + else + rls->made_progress = true; + + /* + * Merge the action items into the scrub context for freeing, even if + * there was an error. + */ + action_list_merge(&rls->requeue_list, &alist); + return error; +} + /* Try to repair as many things on our list as we can. */ static void repair_list_worker( @@ -89,6 +130,8 @@ repair_list_worker( action_list_add(&rls->requeue_list, aitem); break; case TR_REPAIRED: + revalidate_across_groups(ctx, aitem, rls); + /* Item is clean. Free it. */ free(aitem); break; diff --git a/scrub/repair.c b/scrub/repair.c index 79a15f907a1..3e00db7a2fd 100644 --- a/scrub/repair.c +++ b/scrub/repair.c @@ -42,6 +42,15 @@ static const unsigned int repair_deps[XFS_SCRUB_TYPE_NR] = { DEP(XFS_SCRUB_TYPE_PQUOTA), [XFS_SCRUB_TYPE_RTSUM] = DEP(XFS_SCRUB_TYPE_RTBITMAP), }; + +/* + * Data dependencies that cross scrub groups. When we repair a metadata object + * of the given type (e.g. rtgroup bitmaps), we want to trigger a revalidation + * of the specified objects (e.g. rt summary file). + */ +static const unsigned int cross_group_recheck[XFS_SCRUB_TYPE_NR] = { + [XFS_SCRUB_TYPE_RGBITMAP] = DEP(XFS_SCRUB_TYPE_RTSUM), +}; #undef DEP /* @@ -781,3 +790,117 @@ repair_item_to_action_item( *aitemp = aitem; return 0; } + +static int +schedule_cross_group_recheck( + struct scrub_ctx *ctx, + unsigned int recheck_mask, + struct action_list *new_items) +{ + unsigned int scrub_type; + + foreach_scrub_type(scrub_type) { + struct action_item *aitem; + + if (!(recheck_mask & (1U << scrub_type))) + continue; + + switch (xfrog_scrubbers[scrub_type].group) { + case XFROG_SCRUB_GROUP_METAFILES: + /* + * XXX gcc fortify gets confused on the memset in + * scrub_item_init_fs if we hoist this allocation to a + * helper function. + */ + aitem = malloc(sizeof(struct action_item)); + if (!aitem) { + int error = errno; + + str_liberror(ctx, error, + _("creating repair revalidation action item")); + return error; + } + + INIT_LIST_HEAD(&aitem->list); + aitem->sri.sri_revalidate = true; + + scrub_item_init_fs(&aitem->sri); + scrub_item_schedule(&aitem->sri, scrub_type); + action_list_add(new_items, aitem); + break; + default: + /* We don't support any other groups yet. */ + assert(false); + continue; + } + } + + return 0; +} + +/* + * After a successful repair, schedule revalidation of metadata outside of this + * scrub item's group. + */ +int +action_item_schedule_revalidation( + struct scrub_ctx *ctx, + const struct action_item *old_aitem, + struct action_list *new_items) +{ + struct action_item *aitem, *n; + unsigned int scrub_type; + int error = 0; + + foreach_scrub_type(scrub_type) { + unsigned int mask; + + if (!(old_aitem->sri.sri_selected & (1U << scrub_type))) + continue; + mask = cross_group_recheck[scrub_type]; + if (!mask) + continue; + + error = schedule_cross_group_recheck(ctx, mask, new_items); + if (error) + goto bad; + } + + return 0; +bad: + list_for_each_entry_safe(aitem, n, &new_items->list, list) { + list_del(&aitem->list); + free(aitem); + } + return error; +} + +/* + * Revalidate all items scheduled for a recheck, and drop the ones that are + * clean. + */ +int +action_list_revalidate( + struct scrub_ctx *ctx, + struct action_list *alist) +{ + struct action_item *aitem, *n; + int error; + + list_for_each_entry_safe(aitem, n, &alist->list, list) { + error = scrub_item_check(ctx, &aitem->sri); + if (error) + return error; + + if (repair_item_count_needsrepair(&aitem->sri) > 0) { + aitem->sri.sri_revalidate = false; + continue; + } + + /* Metadata are clean, delete from list. */ + list_del(&aitem->list); + free(aitem); + } + + return 0; +} diff --git a/scrub/repair.h b/scrub/repair.h index c4b9b5799e2..f90ac16b13f 100644 --- a/scrub/repair.h +++ b/scrub/repair.h @@ -50,6 +50,11 @@ enum tryrepair_outcome { int action_item_try_repair(struct scrub_ctx *ctx, struct action_item *aitem, enum tryrepair_outcome *outcome); +int action_item_schedule_revalidation(struct scrub_ctx *ctx, + const struct action_item *old_aitem, + struct action_list *new_items); +int action_list_revalidate(struct scrub_ctx *sc, struct action_list *alist); + void repair_item_mustfix(struct scrub_item *sri, struct scrub_item *fix_now); /* Primary metadata is corrupt */