[PATCH 40/41] xfs_scrub: retest metadata across scrub groups after a repair

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Darrick J. Wong <djwong@xxxxxxxxxx>

Certain types of metadata have dependencies that cross scrub groups.
For example, after we repair the part of the realtime bitmap corresponding
to a realtime group, we potentially need to rebuild the realtime summary to
reflect the new bitmap contents.  The rtsummary is in a separate scrub group
(metafiles) from the rgbitmap (rtgroup), which means that the rtsummary
repairs must be tracked by a separate scrub_item.

Create the necessary dependency table and code to make these kinds of
cross-group validations possible.

Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx>
---
 scrub/phase4.c |   43 ++++++++++++++++++++
 scrub/repair.c |  123 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 scrub/repair.h |    5 ++
 3 files changed, 171 insertions(+)


diff --git a/scrub/phase4.c b/scrub/phase4.c
index 74fcc55b379..2d0a448e268 100644
--- a/scrub/phase4.c
+++ b/scrub/phase4.c
@@ -42,6 +42,47 @@ struct repair_list_schedule {
 	bool				made_progress;
 };
 
+/*
+ * After a successful repair, schedule and run any revalidations needed in
+ * other scrub groups.  On error, set rls->aborted and return the error.
+ */
+static int
+revalidate_across_groups(
+	struct scrub_ctx		*ctx,
+	const struct action_item	*old_aitem,
+	struct repair_list_schedule	*rls)
+{
+	struct action_list		alist;
+	int				error;
+
+	action_list_init(&alist);
+
+	error = action_item_schedule_revalidation(ctx, old_aitem, &alist);
+	if (error) {
+		rls->aborted = true;
+		return error;
+	}
+
+	if (action_list_empty(&alist))
+		return 0;
+
+	pthread_mutex_unlock(&rls->lock);	/* drop lock while rechecking */
+	error = action_list_revalidate(ctx, &alist);
+	pthread_mutex_lock(&rls->lock);
+
+	if (error)
+		rls->aborted = true;
+	else
+		rls->made_progress = true;
+
+	/*
+	 * Merge any remaining items into the requeue list so that they can be
+	 * freed later, even if there was an error.
+	 */
+	action_list_merge(&rls->requeue_list, &alist);
+	return error;
+}
+
 /* Try to repair as many things on our list as we can. */
 static void
 repair_list_worker(
@@ -89,6 +130,8 @@ repair_list_worker(
 			action_list_add(&rls->requeue_list, aitem);
 			break;
 		case TR_REPAIRED:
+			revalidate_across_groups(ctx, aitem, rls);
+
 			/* Item is clean.  Free it. */
 			free(aitem);
 			break;
diff --git a/scrub/repair.c b/scrub/repair.c
index 79a15f907a1..3e00db7a2fd 100644
--- a/scrub/repair.c
+++ b/scrub/repair.c
@@ -42,6 +42,15 @@ static const unsigned int repair_deps[XFS_SCRUB_TYPE_NR] = {
 					  DEP(XFS_SCRUB_TYPE_PQUOTA),
 	[XFS_SCRUB_TYPE_RTSUM]		= DEP(XFS_SCRUB_TYPE_RTBITMAP),
 };
+
+/*
+ * Data dependencies that cross scrub groups, indexed by repaired scrub type.
+ * Each entry is a bitmask of the scrub types to revalidate after that repair,
+ * e.g. repairing an rtgroup bitmap triggers a recheck of the rt summary file.
+ */
+static const unsigned int cross_group_recheck[XFS_SCRUB_TYPE_NR] = {
+	[XFS_SCRUB_TYPE_RGBITMAP]	= DEP(XFS_SCRUB_TYPE_RTSUM),
+};
 #undef DEP
 
 /*
@@ -781,3 +790,117 @@ repair_item_to_action_item(
 	*aitemp = aitem;
 	return 0;
 }
+
+/* Queue a recheck item per scrub type in @recheck_mask; returns 0 or errno. */
+static int
+schedule_cross_group_recheck(
+	struct scrub_ctx	*ctx,
+	unsigned int		recheck_mask,
+	struct action_list	*new_items)
+{
+	unsigned int		scrub_type;
+
+	foreach_scrub_type(scrub_type) {
+		struct action_item	*aitem;
+
+		if (!(recheck_mask & (1U << scrub_type)))
+			continue;
+
+		switch (xfrog_scrubbers[scrub_type].group) {
+		case XFROG_SCRUB_GROUP_METAFILES:
+			/*
+			 * XXX gcc fortify gets confused on the memset in
+			 * scrub_item_init_fs if we hoist this to a helper.
+			 */
+			aitem = malloc(sizeof(struct action_item));
+			if (!aitem) {
+				int	error = errno;
+
+				str_liberror(ctx, error,
+						_("creating repair revalidation action item"));
+				return error;
+			}
+
+			INIT_LIST_HEAD(&aitem->list);
+			scrub_item_init_fs(&aitem->sri);
+			/* Set after init: its memset would clear the flag. */
+			aitem->sri.sri_revalidate = true;
+			scrub_item_schedule(&aitem->sri, scrub_type);
+			action_list_add(new_items, aitem);
+			break;
+		default:
+			/* We don't support any other groups yet. */
+			assert(false);
+			continue;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * After a successful repair of @old_aitem, add to @new_items a revalidation
+ * item for each cross-group dependency.  On error, @new_items is emptied.
+ */
+int
+action_item_schedule_revalidation(
+	struct scrub_ctx		*ctx,
+	const struct action_item	*old_aitem,
+	struct action_list		*new_items)
+{
+	struct action_item		*aitem, *n;
+	unsigned int			scrub_type;
+	int				error = 0;
+
+	foreach_scrub_type(scrub_type) {
+		unsigned int		mask;
+
+		if (!(old_aitem->sri.sri_selected & (1U << scrub_type)))
+			continue;
+		mask = cross_group_recheck[scrub_type];
+		if (!mask)
+			continue;
+
+		error = schedule_cross_group_recheck(ctx, mask, new_items);
+		if (error)
+			goto bad;
+	}
+
+	return 0;
+bad:	/* free every item on @new_items before reporting the error */
+	list_for_each_entry_safe(aitem, n, &new_items->list, list) {
+		list_del(&aitem->list);
+		free(aitem);
+	}
+	return error;
+}
+
+/*
+ * Rescrub every item in @alist, freeing the ones that come back clean and
+ * keeping the ones that still need repair.  Stops at the first scrub error.
+ */
+int
+action_list_revalidate(
+	struct scrub_ctx	*ctx,
+	struct action_list	*alist)
+{
+	struct action_item	*aitem, *n;
+	int			error;
+
+	list_for_each_entry_safe(aitem, n, &alist->list, list) {
+		error = scrub_item_check(ctx, &aitem->sri);
+		if (error)
+			return error;
+
+		if (repair_item_count_needsrepair(&aitem->sri) > 0) {
+			aitem->sri.sri_revalidate = false;
+			continue;
+		}
+
+		/* Nothing left to repair here; unlink and free the item. */
+		list_del(&aitem->list);
+		free(aitem);
+	}
+
+	return 0;
+}
diff --git a/scrub/repair.h b/scrub/repair.h
index c4b9b5799e2..f90ac16b13f 100644
--- a/scrub/repair.h
+++ b/scrub/repair.h
@@ -50,6 +50,11 @@ enum tryrepair_outcome {
 int action_item_try_repair(struct scrub_ctx *ctx, struct action_item *aitem,
 		enum tryrepair_outcome *outcome);
 
+int action_item_schedule_revalidation(struct scrub_ctx *ctx,
+		const struct action_item *old_aitem,
+		struct action_list *new_items);
+int action_list_revalidate(struct scrub_ctx *ctx, struct action_list *alist);
+
 void repair_item_mustfix(struct scrub_item *sri, struct scrub_item *fix_now);
 
 /* Primary metadata is corrupt */




[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux