[PATCH 2/6] xfs_scrub: in phase 3, use the opened file descriptor for scrub calls

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Darrick J. Wong <djwong@xxxxxxxxxx>

While profiling the performance of xfs_scrub, I noticed that phase3 only
employs the scrub-by-handle interface.  The kernel has had the ability
to skip the untrusted iget lookup if the fd matches the handle data
since the beginning, and using it reduces the phase 3 runtime by 5% on
the author's system.

Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx>
---
 scrub/phase3.c |   32 ++++++++++++++++++++++----------
 scrub/scrub.c  |   21 ++++++++++++++++++---
 scrub/scrub.h  |    2 +-
 3 files changed, 41 insertions(+), 14 deletions(-)


diff --git a/scrub/phase3.c b/scrub/phase3.c
index 868f444d..7da11299 100644
--- a/scrub/phase3.c
+++ b/scrub/phase3.c
@@ -59,16 +59,27 @@ scrub_inode(
 	agno = cvt_ino_to_agno(&ctx->mnt, bstat->bs_ino);
 	background_sleep();
 
-	/* Try to open the inode to pin it. */
+	/*
+	 * Open this regular file to pin it in memory.  Avoiding the use of
+	 * scan-by-handle means that the in-kernel scrubber doesn't pay the
+	 * cost of opening the handle (looking up the inode in the inode btree,
+	 * grabbing the inode, checking the generation) with every scrub call.
+	 *
+	 * Note: We cannot use this same trick for directories because the VFS
+	 * will try to reconnect directory file handles to the root directory
+	 * by walking '..' entries upwards, and loops in the dirent index
+	 * btree will cause livelocks.
+	 *
+	 * ESTALE means we scan the whole cluster again.
+	 */
 	if (S_ISREG(bstat->bs_mode)) {
 		fd = scrub_open_handle(handle);
-		/* Stale inode means we scan the whole cluster again. */
 		if (fd < 0 && errno == ESTALE)
 			return ESTALE;
 	}
 
 	/* Scrub the inode. */
-	error = scrub_file(ctx, bstat, XFS_SCRUB_TYPE_INODE, &alist);
+	error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_INODE, &alist);
 	if (error)
 		goto out;
 
@@ -77,13 +88,13 @@ scrub_inode(
 		goto out;
 
 	/* Scrub all block mappings. */
-	error = scrub_file(ctx, bstat, XFS_SCRUB_TYPE_BMBTD, &alist);
+	error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_BMBTD, &alist);
 	if (error)
 		goto out;
-	error = scrub_file(ctx, bstat, XFS_SCRUB_TYPE_BMBTA, &alist);
+	error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_BMBTA, &alist);
 	if (error)
 		goto out;
-	error = scrub_file(ctx, bstat, XFS_SCRUB_TYPE_BMBTC, &alist);
+	error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_BMBTC, &alist);
 	if (error)
 		goto out;
 
@@ -93,21 +104,22 @@ scrub_inode(
 
 	if (S_ISLNK(bstat->bs_mode)) {
 		/* Check symlink contents. */
-		error = scrub_file(ctx, bstat, XFS_SCRUB_TYPE_SYMLINK, &alist);
+		error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_SYMLINK,
+				&alist);
 	} else if (S_ISDIR(bstat->bs_mode)) {
 		/* Check the directory entries. */
-		error = scrub_file(ctx, bstat, XFS_SCRUB_TYPE_DIR, &alist);
+		error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_DIR, &alist);
 	}
 	if (error)
 		goto out;
 
 	/* Check all the extended attributes. */
-	error = scrub_file(ctx, bstat, XFS_SCRUB_TYPE_XATTR, &alist);
+	error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_XATTR, &alist);
 	if (error)
 		goto out;
 
 	/* Check parent pointers. */
-	error = scrub_file(ctx, bstat, XFS_SCRUB_TYPE_PARENT, &alist);
+	error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_PARENT, &alist);
 	if (error)
 		goto out;
 
diff --git a/scrub/scrub.c b/scrub/scrub.c
index 0034f11d..19a0b2d0 100644
--- a/scrub/scrub.c
+++ b/scrub/scrub.c
@@ -122,6 +122,7 @@ scrub_warn_incomplete_scrub(
 static enum check_outcome
 xfs_check_metadata(
 	struct scrub_ctx		*ctx,
+	struct xfs_fd			*xfdp,
 	struct xfs_scrub_metadata	*meta,
 	bool				is_inode)
 {
@@ -135,7 +136,7 @@ xfs_check_metadata(
 
 	dbg_printf("check %s flags %xh\n", descr_render(&dsc), meta->sm_flags);
 retry:
-	error = -xfrog_scrub_metadata(&ctx->mnt, meta);
+	error = -xfrog_scrub_metadata(xfdp, meta);
 	if (debug_tweak_on("XFS_SCRUB_FORCE_REPAIR") && !error)
 		meta->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
 	switch (error) {
@@ -316,7 +317,7 @@ scrub_meta_type(
 	background_sleep();
 
 	/* Check the item. */
-	fix = xfs_check_metadata(ctx, &meta, false);
+	fix = xfs_check_metadata(ctx, &ctx->mnt, &meta, false);
 	progress_add(1);
 
 	switch (fix) {
@@ -452,11 +453,14 @@ scrub_estimate_ag_work(
 int
 scrub_file(
 	struct scrub_ctx		*ctx,
+	int				fd,
 	const struct xfs_bulkstat	*bstat,
 	unsigned int			type,
 	struct action_list		*alist)
 {
 	struct xfs_scrub_metadata	meta = {0};
+	struct xfs_fd			xfd;
+	struct xfs_fd			*xfdp = &ctx->mnt;
 	enum check_outcome		fix;
 
 	assert(type < XFS_SCRUB_TYPE_NR);
@@ -466,8 +470,19 @@ scrub_file(
 	meta.sm_ino = bstat->bs_ino;
 	meta.sm_gen = bstat->bs_gen;
 
+	/*
+	 * If the caller passed us a file descriptor for a scrub, use it
+	 * instead of scrub-by-handle because this enables the kernel to skip
+	 * costly inode btree lookups.
+	 */
+	if (fd >= 0) {
+		memcpy(&xfd, xfdp, sizeof(xfd));
+		xfd.fd = fd;
+		xfdp = &xfd;
+	}
+
 	/* Scrub the piece of metadata. */
-	fix = xfs_check_metadata(ctx, &meta, true);
+	fix = xfs_check_metadata(ctx, xfdp, &meta, true);
 	if (fix == CHECK_ABORT)
 		return ECANCELED;
 	if (fix == CHECK_DONE)
diff --git a/scrub/scrub.h b/scrub/scrub.h
index 5b5f6b65..325d8f95 100644
--- a/scrub/scrub.h
+++ b/scrub/scrub.h
@@ -34,7 +34,7 @@ bool can_scrub_symlink(struct scrub_ctx *ctx);
 bool can_scrub_parent(struct scrub_ctx *ctx);
 bool xfs_can_repair(struct scrub_ctx *ctx);
 
-int scrub_file(struct scrub_ctx *ctx, const struct xfs_bulkstat *bstat,
+int scrub_file(struct scrub_ctx *ctx, int fd, const struct xfs_bulkstat *bstat,
 		unsigned int type, struct action_list *alist);
 
 /* Repair parameters are the scrub inputs and retry count. */




[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux