From: Darrick J. Wong <djwong@xxxxxxxxxx> While profiling the performance of xfs_scrub, I noticed that phase3 only employs the scrub-by-handle interface. The kernel has had the ability to skip the untrusted iget lookup if the fd matches the handle data since the beginning, and using it reduces the phase 3 runtime by 5% on the author's system. Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx> --- scrub/phase3.c | 32 ++++++++++++++++++++++---------- scrub/scrub.c | 21 ++++++++++++++++++--- scrub/scrub.h | 2 +- 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/scrub/phase3.c b/scrub/phase3.c index 868f444d..7da11299 100644 --- a/scrub/phase3.c +++ b/scrub/phase3.c @@ -59,16 +59,27 @@ scrub_inode( agno = cvt_ino_to_agno(&ctx->mnt, bstat->bs_ino); background_sleep(); - /* Try to open the inode to pin it. */ + /* + * Open this regular file to pin it in memory. Avoiding the use of + * scan-by-handle means that the in-kernel scrubber doesn't pay the + * cost of opening the handle (looking up the inode in the inode btree, + * grabbing the inode, checking the generation) with every scrub call. + * + * Note: We cannot use this same trick for directories because the VFS + * will try to reconnect directory file handles to the root directory + * by walking '..' entries upwards, and loops in the dirent index + * btree will cause livelocks. + * + * ESTALE means we scan the whole cluster again. + */ if (S_ISREG(bstat->bs_mode)) { fd = scrub_open_handle(handle); - /* Stale inode means we scan the whole cluster again. */ if (fd < 0 && errno == ESTALE) return ESTALE; } /* Scrub the inode. */ - error = scrub_file(ctx, bstat, XFS_SCRUB_TYPE_INODE, &alist); + error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_INODE, &alist); if (error) goto out; @@ -77,13 +88,13 @@ scrub_inode( goto out; /* Scrub all block mappings. */ - error = scrub_file(ctx, bstat, XFS_SCRUB_TYPE_BMBTD, &alist); + error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_BMBTD, &alist); if (error) goto out; - error = scrub_file(ctx, bstat, XFS_SCRUB_TYPE_BMBTA, &alist); + error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_BMBTA, &alist); if (error) goto out; - error = scrub_file(ctx, bstat, XFS_SCRUB_TYPE_BMBTC, &alist); + error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_BMBTC, &alist); if (error) goto out; @@ -93,21 +104,22 @@ scrub_inode( if (S_ISLNK(bstat->bs_mode)) { /* Check symlink contents. */ - error = scrub_file(ctx, bstat, XFS_SCRUB_TYPE_SYMLINK, &alist); + error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_SYMLINK, + &alist); } else if (S_ISDIR(bstat->bs_mode)) { /* Check the directory entries. */ - error = scrub_file(ctx, bstat, XFS_SCRUB_TYPE_DIR, &alist); + error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_DIR, &alist); } if (error) goto out; /* Check all the extended attributes. */ - error = scrub_file(ctx, bstat, XFS_SCRUB_TYPE_XATTR, &alist); + error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_XATTR, &alist); if (error) goto out; /* Check parent pointers. */ - error = scrub_file(ctx, bstat, XFS_SCRUB_TYPE_PARENT, &alist); + error = scrub_file(ctx, fd, bstat, XFS_SCRUB_TYPE_PARENT, &alist); if (error) goto out; diff --git a/scrub/scrub.c b/scrub/scrub.c index 0034f11d..19a0b2d0 100644 --- a/scrub/scrub.c +++ b/scrub/scrub.c @@ -122,6 +122,7 @@ scrub_warn_incomplete_scrub( static enum check_outcome xfs_check_metadata( struct scrub_ctx *ctx, + struct xfs_fd *xfdp, struct xfs_scrub_metadata *meta, bool is_inode) { @@ -135,7 +136,7 @@ xfs_check_metadata( dbg_printf("check %s flags %xh\n", descr_render(&dsc), meta->sm_flags); retry: - error = -xfrog_scrub_metadata(&ctx->mnt, meta); + error = -xfrog_scrub_metadata(xfdp, meta); if (debug_tweak_on("XFS_SCRUB_FORCE_REPAIR") && !error) meta->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; switch (error) { @@ -316,7 +317,7 @@ scrub_meta_type( background_sleep(); /* Check the item. */ - fix = xfs_check_metadata(ctx, &meta, false); + fix = xfs_check_metadata(ctx, &ctx->mnt, &meta, false); progress_add(1); switch (fix) { @@ -452,11 +453,14 @@ scrub_estimate_ag_work( int scrub_file( struct scrub_ctx *ctx, + int fd, const struct xfs_bulkstat *bstat, unsigned int type, struct action_list *alist) { struct xfs_scrub_metadata meta = {0}; + struct xfs_fd xfd; + struct xfs_fd *xfdp = &ctx->mnt; enum check_outcome fix; assert(type < XFS_SCRUB_TYPE_NR); @@ -466,8 +470,19 @@ scrub_file( meta.sm_ino = bstat->bs_ino; meta.sm_gen = bstat->bs_gen; + /* + * If the caller passed us a file descriptor for a scrub, use it + * instead of scrub-by-handle because this enables the kernel to skip + * costly inode btree lookups. + */ + if (fd >= 0) { + memcpy(&xfd, xfdp, sizeof(xfd)); + xfd.fd = fd; + xfdp = &xfd; + } + /* Scrub the piece of metadata. */ - fix = xfs_check_metadata(ctx, &meta, true); + fix = xfs_check_metadata(ctx, xfdp, &meta, true); if (fix == CHECK_ABORT) return ECANCELED; if (fix == CHECK_DONE) diff --git a/scrub/scrub.h b/scrub/scrub.h index 5b5f6b65..325d8f95 100644 --- a/scrub/scrub.h +++ b/scrub/scrub.h @@ -34,7 +34,7 @@ bool can_scrub_symlink(struct scrub_ctx *ctx); bool can_scrub_parent(struct scrub_ctx *ctx); bool xfs_can_repair(struct scrub_ctx *ctx); -int scrub_file(struct scrub_ctx *ctx, const struct xfs_bulkstat *bstat, +int scrub_file(struct scrub_ctx *ctx, int fd, const struct xfs_bulkstat *bstat, unsigned int type, struct action_list *alist); /* Repair parameters are the scrub inputs and retry count. */