From: Darrick J. Wong <darrick.wong@xxxxxxxxxx> Read all data blocks from the disk, hoping to catch IO errors. Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> --- configure.ac | 2 include/builddefs.in | 2 m4/package_libcdev.m4 | 28 +++ scrub/Makefile | 7 - scrub/phase6.c | 539 +++++++++++++++++++++++++++++++++++++++++++++++++ scrub/scrub.c | 4 scrub/vfs.c | 199 ++++++++++++++++++ scrub/vfs.h | 33 +++ scrub/xfs.c | 140 +++++++++++++ scrub/xfs.h | 4 10 files changed, 956 insertions(+), 2 deletions(-) create mode 100644 scrub/phase6.c create mode 100644 scrub/vfs.c create mode 100644 scrub/vfs.h diff --git a/configure.ac b/configure.ac index e7ea99e..b2b4566 100644 --- a/configure.ac +++ b/configure.ac @@ -144,6 +144,8 @@ AC_HAVE_MREMAP AC_NEED_INTERNAL_FSXATTR AC_HAVE_GETFSMAP AC_HAVE_MALLINFO +AC_HAVE_OPENAT +AC_HAVE_FSTATAT if test "$enable_blkid" = yes; then AC_HAVE_BLKID_TOPO diff --git a/include/builddefs.in b/include/builddefs.in index c178556..65d3d20 100644 --- a/include/builddefs.in +++ b/include/builddefs.in @@ -114,6 +114,8 @@ HAVE_MREMAP = @have_mremap@ NEED_INTERNAL_FSXATTR = @need_internal_fsxattr@ HAVE_GETFSMAP = @have_getfsmap@ HAVE_MALLINFO = @have_mallinfo@ +HAVE_OPENAT = @have_openat@ +HAVE_FSTATAT = @have_fstatat@ GCCFLAGS = -funsigned-char -fno-strict-aliasing -Wall # -Wbitwise -Wno-transparent-union -Wno-old-initializer -Wno-decl diff --git a/m4/package_libcdev.m4 b/m4/package_libcdev.m4 index 3ccdea5..540633a 100644 --- a/m4/package_libcdev.m4 +++ b/m4/package_libcdev.m4 @@ -315,3 +315,31 @@ AC_DEFUN([AC_HAVE_MALLINFO], AC_MSG_RESULT(no)) AC_SUBST(have_mallinfo) ]) + +# +# Check if we have a openat call +# +AC_DEFUN([AC_HAVE_OPENAT], + [ AC_CHECK_DECL([openat], + have_openat=yes, + [], + [#include <sys/types.h> + #include <sys/stat.h> + #include <fcntl.h>] + ) + AC_SUBST(have_openat) + ]) + +# +# Check if we have a fstatat call +# +AC_DEFUN([AC_HAVE_FSTATAT], + [ AC_CHECK_DECL([fstatat], + have_fstatat=yes, + [], + [#define _GNU_SOURCE + #include <sys/types.h> + #include <sys/stat.h> + #include <unistd.h>]) + AC_SUBST(have_fstatat) + ]) diff --git a/scrub/Makefile b/scrub/Makefile index 5df3e95..18a65b9 100644 --- a/scrub/Makefile +++ b/scrub/Makefile @@ -8,9 +8,9 @@ include $(TOPDIR)/include/builddefs # On linux we get fsmap from the system or define it ourselves # so include this based on platform type. If this reverts to only # the autoconf check w/o local definition, change to testing HAVE_GETFSMAP -SCRUB_PREREQS=$(PKG_PLATFORM) +SCRUB_PREREQS=$(PKG_PLATFORM)$(HAVE_OPENAT)$(HAVE_FSTATAT) -ifeq ($(SCRUB_PREREQS),linux) +ifeq ($(SCRUB_PREREQS),linuxyesyes) LTCOMMAND = xfs_scrub INSTALL_SCRUB = install-scrub endif # scrub_prereqs @@ -25,6 +25,7 @@ disk.h \ ioctl.h \ read_verify.h \ scrub.h \ +vfs.h \ xfs.h CFILES = \ @@ -39,8 +40,10 @@ phase1.c \ phase2.c \ phase3.c \ phase5.c \ +phase6.c \ read_verify.c \ scrub.c \ +vfs.c \ xfs.c LLDLIBS += $(LIBXCMD) $(LIBHANDLE) $(LIBPTHREAD) diff --git a/scrub/phase6.c b/scrub/phase6.c new file mode 100644 index 0000000..d60a044 --- /dev/null +++ b/scrub/phase6.c @@ -0,0 +1,539 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@xxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "libxfs.h" +#include <sys/statvfs.h> +#include <sys/types.h> +#include <dirent.h> +#include "disk.h" +#include "../repair/threads.h" +#include "handle.h" +#include "path.h" +#include "read_verify.h" +#include "bitmap.h" +#include "vfs.h" +#include "scrub.h" +#include "common.h" +#include "ioctl.h" +#include "xfs_fs.h" +#include "xfs.h" + +/* + * Phase 6: Verify data file integrity. + * + * Identify potential data block extents with GETFSMAP, then feed those + * extents to the read-verify pool to get the verify commands batched, + * issued, and (if there are problems) reported back to us. If there + * are errors, we'll record the bad regions and (if available) use rmap + * to tell us if metadata are now corrupt. Otherwise, we'll scan the + * whole directory tree looking for files that overlap the bad regions + * and report the paths of the now corrupt files. + */ + +/* Find the fd for a given device identifier. */ +static struct disk * +xfs_dev_to_disk( + struct scrub_ctx *ctx, + dev_t dev) +{ + if (dev == ctx->fsinfo.fs_datadev) + return &ctx->datadev; + else if (dev == ctx->fsinfo.fs_logdev) + return &ctx->logdev; + else if (dev == ctx->fsinfo.fs_rtdev) + return &ctx->rtdev; + abort(); +} + +/* Find the device major/minor for a given file descriptor. */ +static dev_t +xfs_disk_to_dev( + struct scrub_ctx *ctx, + struct disk *disk) +{ + if (disk == &ctx->datadev) + return ctx->fsinfo.fs_datadev; + else if (disk == &ctx->logdev) + return ctx->fsinfo.fs_logdev; + else if (disk == &ctx->rtdev) + return ctx->fsinfo.fs_rtdev; + abort(); +} + +struct owner_decode { + uint64_t owner; + const char *descr; +}; + +static const struct owner_decode special_owners[] = { + {XFS_FMR_OWN_FREE, "free space"}, + {XFS_FMR_OWN_UNKNOWN, "unknown owner"}, + {XFS_FMR_OWN_FS, "static FS metadata"}, + {XFS_FMR_OWN_LOG, "journalling log"}, + {XFS_FMR_OWN_AG, "per-AG metadata"}, + {XFS_FMR_OWN_INOBT, "inode btree blocks"}, + {XFS_FMR_OWN_INODES, "inodes"}, + {XFS_FMR_OWN_REFC, "refcount btree"}, + {XFS_FMR_OWN_COW, "CoW staging"}, + {XFS_FMR_OWN_DEFECTIVE, "bad blocks"}, + {0, NULL}, +}; + +/* Decode a special owner. */ +static const char * +xfs_decode_special_owner( + uint64_t owner) +{ + const struct owner_decode *od = special_owners; + + while (od->descr) { + if (od->owner == owner) + return od->descr; + od++; + } + + return NULL; +} + +/* Routines to translate bad physical extents into file paths and offsets. */ + +struct xfs_verify_error_info { + struct bitmap *d_bad; /* bytes */ + struct bitmap *r_bad; /* bytes */ +}; + +/* Report if this extent overlaps a bad region. */ +static bool +xfs_report_verify_inode_bmap( + struct scrub_ctx *ctx, + const char *descr, + int fd, + int whichfork, + struct fsxattr *fsx, + struct xfs_bmap *bmap, + void *arg) +{ + struct xfs_verify_error_info *vei = arg; + struct bitmap *bmp; + + /* Only report errors for real extents. */ + if (bmap->bm_flags & (BMV_OF_PREALLOC | BMV_OF_DELALLOC)) + return true; + + if (fsx->fsx_xflags & FS_XFLAG_REALTIME) + bmp = vei->r_bad; + else + bmp = vei->d_bad; + + if (!bitmap_test(bmp, bmap->bm_physical, bmap->bm_length)) + return true; + + str_error(ctx, descr, +_("offset %llu failed read verification."), bmap->bm_offset); + return true; +} + +/* Iterate the extent mappings of a file to report errors. */ +static bool +xfs_report_verify_fd( + struct scrub_ctx *ctx, + const char *descr, + int fd, + void *arg) +{ + struct xfs_bmap key = {0}; + bool moveon; + + /* data fork */ + moveon = xfs_iterate_bmap(ctx, descr, fd, XFS_DATA_FORK, &key, + xfs_report_verify_inode_bmap, arg); + if (!moveon) + return false; + + /* attr fork */ + moveon = xfs_iterate_bmap(ctx, descr, fd, XFS_ATTR_FORK, &key, + xfs_report_verify_inode_bmap, arg); + if (!moveon) + return false; + return true; +} + +/* Report read verify errors in unlinked (but still open) files. */ +static int +xfs_report_verify_inode( + struct scrub_ctx *ctx, + struct xfs_handle *handle, + struct xfs_bstat *bstat, + void *arg) +{ + char descr[DESCR_BUFSZ]; + char buf[DESCR_BUFSZ]; + bool moveon; + int fd; + int error; + + snprintf(descr, DESCR_BUFSZ, _("inode %llu (unlinked)"), bstat->bs_ino); + + /* Ignore linked files and things we can't open. */ + if (bstat->bs_nlink != 0) + return 0; + if (!S_ISREG(bstat->bs_mode) && !S_ISDIR(bstat->bs_mode)) + return 0; + + /* Try to open the inode. */ + fd = xfs_open_handle(handle); + if (fd < 0) { + error = errno; + if (error == ESTALE) + return error; + + str_warn(ctx, descr, "%s", strerror_r(error, buf, DESCR_BUFSZ)); + return error; + } + + /* Go find the badness. */ + moveon = xfs_report_verify_fd(ctx, descr, fd, arg); + close(fd); + + return moveon ? 0 : XFS_ITERATE_INODES_ABORT; +} + +/* Scan a directory for matches in the read verify error list. */ +static bool +xfs_report_verify_dir( + struct scrub_ctx *ctx, + const char *path, + int dir_fd, + void *arg) +{ + return xfs_report_verify_fd(ctx, path, dir_fd, arg); +} + +/* + * Scan the inode associated with a directory entry for matches with + * the read verify error list. + */ +static bool +xfs_report_verify_dirent( + struct scrub_ctx *ctx, + const char *path, + int dir_fd, + struct dirent *dirent, + struct stat *sb, + void *arg) +{ + bool moveon; + int fd; + + /* Ignore things we can't open. */ + if (!S_ISREG(sb->st_mode) && !S_ISDIR(sb->st_mode)) + return true; + /* Ignore . and .. */ + if (dirent && (!strcmp(".", dirent->d_name) || + !strcmp("..", dirent->d_name))) + return true; + + /* + * If we were given a dirent, open the associated file under + * dir_fd for badblocks scanning. If dirent is NULL, then it's + * the directory itself we want to scan. + */ + fd = openat(dir_fd, dirent->d_name, + O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY); + if (fd < 0) + return true; + + /* Go find the badness. */ + moveon = xfs_report_verify_fd(ctx, path, fd, arg); + if (moveon) + goto out; + +out: + close(fd); + + return moveon; +} + +/* Given bad extent lists for the data & rtdev, find bad files. */ +static bool +xfs_report_verify_errors( + struct scrub_ctx *ctx, + struct bitmap *d_bad, + struct bitmap *r_bad) +{ + struct xfs_verify_error_info vei; + bool moveon; + + vei.d_bad = d_bad; + vei.r_bad = r_bad; + + /* Scan the directory tree to get file paths. */ + moveon = scan_fs_tree(ctx, xfs_report_verify_dir, + xfs_report_verify_dirent, &vei); + if (!moveon) + return false; + + /* Scan for unlinked files. */ + return xfs_scan_all_inodes_arg(ctx, xfs_report_verify_inode, &vei); +} + +/* Verify disk blocks with GETFSMAP */ + +struct xfs_verify_extent { + /* Maintain state for the lazy read verifier. */ + struct read_verify rv; + + /* Store bad extents if we don't have parent pointers. */ + struct bitmap *d_bad; /* bytes */ + struct bitmap *r_bad; /* bytes */ + + /* Track the last extent we saw. */ + uint64_t laststart; /* bytes */ + uint64_t lastlength; /* bytes */ + bool lastshared; /* bytes */ +}; + +/* Report an IO error resulting from read-verify based off getfsmap. */ +static bool +xfs_check_rmap_error_report( + struct scrub_ctx *ctx, + const char *descr, + struct fsmap *map, + void *arg) +{ + const char *type; + char buf[32]; + uint64_t err_physical = *(uint64_t *)arg; + uint64_t err_off; + + if (err_physical > map->fmr_physical) + err_off = err_physical - map->fmr_physical; + else + err_off = 0; + + snprintf(buf, 32, _("disk offset %llu"), + BTOBB(map->fmr_physical + err_off)); + + if (map->fmr_flags & FMR_OF_SPECIAL_OWNER) { + type = xfs_decode_special_owner(map->fmr_owner); + str_error(ctx, buf, +_("%s failed read verification."), + type); + } + + /* + * XXX: If we had a getparent() call we could report IO errors + * efficiently. Until then, we'll have to scan the dir tree + * to find the bad file's pathname. + */ + + return true; +} + +/* + * Remember a read error for later, and see if rmap will tell us about the + * owner ahead of time. + */ +void +xfs_check_rmap_ioerr( + struct read_verify_pool *rvp, + struct disk *disk, + uint64_t start, + uint64_t length, + int error, + void *arg) +{ + struct fsmap keys[2]; + char descr[DESCR_BUFSZ]; + struct scrub_ctx *ctx = rvp->rvp_ctx; + struct xfs_verify_extent *ve; + struct bitmap *tree; + dev_t dev; + bool moveon; + + ve = arg; + dev = xfs_disk_to_dev(ctx, disk); + + /* + * If we don't have parent pointers, save the bad extent for + * later rescanning. + */ + if (dev == ctx->fsinfo.fs_datadev) + tree = ve->d_bad; + else if (dev == ctx->fsinfo.fs_rtdev) + tree = ve->r_bad; + else + tree = NULL; + if (tree) { + moveon = bitmap_set(tree, start, length); + if (!moveon) + str_errno(ctx, ctx->mntpoint); + } + + snprintf(descr, DESCR_BUFSZ, _("dev %d:%d ioerr @ %"PRIu64":%"PRIu64" "), + major(dev), minor(dev), start, length); + + /* Go figure out which blocks are bad from the fsmap. */ + memset(keys, 0, sizeof(struct fsmap) * 2); + keys->fmr_device = dev; + keys->fmr_physical = start; + (keys + 1)->fmr_device = dev; + (keys + 1)->fmr_physical = start + length - 1; + (keys + 1)->fmr_owner = ULLONG_MAX; + (keys + 1)->fmr_offset = ULLONG_MAX; + (keys + 1)->fmr_flags = UINT_MAX; + xfs_iterate_fsmap(ctx, descr, keys, xfs_check_rmap_error_report, + &start); +} + +/* Schedule a read-verify of a (data block) extent. */ +static bool +xfs_check_rmap( + struct scrub_ctx *ctx, + const char *descr, + struct fsmap *map, + void *arg) +{ + struct xfs_verify_extent *ve = arg; + struct disk *disk; + + dbg_printf("rmap dev %d:%d phys %llu owner %lld offset %llu " + "len %llu flags 0x%x\n", major(map->fmr_device), + minor(map->fmr_device), map->fmr_physical, + map->fmr_owner, map->fmr_offset, + map->fmr_length, map->fmr_flags); + + /* Remember this extent. */ + ve->lastshared = (map->fmr_flags & FMR_OF_SHARED); + ve->laststart = map->fmr_physical; + ve->lastlength = map->fmr_length; + + /* "Unknown" extents should be verified; they could be data. */ + if ((map->fmr_flags & FMR_OF_SPECIAL_OWNER) && + map->fmr_owner == XFS_FMR_OWN_UNKNOWN) + map->fmr_flags &= ~FMR_OF_SPECIAL_OWNER; + + /* + * We only care about read-verifying data extents that have been + * written to disk. This means we can skip "special" owners + * (metadata), xattr blocks, unwritten extents, and extent maps. + * These should all get checked elsewhere in the scrubber. + */ + if (map->fmr_flags & (FMR_OF_PREALLOC | FMR_OF_ATTR_FORK | + FMR_OF_EXTENT_MAP | FMR_OF_SPECIAL_OWNER)) + goto out; + + /* XXX: Filter out directory data blocks. */ + + /* Schedule the read verify command for (eventual) running. */ + disk = xfs_dev_to_disk(ctx, map->fmr_device); + + read_verify_schedule(ctx->rvp, &ve->rv, disk, map->fmr_physical, + map->fmr_length, ve); + +out: + /* Is this the last extent? Fire off the read. */ + if (map->fmr_flags & FMR_OF_LAST) + read_verify_force(ctx->rvp, &ve->rv); + + return true; +} + +/* + * Read verify all the file data blocks in a filesystem. Since XFS doesn't + * do data checksums, we trust that the underlying storage will pass back + * an IO error if it can't retrieve whatever we previously stored there. + * If we hit an IO error, we'll record the bad blocks in a bitmap and then + * scan the extent maps of the entire fs tree to figure (and the unlinked + * inodes) out which files are now broken. + */ +bool +xfs_scan_blocks( + struct scrub_ctx *ctx) +{ + struct bitmap *d_bad; + struct bitmap *r_bad; + struct xfs_verify_extent *ve; + struct xfs_verify_extent *v; + int i; + unsigned int groups; + bool moveon; + + /* + * Initialize our per-thread context. By convention, + * the log device comes first, then the rt device, and then + * the AGs. + */ + groups = xfs_scan_all_blocks_array_size(ctx); + ve = calloc(groups, sizeof(struct xfs_verify_extent)); + if (!ve) { + str_errno(ctx, ctx->mntpoint); + return false; + } + + moveon = bitmap_init(&d_bad); + if (!moveon) { + str_errno(ctx, ctx->mntpoint); + goto out_ve; + } + + moveon = bitmap_init(&r_bad); + if (!moveon) { + str_errno(ctx, ctx->mntpoint); + goto out_dbad; + } + + for (i = 0, v = ve; i < groups; i++, v++) { + v->d_bad = d_bad; + v->r_bad = r_bad; + } + + moveon = read_verify_pool_init(&ctx->rvp, ctx, ctx->readbuf, + IO_MAX_SIZE, ctx->geo.blocksize, xfs_check_rmap_ioerr, + disk_heads(&ctx->datadev)); + if (!moveon) + goto out_rbad; + moveon = xfs_scan_all_blocks_array_arg(ctx, xfs_check_rmap, + ve, sizeof(*ve)); + if (!moveon) + goto out_pool; + + for (i = 0, v = ve; i < groups; i++, v++) + read_verify_force(ctx->rvp, &v->rv); + read_verify_pool_destroy(&ctx->rvp); + + /* Scan the whole dir tree to see what matches the bad extents. */ + if (!bitmap_empty(d_bad) || !bitmap_empty(r_bad)) + moveon = xfs_report_verify_errors(ctx, d_bad, r_bad); + + bitmap_free(&r_bad); + bitmap_free(&d_bad); + free(ve); + return moveon; + +out_pool: + read_verify_pool_destroy(&ctx->rvp); +out_rbad: + bitmap_free(&r_bad); +out_dbad: + bitmap_free(&d_bad); +out_ve: + free(ve); + return moveon; +} diff --git a/scrub/scrub.c b/scrub/scrub.c index d4527e4..97bd795 100644 --- a/scrub/scrub.c +++ b/scrub/scrub.c @@ -464,6 +464,10 @@ run_scrub_phases( /* Run all phases of the scrub tool. */ for (phase = 1, sp = phases; sp->fn; sp++, phase++) { + /* Turn on certain phases if user said to. */ + if (sp->fn == DATASCAN_DUMMY_FN && scrub_data) + sp->fn = xfs_scan_blocks; + /* Skip certain phases unless they're turned on. */ if (sp->fn == REPAIR_DUMMY_FN || sp->fn == DATASCAN_DUMMY_FN) diff --git a/scrub/vfs.c b/scrub/vfs.c new file mode 100644 index 0000000..1cff2ab --- /dev/null +++ b/scrub/vfs.c @@ -0,0 +1,199 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@xxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "libxfs.h" +#include <sys/statvfs.h> +#include <sys/types.h> +#include <dirent.h> +#include <sys/xattr.h> +#include "../repair/threads.h" +#include "path.h" +#include "disk.h" +#include "read_verify.h" +#include "scrub.h" +#include "common.h" +#include "vfs.h" + +/* + * Helper functions to assist in traversing a directory tree using regular + * VFS calls. + */ + +/* Scan a filesystem tree. */ +struct scan_fs_tree { + unsigned int nr_dirs; + pthread_mutex_t lock; + pthread_cond_t wakeup; + struct stat root_sb; + bool moveon; + scan_fs_tree_dir_fn dir_fn; + scan_fs_tree_dirent_fn dirent_fn; + void *arg; +}; + +/* Per-work-item scan context. */ +struct scan_fs_tree_dir { + char *path; + struct scan_fs_tree *sft; + bool rootdir; +}; + +/* Scan a directory sub tree. */ +static void +scan_fs_dir( + struct work_queue *wq, + xfs_agnumber_t agno, + void *arg) +{ + struct scrub_ctx *ctx = (struct scrub_ctx *)wq->mp; + struct scan_fs_tree_dir *sftd = arg; + struct scan_fs_tree *sft = sftd->sft; + DIR *dir; + struct dirent *dirent; + char newpath[PATH_MAX]; + struct scan_fs_tree_dir *new_sftd; + struct stat sb; + int dir_fd; + int error; + + /* Open the directory. */ + dir_fd = open(sftd->path, O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NOCTTY); + if (dir_fd < 0) { + if (errno != ENOENT) + str_errno(ctx, sftd->path); + goto out; + } + + /* Caller-specific directory checks. */ + if (!sft->dir_fn(ctx, sftd->path, dir_fd, sft->arg)) { + sft->moveon = false; + goto out; + } + + /* Iterate the directory entries. */ + dir = fdopendir(dir_fd); + if (!dir) { + str_errno(ctx, sftd->path); + goto out; + } + rewinddir(dir); + for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) { + snprintf(newpath, PATH_MAX, "%s/%s", sftd->path, + dirent->d_name); + + /* Get the stat info for this directory entry. */ + error = fstatat(dir_fd, dirent->d_name, &sb, + AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW); + if (error) { + str_errno(ctx, newpath); + continue; + } + + /* Ignore files on other filesystems. */ + if (sb.st_dev != sft->root_sb.st_dev) + continue; + + /* Caller-specific directory entry function. */ + if (!sft->dirent_fn(ctx, newpath, dir_fd, dirent, &sb, + sft->arg)) { + sft->moveon = false; + break; + } + + if (xfs_scrub_excessive_errors(ctx)) { + sft->moveon = false; + break; + } + + /* If directory, call ourselves recursively. */ + if (S_ISDIR(sb.st_mode) && strcmp(".", dirent->d_name) && + strcmp("..", dirent->d_name)) { + new_sftd = malloc(sizeof(struct scan_fs_tree_dir)); + if (!new_sftd) { + str_errno(ctx, newpath); + sft->moveon = false; + break; + } + new_sftd->path = strdup(newpath); + new_sftd->sft = sft; + new_sftd->rootdir = false; + pthread_mutex_lock(&sft->lock); + sft->nr_dirs++; + pthread_mutex_unlock(&sft->lock); + queue_work(wq, scan_fs_dir, 0, new_sftd); + } + } + + /* Close dir, go away. */ + error = closedir(dir); + if (error) + str_errno(ctx, sftd->path); + +out: + pthread_mutex_lock(&sft->lock); + sft->nr_dirs--; + if (sft->nr_dirs == 0) + pthread_cond_signal(&sft->wakeup); + pthread_mutex_unlock(&sft->lock); + + free(sftd->path); + free(sftd); +} + +/* Scan the entire filesystem. */ +bool +scan_fs_tree( + struct scrub_ctx *ctx, + scan_fs_tree_dir_fn dir_fn, + scan_fs_tree_dirent_fn dirent_fn, + void *arg) +{ + struct work_queue wq; + struct scan_fs_tree sft; + struct scan_fs_tree_dir *sftd; + + sft.moveon = true; + sft.nr_dirs = 1; + sft.root_sb = ctx->mnt_sb; + sft.dir_fn = dir_fn; + sft.dirent_fn = dirent_fn; + sft.arg = arg; + pthread_mutex_init(&sft.lock, NULL); + pthread_cond_init(&sft.wakeup, NULL); + + sftd = malloc(sizeof(struct scan_fs_tree_dir)); + if (!sftd) { + str_errno(ctx, ctx->mntpoint); + return false; + } + sftd->path = strdup(ctx->mntpoint); + sftd->sft = &sft; + sftd->rootdir = true; + + create_work_queue(&wq, (struct xfs_mount *)ctx, scrub_nproc(ctx)); + queue_work(&wq, scan_fs_dir, 0, sftd); + + pthread_mutex_lock(&sft.lock); + pthread_cond_wait(&sft.wakeup, &sft.lock); + assert(sft.nr_dirs == 0); + pthread_mutex_unlock(&sft.lock); + destroy_work_queue(&wq); + + return sft.moveon; +} diff --git a/scrub/vfs.h b/scrub/vfs.h new file mode 100644 index 0000000..3a3b2dc --- /dev/null +++ b/scrub/vfs.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong <darrick.wong@xxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#ifndef XFS_SCRUB_VFS_H_ +#define XFS_SCRUB_VFS_H_ + +struct scrub_ctx; + +typedef bool (*scan_fs_tree_dir_fn)(struct scrub_ctx *, const char *, + int, void *); +typedef bool (*scan_fs_tree_dirent_fn)(struct scrub_ctx *, const char *, + int, struct dirent *, struct stat *, void *); + +bool scan_fs_tree(struct scrub_ctx *ctx, scan_fs_tree_dir_fn dir_fn, + scan_fs_tree_dirent_fn dirent_fn, void *arg); + +#endif /* XFS_SCRUB_VFS_H_ */ diff --git a/scrub/xfs.c b/scrub/xfs.c index 882bd28..36a5ba1 100644 --- a/scrub/xfs.c +++ b/scrub/xfs.c @@ -25,6 +25,7 @@ #include "../repair/threads.h" #include "handle.h" #include "path.h" +#include "vfs.h" #include "scrub.h" #include "common.h" #include "ioctl.h" @@ -130,3 +131,142 @@ xfs_scan_all_inodes_arg( { return xfs_scan_all_inodes_array_arg(ctx, fn, arg, 0); } + +/* GETFSMAP wrappers routines. */ +struct xfs_scan_blocks { + xfs_fsmap_iter_fn fn; + void *arg; + size_t array_arg_size; + bool moveon; +}; + +/* Iterate all the reverse mappings of an AG. */ +static void +xfs_scan_ag_blocks( + struct work_queue *wq, + xfs_agnumber_t agno, + void *arg) +{ + struct scrub_ctx *ctx = (struct scrub_ctx *)wq->mp; + struct xfs_scan_blocks *sbx = arg; + void *fn_arg; + char descr[DESCR_BUFSZ]; + struct fsmap keys[2]; + off64_t bperag; + bool moveon; + + bperag = (off64_t)ctx->geo.agblocks * + (off64_t)ctx->geo.blocksize; + + snprintf(descr, DESCR_BUFSZ, _("dev %d:%d AG %u fsmap"), + major(ctx->fsinfo.fs_datadev), + minor(ctx->fsinfo.fs_datadev), + agno); + + memset(keys, 0, sizeof(struct fsmap) * 2); + keys->fmr_device = ctx->fsinfo.fs_datadev; + keys->fmr_physical = agno * bperag; + (keys + 1)->fmr_device = ctx->fsinfo.fs_datadev; + (keys + 1)->fmr_physical = ((agno + 1) * bperag) - 1; + (keys + 1)->fmr_owner = ULLONG_MAX; + (keys + 1)->fmr_offset = ULLONG_MAX; + (keys + 1)->fmr_flags = UINT_MAX; + + fn_arg = ((char *)sbx->arg) + sbx->array_arg_size * agno; + moveon = xfs_iterate_fsmap(ctx, descr, keys, sbx->fn, fn_arg); + if (!moveon) + sbx->moveon = false; +} + +/* Iterate all the reverse mappings of a standalone device. */ +static void +xfs_scan_dev_blocks( + struct scrub_ctx *ctx, + int idx, + dev_t dev, + struct xfs_scan_blocks *sbx) +{ + struct fsmap keys[2]; + char descr[DESCR_BUFSZ]; + void *fn_arg; + bool moveon; + + snprintf(descr, DESCR_BUFSZ, _("dev %d:%d fsmap"), + major(dev), minor(dev)); + + memset(keys, 0, sizeof(struct fsmap) * 2); + keys->fmr_device = dev; + (keys + 1)->fmr_device = dev; + (keys + 1)->fmr_physical = ULLONG_MAX; + (keys + 1)->fmr_owner = ULLONG_MAX; + (keys + 1)->fmr_offset = ULLONG_MAX; + (keys + 1)->fmr_flags = UINT_MAX; + + fn_arg = ((char *)sbx->arg) + sbx->array_arg_size * idx; + moveon = xfs_iterate_fsmap(ctx, descr, keys, sbx->fn, fn_arg); + if (!moveon) + sbx->moveon = false; +} + +/* Iterate all the reverse mappings of the realtime device. */ +static void +xfs_scan_rt_blocks( + struct work_queue *wq, + xfs_agnumber_t agno, + void *arg) +{ + struct scrub_ctx *ctx = (struct scrub_ctx *)wq->mp; + + xfs_scan_dev_blocks(ctx, agno, ctx->fsinfo.fs_rtdev, arg); +} + +/* Iterate all the reverse mappings of the log device. */ +static void +xfs_scan_log_blocks( + struct work_queue *wq, + xfs_agnumber_t agno, + void *arg) +{ + struct scrub_ctx *ctx = (struct scrub_ctx *)wq->mp; + + xfs_scan_dev_blocks(ctx, agno, ctx->fsinfo.fs_logdev, arg); +} + +/* How many array elements should we create to scan all the blocks? */ +size_t +xfs_scan_all_blocks_array_size( + struct scrub_ctx *ctx) +{ + return ctx->geo.agcount + 2; +} + +/* Scan all the blocks in a filesystem. */ +bool +xfs_scan_all_blocks_array_arg( + struct scrub_ctx *ctx, + xfs_fsmap_iter_fn fn, + void *arg, + size_t array_arg_size) +{ + xfs_agnumber_t agno; + struct work_queue wq; + struct xfs_scan_blocks sbx; + + sbx.moveon = true; + sbx.fn = fn; + sbx.arg = arg; + sbx.array_arg_size = array_arg_size; + + create_work_queue(&wq, (struct xfs_mount *)ctx, scrub_nproc(ctx)); + if (ctx->fsinfo.fs_rt) + queue_work(&wq, xfs_scan_rt_blocks, ctx->geo.agcount + 1, + &sbx); + if (ctx->fsinfo.fs_log) + queue_work(&wq, xfs_scan_log_blocks, ctx->geo.agcount + 2, + &sbx); + for (agno = 0; agno < ctx->geo.agcount; agno++) + queue_work(&wq, xfs_scan_ag_blocks, agno, &sbx); + destroy_work_queue(&wq); + + return sbx.moveon; +} diff --git a/scrub/xfs.h b/scrub/xfs.h index 2e434c2..7d087db 100644 --- a/scrub/xfs.h +++ b/scrub/xfs.h @@ -24,6 +24,9 @@ void xfs_shutdown_fs(struct scrub_ctx *ctx); bool xfs_scan_all_inodes(struct scrub_ctx *ctx, xfs_inode_iter_fn fn); bool xfs_scan_all_inodes_arg(struct scrub_ctx *ctx, xfs_inode_iter_fn fn, void *arg); +size_t xfs_scan_all_blocks_array_size(struct scrub_ctx *ctx); +bool xfs_scan_all_blocks_array_arg(struct scrub_ctx *ctx, xfs_fsmap_iter_fn fn, + void *arg, size_t array_arg_size); /* Phase-specific functions. */ bool xfs_cleanup(struct scrub_ctx *ctx); @@ -31,5 +34,6 @@ bool xfs_scan_fs(struct scrub_ctx *ctx); bool xfs_scan_metadata(struct scrub_ctx *ctx); bool xfs_scan_inodes(struct scrub_ctx *ctx); bool xfs_scan_connections(struct scrub_ctx *ctx); +bool xfs_scan_blocks(struct scrub_ctx *ctx); #endif /* XFS_SCRUB_XFS_H_ */ -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html