From: Darrick J. Wong <darrick.wong@xxxxxxxxxx> Use the fs and ag geometry ioctls to report health problems to users. Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> --- include/xfrog.h | 2 libfrog/fsgeom.c | 11 + man/man8/xfs_spaceman.8 | 28 +++ spaceman/Makefile | 2 spaceman/health.c | 432 +++++++++++++++++++++++++++++++++++++++++++++++ spaceman/init.c | 1 spaceman/space.h | 1 7 files changed, 476 insertions(+), 1 deletion(-) create mode 100644 spaceman/health.c diff --git a/include/xfrog.h b/include/xfrog.h index 5748e967..3a43a403 100644 --- a/include/xfrog.h +++ b/include/xfrog.h @@ -177,4 +177,6 @@ struct xfs_inogrp; int xfrog_inumbers(struct xfs_fd *xfd, uint64_t *lastino, uint32_t icount, struct xfs_inogrp *ubuffer, uint32_t *ocount); +int xfrog_ag_geometry(int fd, unsigned int agno, struct xfs_ag_geometry *ageo); + #endif /* __XFROG_H__ */ diff --git a/libfrog/fsgeom.c b/libfrog/fsgeom.c index 17479e4a..cddb5a39 100644 --- a/libfrog/fsgeom.c +++ b/libfrog/fsgeom.c @@ -131,3 +131,14 @@ xfrog_close( xfd->fd = -1; return ret; } + +/* Try to obtain an AG's geometry: store @agno in @ageo->ag_number, issue XFS_IOC_AG_GEOMETRY on @fd and return the ioctl() result unchanged. No other field of @ageo is written here before the call. */ +int +xfrog_ag_geometry( + int fd, + unsigned int agno, + struct xfs_ag_geometry *ageo) +{ + ageo->ag_number = agno; + return ioctl(fd, XFS_IOC_AG_GEOMETRY, ageo); +} diff --git a/man/man8/xfs_spaceman.8 b/man/man8/xfs_spaceman.8 index 12dd04e4..ece840d7 100644 --- a/man/man8/xfs_spaceman.8 +++ b/man/man8/xfs_spaceman.8 @@ -91,6 +91,34 @@ The output will have the same format that .BR "xfs_info" "(8)" prints when querying a filesystem. .TP +.BI "health [ \-a agno] [ \-c ] [ \-f ] [ \-i inum ] [ \-q ] [ paths ]" +Reports the health of the given group of filesystem metadata. +.RS 1.0i +.PD 0 +.TP 0.4i +.B \-a agno +Report on the health of the given allocation group. +.TP +.B \-c +Scan all inodes in the filesystem and report each file's health status. +If the +.B \-a +option is given, scan only the inodes in that AG. 
+.TP +.B \-f +Report on the health of metadata that affect the entire filesystem. +.TP +.B \-i inum +Report on the health of a specific inode. +.TP +.B \-q +Report only unhealthy metadata. +.TP +.B paths +Report on the health of the files at the given path. +.PD +.RE +.TP .BR "help [ " command " ]" Display a brief description of one or all commands. .TP diff --git a/spaceman/Makefile b/spaceman/Makefile index b1c1b16d..d01aa74a 100644 --- a/spaceman/Makefile +++ b/spaceman/Makefile @@ -7,7 +7,7 @@ include $(TOPDIR)/include/builddefs LTCOMMAND = xfs_spaceman HFILES = init.h space.h -CFILES = info.c init.c file.c prealloc.c trim.c +CFILES = info.c init.c file.c health.c prealloc.c trim.c LSRCFILES = xfs_info.sh LLDLIBS = $(LIBXCMD) $(LIBFROG) diff --git a/spaceman/health.c b/spaceman/health.c new file mode 100644 index 00000000..6c9c75a1 --- /dev/null +++ b/spaceman/health.c @@ -0,0 +1,432 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2019 Oracle. + * All Rights Reserved. 
+ */ +#include "platform_defs.h" +#include "libxfs.h" +#include "xfrog.h" +#include "command.h" +#include "init.h" +#include "path.h" +#include "space.h" +#include "input.h" +#include "fsgeom.h" +#include "libfrog.h" + +static cmdinfo_t health_cmd; +static unsigned long long reported; +static bool comprehensive; +static bool quiet; + +static bool has_realtime(const struct xfs_fsop_geom *g) +{ + return g->rtblocks > 0; +} + +static bool has_finobt(const struct xfs_fsop_geom *g) +{ + return g->flags & XFS_FSOP_GEOM_FLAGS_FINOBT; +} + +static bool has_rmapbt(const struct xfs_fsop_geom *g) +{ + return g->flags & XFS_FSOP_GEOM_FLAGS_RMAPBT; +} + +static bool has_reflink(const struct xfs_fsop_geom *g) +{ + return g->flags & XFS_FSOP_GEOM_FLAGS_REFLINK; +} + +struct flag_map { + unsigned int mask; + bool (*has_fn)(const struct xfs_fsop_geom *g); + const char *descr; +}; + +static const struct flag_map fs_flags[] = { + { + .mask = XFS_FSOP_GEOM_SICK_COUNTERS, + .descr = "summary counters", + }, + { + .mask = XFS_FSOP_GEOM_SICK_UQUOTA, + .descr = "user quota", + }, + { + .mask = XFS_FSOP_GEOM_SICK_GQUOTA, + .descr = "group quota", + }, + { + .mask = XFS_FSOP_GEOM_SICK_PQUOTA, + .descr = "project quota", + }, + { + .mask = XFS_FSOP_GEOM_SICK_RT_BITMAP, + .descr = "realtime bitmap", + .has_fn = has_realtime, + }, + { + .mask = XFS_FSOP_GEOM_SICK_RT_SUMMARY, + .descr = "realtime summary", + .has_fn = has_realtime, + }, + {0}, +}; + +static const struct flag_map ag_flags[] = { + { + .mask = XFS_AG_GEOM_SICK_SB, + .descr = "superblock", + }, + { + .mask = XFS_AG_GEOM_SICK_AGF, + .descr = "AGF header", + }, + { + .mask = XFS_AG_GEOM_SICK_AGFL, + .descr = "AGFL header", + }, + { + .mask = XFS_AG_GEOM_SICK_AGI, + .descr = "AGI header", + }, + { + .mask = XFS_AG_GEOM_SICK_BNOBT, + .descr = "free space by block btree", + }, + { + .mask = XFS_AG_GEOM_SICK_CNTBT, + .descr = "free space by length btree", + }, + { + .mask = XFS_AG_GEOM_SICK_INOBT, + .descr = "inode btree", + }, + { + 
.mask = XFS_AG_GEOM_SICK_FINOBT, + .descr = "free inode btree", + .has_fn = has_finobt, + }, + { + .mask = XFS_AG_GEOM_SICK_RMAPBT, + .descr = "reverse mappings btree", + .has_fn = has_rmapbt, + }, + { + .mask = XFS_AG_GEOM_SICK_REFCNTBT, + .descr = "reference count btree", + .has_fn = has_reflink, + }, + {0}, +}; + +static const struct flag_map inode_flags[] = { + { + .mask = XFS_BS_SICK_INODE, + .descr = "inode core", + }, + { + .mask = XFS_BS_SICK_BMBTD, + .descr = "data fork", + }, + { + .mask = XFS_BS_SICK_BMBTA, + .descr = "extended attribute fork", + }, + { + .mask = XFS_BS_SICK_BMBTC, + .descr = "copy on write fork", + }, + { + .mask = XFS_BS_SICK_DIR, + .descr = "directory", + }, + { + .mask = XFS_BS_SICK_XATTR, + .descr = "extended attributes", + }, + { + .mask = XFS_BS_SICK_SYMLINK, + .descr = "symbolic link target", + }, + { + .mask = XFS_BS_SICK_PARENT, + .descr = "parent pointers", + }, + {0}, +}; + +/* Convert a flag mask to a report. */ +static void +report_sick( + const char *descr, + const struct flag_map *maps, + unsigned int sick, + unsigned int checked) +{ + const struct flag_map *f; + bool bad; + + for (f = maps; f->mask != 0; f++) { + if (f->has_fn && !f->has_fn(&file->xfd.fsgeom)) + continue; + if (!(checked & f->mask)) + continue; + reported++; + bad = sick & f->mask; + if (!bad && quiet) + continue; + printf("%s %s: %s\n", descr, _(f->descr), + bad ? _("unhealthy") : _("ok")); + } +} + +/* Report on an AG's health. */ +static int +report_ag_sick( + xfs_agnumber_t agno) +{ + struct xfs_ag_geometry ageo; + char descr[256]; + int ret; + + ret = xfrog_ag_geometry(file->xfd.fd, agno, &ageo); + if (ret) { + perror("ag_geometry"); + return 1; + } + snprintf(descr, sizeof(descr) - 1, _("AG %u"), agno); + report_sick(descr, ag_flags, ageo.ag_sick, ageo.ag_checked); + return 0; +} + +/* Report on an inode's health. 
*/ +static int +report_inode_health( + unsigned long long ino, + const char *descr) +{ + struct xfs_bstat bs; + char d[256]; + int ret; + + if (!descr) { + snprintf(d, sizeof(d) - 1, _("inode %llu"), ino); + descr = d; + } + + ret = xfrog_bulkstat_single(&file->xfd, ino, &bs); + if (ret) { + perror(descr); + return 1; + } + + report_sick(descr, inode_flags, bs.bs_sick, bs.bs_checked); + return 0; +} + +/* Report on a file's health. */ +static int +report_file_health( + const char *path) +{ + struct stat stata, statb; + int ret; + + ret = lstat(path, &statb); + if (ret) { + perror(path); + return 1; + } + + ret = fstat(file->xfd.fd, &stata); + if (ret) { + perror(file->name); + return 1; + } + + if (stata.st_dev != statb.st_dev) { + fprintf(stderr, _("%s: not on the open filesystem"), path); + return 1; + } + + return report_inode_health(statb.st_ino, path); +} + +/* + * Report on all files' health for a given @agno. If @agno is NULLAGNUMBER, + * report on all files in the filesystem. + */ +static int +report_bulkstat_health( + xfs_agnumber_t agno) +{ + struct xfs_bstat bstat[128]; + char descr[256]; + uint64_t startino = 0; + uint64_t lastino = -1ULL; + uint32_t ocount; + uint32_t i; + int error; + + if (agno != NULLAGNUMBER) { + startino = xfrog_agino_to_ino(&file->xfd, agno, 0); + lastino = xfrog_agino_to_ino(&file->xfd, agno + 1, 0) - 1; + } + + while ((error = xfrog_bulkstat(&file->xfd, &startino, 128, bstat, + &ocount) == 0) && ocount > 0) { + for (i = 0; i < ocount; i++) { + if (bstat[i].bs_ino > lastino) + goto out; + snprintf(descr, sizeof(descr) - 1, _("inode %llu"), + bstat[i].bs_ino); + report_sick(descr, inode_flags, bstat[i].bs_sick, + bstat[i].bs_checked); + } + } +out: + return error; +} + +/* Report on health problems in XFS filesystem. 
*/ +static int +health_f( + int argc, + char **argv) +{ + unsigned long long x; + xfs_agnumber_t agno; + bool default_report = true; + int c; + int ret; + + reported = 0; + + if (file->xfd.fsgeom.version != XFS_FSOP_GEOM_VERSION_V5) { + perror("health"); + return 1; + } + + while ((c = getopt(argc, argv, "a:cfi:q")) != EOF) { + switch (c) { + case 'a': + default_report = false; + errno = 0; + x = strtoll(optarg, NULL, 10); + if (!errno && x >= NULLAGNUMBER) + errno = ERANGE; + if (errno) { + perror("ag health"); + return 1; + } + agno = x; + ret = report_ag_sick(agno); + if (!ret && comprehensive) + ret = report_bulkstat_health(agno); + if (ret) + return 1; + break; + case 'c': + comprehensive = true; + break; + case 'f': + default_report = false; + report_sick(_("filesystem"), fs_flags, + file->xfd.fsgeom.sick, + file->xfd.fsgeom.checked); + if (comprehensive) { + ret = report_bulkstat_health(NULLAGNUMBER); + if (ret) + return 1; + } + break; + case 'i': + default_report = false; + errno = 0; + x = strtoll(optarg, NULL, 10); + if (errno) { + perror("inode health"); + return 1; + } + ret = report_inode_health(x, NULL); + if (ret) + return 1; + break; + case 'q': + quiet = true; + break; + default: + return command_usage(&health_cmd); + } + } + + for (c = optind; c < argc; c++) { + default_report = false; + ret = report_file_health(argv[c]); + if (ret) + return 1; + } + + /* No arguments gets us a summary of fs state. 
*/ + if (default_report) { + report_sick(_("filesystem"), fs_flags, file->xfd.fsgeom.sick, + file->xfd.fsgeom.checked); + + for (agno = 0; agno < file->xfd.fsgeom.agcount; agno++) { + ret = report_ag_sick(agno); + if (ret) + return 1; + } + if (comprehensive) { + ret = report_bulkstat_health(NULLAGNUMBER); + if (ret) + return 1; + } + } + + if (!reported) { + fprintf(stderr, +_("Health status has not been collected for this filesystem.\n")); + fprintf(stderr, +_("Please run xfs_scrub(8) to remedy this situation.\n")); + } + + return 0; +} + +static void +health_help(void) +{ + printf(_( +"\n" +"Report all observed filesystem health problems.\n" +"\n" +" -a agno -- Report health of the given allocation group.\n" +" -c -- Report on the health of all inodes.\n" +" -f -- Report health of the overall filesystem.\n" +" -i inum -- Report health of a given inode number.\n" +" -q -- Only report unhealthy metadata.\n" +" paths -- Report health of the given file path.\n" +"\n")); + +} + +static cmdinfo_t health_cmd = { + .name = "health", + .cfunc = health_f, + .argmin = 0, + .argmax = -1, + .args = "[-a agno] [-c] [-f] [-i inum] [-q] [paths]", + .flags = CMD_FLAG_ONESHOT, + .help = health_help, +}; + +void +health_init(void) +{ + health_cmd.oneline = _("Report observed XFS health problems."), + add_command(&health_cmd); +} diff --git a/spaceman/init.c b/spaceman/init.c index 2698f420..80740cda 100644 --- a/spaceman/init.c +++ b/spaceman/init.c @@ -34,6 +34,7 @@ init_commands(void) quit_init(); trim_init(); freesp_init(); + health_init(); } static int diff --git a/spaceman/space.h b/spaceman/space.h index 2c26884a..723209ed 100644 --- a/spaceman/space.h +++ b/spaceman/space.h @@ -32,5 +32,6 @@ extern void freesp_init(void); # define freesp_init() do { } while (0) #endif extern void info_init(void); +extern void health_init(void); #endif /* XFS_SPACEMAN_SPACE_H_ */