From: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
Create an ioctl that can be used to scrub internal filesystem metadata.
The new ioctl takes the metadata type, an (optional) AG number, an
(optional) inode number and generation, and a flags argument. This will
be used by the upcoming XFS online scrub tool.
Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
---
fs/xfs/Kconfig | 17 +
fs/xfs/Makefile | 7 +
fs/xfs/libxfs/xfs_fs.h | 41 ++++
fs/xfs/scrub/common.c | 533 ++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/scrub/common.h | 179 +++++++++++++++
fs/xfs/scrub/xfs_scrub.h | 29 +++
fs/xfs/xfs_ioctl.c | 28 ++
fs/xfs/xfs_ioctl32.c | 1
fs/xfs/xfs_trace.h | 7 +
9 files changed, 841 insertions(+), 1 deletion(-)
create mode 100644 fs/xfs/scrub/common.c
create mode 100644 fs/xfs/scrub/common.h
create mode 100644 fs/xfs/scrub/xfs_scrub.h
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 1b98cfa..f42fcf1 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -71,6 +71,23 @@ config XFS_RT
If unsure, say N.
+config XFS_ONLINE_SCRUB
+ bool "XFS online metadata check support"
+ default n
+ depends on XFS_FS
+ help
+ If you say Y here you will be able to check metadata on a
+ mounted XFS filesystem. This feature is intended to reduce
+ filesystem downtime by supplementing xfs_repair. The key
+ advantage here is to look for problems proactively so that
+ they can be dealt with in a controlled manner.
+
+ This feature is considered EXPERIMENTAL. Use with caution!
+
+ See the xfs_scrub man page in section 8 for additional information.
+
+ If unsure, say N.
+
config XFS_WARN
bool "XFS Verbose Warnings"
depends on XFS_FS && !XFS_DEBUG
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 5b959ee..c4fdaa2 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -136,3 +136,10 @@ xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o
xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o
xfs-$(CONFIG_EXPORTFS_BLOCK_OPS) += xfs_pnfs.o
+
+# online scrub/repair
+ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
+xfs-y += $(addprefix scrub/, \
+ common.o \
+ )
+endif
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 5dedab9..aeccc99 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -468,6 +468,46 @@ typedef struct xfs_swapext
#define XFS_FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
#define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
+/* metadata scrubbing */
+struct xfs_scrub_metadata {
+ __u32 sm_type; /* What to check? */
+ __u32 sm_flags; /* flags; see below. */
+ __u64 sm_ino; /* inode number. */
+ __u32 sm_gen; /* inode generation. */
+ __u32 sm_agno; /* ag number. */
+ __u64 sm_reserved[5]; /* pad to 64 bytes */
+};
+
+/*
+ * Metadata types and flags for scrub operation.
+ */
+#define XFS_SCRUB_TYPE_TEST 0 /* dummy to test ioctl */
+#define XFS_SCRUB_TYPE_MAX 0
+
+/* i: repair this metadata */
+#define XFS_SCRUB_FLAG_REPAIR (1 << 0)
+/* o: metadata object needs repair */
+#define XFS_SCRUB_FLAG_CORRUPT (1 << 1)
+/* o: metadata object could be optimized */
+#define XFS_SCRUB_FLAG_PREEN (1 << 2)
+/* o: cross-referencing failed */
+#define XFS_SCRUB_FLAG_XFAIL (1 << 3)
+/* o: metadata object disagrees with cross-referenced metadata */
+#define XFS_SCRUB_FLAG_XCORRUPT (1 << 4)
+/* o: scan was not complete */
+#define XFS_SCRUB_FLAG_INCOMPLETE (1 << 5)
+/* o: metadata object looked funny but isn't corrupt */
+#define XFS_SCRUB_FLAG_WARNING (1 << 6)
+
+#define XFS_SCRUB_FLAGS_IN (XFS_SCRUB_FLAG_REPAIR)
+#define XFS_SCRUB_FLAGS_OUT (XFS_SCRUB_FLAG_CORRUPT | \
+ XFS_SCRUB_FLAG_PREEN | \
+ XFS_SCRUB_FLAG_XFAIL | \
+ XFS_SCRUB_FLAG_XCORRUPT | \
+ XFS_SCRUB_FLAG_INCOMPLETE | \
+ XFS_SCRUB_FLAG_WARNING)
+#define XFS_SCRUB_FLAGS_ALL (XFS_SCRUB_FLAGS_IN | XFS_SCRUB_FLAGS_OUT)
+
/*
* AG reserved block counters
*/
@@ -520,6 +560,7 @@ struct xfs_fsop_ag_resblks {
#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64)
#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks)
/* XFS_IOC_GETFSMAP ------ hoisted 59 */
+#define XFS_IOC_SCRUB_METADATA _IOWR('X', 60, struct xfs_scrub_metadata)
/*
* ioctl commands that replace IRIX syssgi()'s
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
new file mode 100644
index 0000000..6931793
--- /dev/null
+++ b/fs/xfs/scrub/common.c
@@ -0,0 +1,533 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_refcount.h"
+#include "xfs_refcount_btree.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/common.h"
+
+/*
+ * Online Scrub and Repair
+ *
+ * Traditionally, XFS (the kernel driver) did not know how to check or
+ * repair on-disk data structures. That task was left to the xfs_check
+ * and xfs_repair tools, both of which require taking the filesystem
+ * offline for a thorough but time consuming examination. Online
+ * scrub & repair, on the other hand, enables us to check the metadata
+ * for obvious errors while carefully stepping around the filesystem's
+ * ongoing operations, locking rules, etc.
+ *
+ * Given that most XFS metadata consist of records stored in a btree,
+ * most of the checking functions iterate the btree blocks themselves
+ * looking for irregularities. When a record block is encountered, each
+ * record can be checked for obviously bad values. Record values can
+ * also be cross-referenced against other btrees to look for potential
+ * misunderstandings between pieces of metadata.
+ *
+ * It is expected that the checkers responsible for per-AG metadata
+ * structures will lock the AG headers (AGI, AGF, AGFL), iterate the
+ * metadata structure, and perform any relevant cross-referencing before
+ * unlocking the AG and returning the results to userspace. These
+ * scrubbers must not keep an AG locked for too long to avoid tying up
+ * the block and inode allocators.
+ *
+ * Block maps and b-trees rooted in an inode present a special challenge
+ * because they can involve extents from any AG. The general scrubber
+ * structure of lock -> check -> xref -> unlock still holds, but AG
+ * locking order rules /must/ be obeyed to avoid deadlocks. The
+ * ordering rule, of course, is that we must lock in increasing AG
+ * order. Helper functions are provided to track which AG headers we've
+ * already locked. If we detect an imminent locking order violation, we
+ * can signal a potential deadlock, in which case the scrubber can jump
+ * out to the top level, lock all the AGs in order, and retry the scrub.
+ *
+ * For file data (directories, extended attributes, symlinks) scrub, we
+ * can simply lock the inode and walk the data. For btree data
+ * (directories and attributes) we follow the same btree-scrubbing
+ * strategy outlined previously to check the records.
+ *
+ * We use a bit of trickery with transactions to avoid buffer deadlocks
+ * if there is a cycle in the metadata. The basic problem is that
+ * travelling down a btree involves locking the current buffer at each
+ * tree level. If a pointer should somehow point back to a buffer that
+ * we've already examined, we will deadlock due to the second buffer
+ * locking attempt. Note however that grabbing a buffer in transaction
+ * context links the locked buffer to the transaction. If we try to
+ * re-grab the buffer in the context of the same transaction, we avoid
+ * the second lock attempt and continue. Between the verifier and the
+ * scrubber, something will notice that something is amiss and report
+ * the corruption. Therefore, each scrubber will allocate an empty
+ * transaction, attach buffers to it, and cancel the transaction at the
+ * end of the scrub run. Cancelling a non-dirty transaction simply
+ * unlocks the buffers.
+ *
+ * Scrub communicates two kinds of data to userspace. The first is the
+ * error code (errno), which reports operational errors encountered
+ * while performing the scrub. The second is the set of output flags
+ * in sm_flags: CORRUPT if the data structure itself is corrupt, PREEN
+ * if the metadata is correct but suboptimal, XFAIL and XCORRUPT for
+ * cross-referencing problems, INCOMPLETE if the scan did not finish,
+ * and WARNING if something looked funny but isn't corrupt.
+ */
+
+/* Operations for one metadata type; see the meta_scrub_fns[] table. */
+struct xfs_scrub_meta_fns {
+ /* Prepare the scrub context; invoked by xfs_scrub_setup(). */
+ int (*setup)(struct xfs_scrub_context *,
+ struct xfs_inode *);
+ /* Do the check; if NULL, xfs_scrub_metadata() fails with -ENOENT. */
+ int (*scrub)(struct xfs_scrub_context *);
+ /* Optional feature test on the superblock; NULL means no test. */
+ bool (*has)(struct xfs_sb *);
+};
+
+/*
+ * Decide whether an AG-relative operation succeeded.
+ *
+ * Returns true only when *error is zero.  -EFSBADCRC and -EFSCORRUPTED
+ * are recorded as XFS_SCRUB_FLAG_CORRUPT in the scrub request and *error
+ * is reset to zero, so the badness is noted without aborting.  Every
+ * other error, including -EDEADLOCK, is left in *error.  All failures
+ * are traced and return false.
+ */
+bool
+xfs_scrub_op_ok(
+	struct xfs_scrub_context	*sc,
+	xfs_agnumber_t			agno,
+	xfs_agblock_t			bno,
+	const char			*type,
+	int				*error,
+	const char			*func,
+	int				line)
+{
+	int				err = *error;
+
+	if (err == 0)
+		return true;
+
+	if (err == -EDEADLOCK) {
+		/* Used to restart an op with deadlock avoidance. */
+		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, err);
+		return false;
+	}
+
+	if (err == -EFSBADCRC || err == -EFSCORRUPTED) {
+		/* Note the badness but don't abort. */
+		sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
+		*error = 0;
+	}
+
+	trace_xfs_scrub_op_error(sc->mp, agno, bno, type, *error, func,
+			line);
+	return false;
+}
+
+/*
+ * Decide whether an operation on a file offset succeeded.
+ *
+ * Returns true only when *error is zero.  -EFSBADCRC and -EFSCORRUPTED
+ * are recorded as XFS_SCRUB_FLAG_CORRUPT in the scrub request and *error
+ * is reset to zero, so the badness is noted without aborting.  Every
+ * other error, including -EDEADLOCK, is left in *error.  All failures
+ * are traced and return false.
+ */
+bool
+xfs_scrub_file_op_ok(
+	struct xfs_scrub_context	*sc,
+	int				whichfork,
+	xfs_fileoff_t			offset,
+	const char			*type,
+	int				*error,
+	const char			*func,
+	int				line)
+{
+	int				err = *error;
+
+	if (err == 0)
+		return true;
+
+	if (err == -EDEADLOCK) {
+		/* Used to restart an op with deadlock avoidance. */
+		trace_xfs_scrub_deadlock_retry(sc->ip, sc->sm, err);
+		return false;
+	}
+
+	if (err == -EFSBADCRC || err == -EFSCORRUPTED) {
+		/* Note the badness but don't abort. */
+		sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
+		*error = 0;
+	}
+
+	trace_xfs_scrub_file_op_error(sc->ip, whichfork, offset, type,
+			*error, func, line);
+	return false;
+}
+
+/*
+ * Check for metadata block optimization possibilities.
+ *
+ * When @fs_ok is false, set XFS_SCRUB_FLAG_PREEN in the scrub request and
+ * trace the AG number and AG block of @bp.  @bp is only dereferenced on
+ * that path.  Returns @fs_ok unchanged.
+ */
+bool
+xfs_scrub_block_preen(
+	struct xfs_scrub_context	*sc,
+	struct xfs_buf			*bp,
+	const char			*type,
+	bool				fs_ok,
+	const char			*check,
+	const char			*func,
+	int				line)
+{
+	struct xfs_mount		*mp = sc->mp;
+
+	if (!fs_ok) {
+		xfs_fsblock_t		fsb = XFS_DADDR_TO_FSB(mp, bp->b_bn);
+		xfs_agnumber_t		ag = XFS_FSB_TO_AGNO(mp, fsb);
+		xfs_agblock_t		agbno = XFS_FSB_TO_AGBNO(mp, fsb);
+
+		sc->sm->sm_flags |= XFS_SCRUB_FLAG_PREEN;
+		trace_xfs_scrub_block_preen(mp, ag, agbno, type, check,
+				func, line);
+	}
+	return fs_ok;
+}
+
+/*
+ * Check for metadata block corruption.
+ *
+ * When @fs_ok is false, set XFS_SCRUB_FLAG_CORRUPT in the scrub request
+ * and trace the AG number and AG block of @bp.  @bp is only dereferenced
+ * on that path.  Returns @fs_ok unchanged.
+ */
+bool
+xfs_scrub_block_ok(
+	struct xfs_scrub_context	*sc,
+	struct xfs_buf			*bp,
+	const char			*type,
+	bool				fs_ok,
+	const char			*check,
+	const char			*func,
+	int				line)
+{
+	struct xfs_mount		*mp = sc->mp;
+
+	if (!fs_ok) {
+		xfs_fsblock_t		fsb = XFS_DADDR_TO_FSB(mp, bp->b_bn);
+		xfs_agnumber_t		ag = XFS_FSB_TO_AGNO(mp, fsb);
+		xfs_agblock_t		agbno = XFS_FSB_TO_AGBNO(mp, fsb);
+
+		sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
+		trace_xfs_scrub_block_error(mp, ag, agbno, type, check,
+				func, line);
+	}
+	return fs_ok;
+}
+
+/*
+ * Check for inode metadata corruption.
+ *
+ * When @fs_ok is false, set XFS_SCRUB_FLAG_CORRUPT in the scrub request
+ * and trace where the problem was found.  The location is the block
+ * backing @bp when a buffer is supplied; otherwise it is derived from
+ * @ino.  Returns @fs_ok unchanged so the call can be used as a condition.
+ */
+bool
+xfs_scrub_ino_ok(
+	struct xfs_scrub_context	*sc,
+	xfs_ino_t			ino,
+	struct xfs_buf			*bp,
+	const char			*type,
+	bool				fs_ok,
+	const char			*check,
+	const char			*func,
+	int				line)
+{
+	struct xfs_mount		*mp = sc->mp;
+	xfs_fsblock_t			fsbno;
+	xfs_agnumber_t			agno;
+	xfs_agblock_t			bno;
+
+	if (fs_ok)
+		return fs_ok;
+
+	if (bp) {
+		fsbno = XFS_DADDR_TO_FSB(mp, bp->b_bn);
+		agno = XFS_FSB_TO_AGNO(mp, fsbno);
+		bno = XFS_FSB_TO_AGBNO(mp, fsbno);
+	} else {
+		/*
+		 * Locate the inode from the number we were handed, not
+		 * from sc->ip: xfs_scrub_setup() zeroes the context, so
+		 * sc->ip may be NULL, and the traced location should
+		 * match the traced @ino anyway.
+		 */
+		agno = XFS_INO_TO_AGNO(mp, ino);
+		bno = XFS_INO_TO_AGINO(mp, ino);
+	}
+
+	sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
+	trace_xfs_scrub_ino_error(mp, ino, agno, bno, type, check, func, line);
+	return fs_ok;
+}
+
+/*
+ * Check for inode metadata optimization possibilities.
+ *
+ * When @fs_ok is false, set XFS_SCRUB_FLAG_PREEN in the scrub request and
+ * trace the location: the block backing @bp if one is given, otherwise
+ * the AG and AG-relative inode number of sc->ip.  sc->ip is read on the
+ * failure path in either case.  Returns @fs_ok unchanged.
+ */
+bool
+xfs_scrub_ino_preen(
+	struct xfs_scrub_context	*sc,
+	struct xfs_buf			*bp,
+	const char			*type,
+	bool				fs_ok,
+	const char			*check,
+	const char			*func,
+	int				line)
+{
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_inode		*ip = sc->ip;
+	xfs_agnumber_t			ag;
+	xfs_agblock_t			agbno;
+
+	if (!fs_ok) {
+		if (bp == NULL) {
+			ag = XFS_INO_TO_AGNO(mp, ip->i_ino);
+			agbno = XFS_INO_TO_AGINO(mp, ip->i_ino);
+		} else {
+			xfs_fsblock_t	fsb = XFS_DADDR_TO_FSB(mp, bp->b_bn);
+
+			ag = XFS_FSB_TO_AGNO(mp, fsb);
+			agbno = XFS_FSB_TO_AGBNO(mp, fsb);
+		}
+
+		sc->sm->sm_flags |= XFS_SCRUB_FLAG_PREEN;
+		trace_xfs_scrub_ino_preen(mp, ip->i_ino, ag, agbno, type,
+				check, func, line);
+	}
+	return fs_ok;
+}
+
+/*
+ * Check for file data block corruption.
+ *
+ * When @fs_ok is false, set XFS_SCRUB_FLAG_CORRUPT in the scrub request
+ * and trace the fork and file offset.  Returns @fs_ok unchanged.
+ */
+bool
+xfs_scrub_data_ok(
+	struct xfs_scrub_context	*sc,
+	int				whichfork,
+	xfs_fileoff_t			offset,
+	const char			*type,
+	bool				fs_ok,
+	const char			*check,
+	const char			*func,
+	int				line)
+{
+	if (!fs_ok) {
+		sc->sm->sm_flags |= XFS_SCRUB_FLAG_CORRUPT;
+		trace_xfs_scrub_data_error(sc->ip, whichfork, offset, type,
+				check, func, line);
+	}
+	return fs_ok;
+}
+
+/*
+ * Check for file data block non-corruption problems.
+ *
+ * When @fs_ok is false, set XFS_SCRUB_FLAG_WARNING in the scrub request
+ * and trace the fork and file offset.  Returns @fs_ok unchanged.
+ */
+bool
+xfs_scrub_data_warn_ok(
+	struct xfs_scrub_context	*sc,
+	int				whichfork,
+	xfs_fileoff_t			offset,
+	const char			*type,
+	bool				fs_ok,
+	const char			*check,
+	const char			*func,
+	int				line)
+{
+	if (!fs_ok) {
+		sc->sm->sm_flags |= XFS_SCRUB_FLAG_WARNING;
+		trace_xfs_scrub_data_warning(sc->ip, whichfork, offset, type,
+				check, func, line);
+	}
+	return fs_ok;
+}
+
+/*
+ * Signal an incomplete scrub.
+ *
+ * When @fs_ok is false, set XFS_SCRUB_FLAG_INCOMPLETE in the scrub
+ * request and emit a tracepoint.  Returns @fs_ok unchanged.
+ */
+bool
+xfs_scrub_incomplete(
+	struct xfs_scrub_context	*sc,
+	const char			*type,
+	bool				fs_ok,
+	const char			*check,
+	const char			*func,
+	int				line)
+{
+	if (!fs_ok) {
+		sc->sm->sm_flags |= XFS_SCRUB_FLAG_INCOMPLETE;
+		trace_xfs_scrub_incomplete(sc->mp, type, check, func, line);
+	}
+	return fs_ok;
+}
+
+/* Dummy scrubber */
+
+/*
+ * Exercise the ioctl plumbing without looking at any metadata.
+ *
+ * sm_ino and sm_agno must be zero, else -EINVAL.  The caller passes the
+ * output flags it wants to see in sm_gen; every bit that is a member of
+ * XFS_SCRUB_FLAGS_OUT is copied into sm_flags.  If sm_gen contains any
+ * bit outside XFS_SCRUB_FLAGS_OUT the call fails with -ENOENT.
+ */
+int
+xfs_scrub_dummy(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_scrub_metadata	*sm = sc->sm;
+
+	if (sm->sm_ino || sm->sm_agno)
+		return -EINVAL;
+
+	/*
+	 * Mirror all requested output flags.  Masking with the full
+	 * output set also covers INCOMPLETE and WARNING, which are
+	 * accepted by the check below and must not be silently dropped.
+	 */
+	sm->sm_flags |= sm->sm_gen & XFS_SCRUB_FLAGS_OUT;
+
+	if (sm->sm_gen & ~XFS_SCRUB_FLAGS_OUT)
+		return -ENOENT;
+
+	return 0;
+}
+
+/* Per-scrubber setup functions */
+
+/*
+ * Set us up with a transaction and an otherwise empty context.  The
+ * transaction is stored in sc->tp; @ip is not used by this setup
+ * function.
+ */
+int
+xfs_scrub_setup_fs(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	struct xfs_mount		*mp = sc->mp;
+
+	return xfs_scrub_trans_alloc(sc->sm, mp, &M_RES(mp)->tr_itruncate,
+			0, 0, 0, &sc->tp);
+}
+
+/* Scrub setup and teardown */
+
+/*
+ * Release everything the scrub context holds.  Any transaction attached
+ * to the context is cancelled and the pointer cleared.  @error is passed
+ * straight back to the caller.
+ */
+STATIC int
+xfs_scrub_teardown(
+	struct xfs_scrub_context	*sc,
+	int				error)
+{
+	struct xfs_trans		*tp = sc->tp;
+
+	if (tp != NULL) {
+		sc->tp = NULL;
+		xfs_trans_cancel(tp);
+	}
+	return error;
+}
+
+/*
+ * Perform common scrub context initialization: zero the context, record
+ * the mount, request, dispatch entry and retry mode, then hand off to the
+ * type-specific setup function and return its result.
+ */
+STATIC int
+xfs_scrub_setup(
+	struct xfs_inode		*ip,
+	struct xfs_scrub_context	*sc,
+	const struct xfs_scrub_meta_fns	*fns,
+	struct xfs_scrub_metadata	*sm,
+	bool				try_harder)
+{
+	memset(sc, 0, sizeof(struct xfs_scrub_context));
+	sc->try_harder = try_harder;
+	sc->fns = fns;
+	sc->sm = sm;
+	sc->mp = ip->i_mount;
+
+	return fns->setup(sc, ip);
+}
+
+/* Scrubbing dispatch. */
+
+/*
+ * Dispatch table, indexed by sm_type in xfs_scrub_metadata().  Entry 0 is
+ * XFS_SCRUB_TYPE_TEST.
+ */
+static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
+ { /* dummy verifier */
+ .setup = xfs_scrub_setup_fs,
+ .scrub = xfs_scrub_dummy,
+ },
+};
+
+/*
+ * Dispatch metadata scrubbing.
+ *
+ * Validates the request in @sm, looks up the scrubber for sm_type, runs
+ * it, and tears the scrub context down again.  Output flags are returned
+ * in sm->sm_flags.
+ *
+ * Returns 0 or a negative errno:
+ *   -ESHUTDOWN        the filesystem is shut down
+ *   -ENOTRECOVERABLE  the filesystem is mounted norecovery
+ *   -EINVAL           unknown bits in sm_flags, or sm_reserved not zero
+ *   -ENOENT           unknown type, or the fs lacks this metadata
+ *   -EOPNOTSUPP       XFS_SCRUB_FLAG_REPAIR was requested
+ * or whatever the setup/scrub function returned.
+ */
+int
+xfs_scrub_metadata(
+ struct xfs_inode *ip,
+ struct xfs_scrub_metadata *sm)
+{
+ struct xfs_scrub_context sc;
+ struct xfs_mount *mp = ip->i_mount;
+ const struct xfs_scrub_meta_fns *fns;
+ bool try_harder = false;
+ int error = 0;
+
+ trace_xfs_scrub(ip, sm, error);
+
+ /* Forbidden if we are shut down or mounted norecovery. */
+ error = -ESHUTDOWN;
+ if (XFS_FORCED_SHUTDOWN(mp))
+ goto out;
+ error = -ENOTRECOVERABLE;
+ if (mp->m_flags & XFS_MOUNT_NORECOVERY)
+ goto out;
+
+ /* Check our inputs. */
+ error = -EINVAL;
+ /* Output bits left set by the caller are dropped, not rejected. */
+ sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
+ if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN)
+ goto out;
+ if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved)))
+ goto out;
+
+ /* Do we know about this type of metadata? */
+ error = -ENOENT;
+ if (sm->sm_type > XFS_SCRUB_TYPE_MAX)
+ goto out;
+ fns = &meta_scrub_fns[sm->sm_type];
+ if (fns->scrub == NULL)
+ goto out;
+
+ /* Does this fs even support this type of metadata? */
+ if (fns->has && !fns->has(&mp->m_sb))
+ goto out;
+
+ /* We don't know how to repair anything yet. */
+ error = -EOPNOTSUPP;
+ if (sm->sm_flags & XFS_SCRUB_FLAG_REPAIR)
+ goto out;
+
+ /* This isn't a stable feature. Use with care. */
+ {
+ /* Function-scope static: the alert is printed only until set. */
+ static bool warned;
+
+ if (!warned)
+ xfs_alert(mp,
+ "EXPERIMENTAL online scrub feature in use. Use at your own risk!");
+ warned = true;
+ }
+
+retry_op:
+ /* Set up for the operation. */
+ error = xfs_scrub_setup(ip, &sc, fns, sm, try_harder);
+ if (error)
+ goto out_teardown;
+
+ /* Scrub for errors. */
+ error = fns->scrub(&sc);
+ if (!try_harder && error == -EDEADLOCK) {
+ /*
+ * Scrubbers return -EDEADLOCK to mean 'try harder'.
+ * Tear down everything we hold, then set up again with
+ * preparation for worst-case scenarios.
+ */
+ error = xfs_scrub_teardown(&sc, 0);
+ if (error)
+ goto out;
+ /* Only one retry: a second -EDEADLOCK takes the error path. */
+ try_harder = true;
+ goto retry_op;
+ } else if (error)
+ goto out_teardown;
+
+ if (xfs_scrub_found_corruption(sm))
+ xfs_alert_ratelimited(mp, "Corruption detected during scrub.");
+
+out_teardown:
+ /* Teardown hands back the error it was given. */
+ error = xfs_scrub_teardown(&sc, error);
+out:
+ trace_xfs_scrub_done(ip, sm, error);
+ return error;
+}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
new file mode 100644
index 0000000..4f3113a
--- /dev/null
+++ b/fs/xfs/scrub/common.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#ifndef __XFS_REPAIR_COMMON_H__
+#define __XFS_REPAIR_COMMON_H__
+
+/* Did the scrub mark the object, or its cross-references, as corrupt? */
+static inline bool xfs_scrub_found_corruption(struct xfs_scrub_metadata *sm)
+{
+	const __u32	bad = XFS_SCRUB_FLAG_CORRUPT | XFS_SCRUB_FLAG_XCORRUPT;
+
+	return (sm->sm_flags & bad) != 0;
+}
+
+/* State for one scrub operation; zeroed and filled by xfs_scrub_setup(). */
+struct xfs_scrub_context {
+ /* General scrub state. */
+ /* Mount being scrubbed. */
+ struct xfs_mount *mp;
+ /* The caller's request; output flags are set in sm->sm_flags. */
+ struct xfs_scrub_metadata *sm;
+ /* Dispatch entry for this metadata type. */
+ const struct xfs_scrub_meta_fns *fns;
+ /* Transaction for this scrub; cancelled at teardown if set. */
+ struct xfs_trans *tp;
+ /* NOTE(review): nothing in this patch assigns ip; confirm who sets it. */
+ struct xfs_inode *ip;
+ /* True on the retry that follows a scrubber returning -EDEADLOCK. */
+ bool try_harder;
+};
+
+/*
+ * Should we end the scrub early?  Returns true if a fatal signal is
+ * pending for the current task; in that case *error is set to -EAGAIN
+ * unless it already holds an error.
+ */
+static inline bool
+xfs_scrub_should_terminate(
+	int		*error)
+{
+	if (!fatal_signal_pending(current))
+		return false;
+
+	if (*error == 0)
+		*error = -EAGAIN;
+	return true;
+}
+
+/*
+ * Grab a transaction. If we're going to repair something, we need to
+ * ensure there's enough reservation to make all the changes. If not,
+ * we can use an empty transaction.
+ *
+ * As written, this always allocates an empty transaction: @sm, @resp,
+ * @blocks, @rtextents and @flags are accepted but not used.  Returns
+ * the result of xfs_trans_alloc_empty(), with the transaction in *@tpp.
+ */
+static inline int
+xfs_scrub_trans_alloc(
+ struct xfs_scrub_metadata *sm,
+ struct xfs_mount *mp,
+ struct xfs_trans_res *resp,
+ uint blocks,
+ uint rtextents,
+ uint flags,
+ struct xfs_trans **tpp)
+{
+ return xfs_trans_alloc_empty(mp, tpp);
+}
+
+/* Check for operational errors. */
+bool xfs_scrub_op_ok(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
+ xfs_agblock_t bno, const char *type, int *error,
+ const char *func, int line);
+#define XFS_SCRUB_OP_ERROR_GOTO(sc, agno, bno, type, error, label) \
+ do { \
+ if (!xfs_scrub_op_ok((sc), (agno), (bno), (type), \
+ (error), __func__, __LINE__)) \
+ goto label; \
+ } while (0)
+
+/* Check for operational errors for a file offset. */
+bool xfs_scrub_file_op_ok(struct xfs_scrub_context *sc, int whichfork,
+ xfs_fileoff_t offset, const char *type,
+ int *error, const char *func, int line);
+#define XFS_SCRUB_FILE_OP_ERROR_GOTO(sc, which, off, type, error, label) \
+ do { \
+ if (!xfs_scrub_file_op_ok((sc), (which), (off), (type), \
+ (error), __func__, __LINE__)) \
+ goto label; \
+ } while (0)
+
+/* Check for metadata block optimization possibilities. */
+bool xfs_scrub_block_preen(struct xfs_scrub_context *sc, struct xfs_buf *bp,
+ const char *type, bool fs_ok, const char *check,
+ const char *func, int line);
+#define XFS_SCRUB_PREEN(sc, bp, type, fs_ok) \
+ xfs_scrub_block_preen((sc), (bp), (type), (fs_ok), #fs_ok, \
+ __func__, __LINE__)
+
+/* Check for inode metadata optimization possibilities. */
+bool xfs_scrub_ino_preen(struct xfs_scrub_context *sc, struct xfs_buf *bp,
+ const char *type, bool fs_ok, const char *check,
+ const char *func, int line);
+#define XFS_SCRUB_INO_PREEN(sc, bp, type, fs_ok) \
+ xfs_scrub_ino_preen((sc), (bp), (type), (fs_ok), #fs_ok, \
+ __func__, __LINE__)
+
+/* Check for metadata block corruption. */
+bool xfs_scrub_block_ok(struct xfs_scrub_context *sc, struct xfs_buf *bp,
+ const char *type, bool fs_ok, const char *check,
+ const char *func, int line);
+#define XFS_SCRUB_CHECK(sc, bp, type, fs_ok) \
+ xfs_scrub_block_ok((sc), (bp), (type), (fs_ok), #fs_ok, \
+ __func__, __LINE__)
+#define XFS_SCRUB_GOTO(sc, bp, type, fs_ok, label) \
+ do { \
+ if (!xfs_scrub_block_ok((sc), (bp), (type), (fs_ok), \
+ #fs_ok, __func__, __LINE__)) \
+ goto label; \
+ } while (0)
+
+/* Check for inode metadata corruption. */
+bool xfs_scrub_ino_ok(struct xfs_scrub_context *sc, xfs_ino_t ino,
+ struct xfs_buf *bp, const char *type, bool fs_ok,
+ const char *check, const char *func, int line);
+#define XFS_SCRUB_INO_CHECK(sc, ino, bp, type, fs_ok) \
+ xfs_scrub_ino_ok((sc), (ino), (bp), (type), (fs_ok), #fs_ok, \
+ __func__, __LINE__)
+#define XFS_SCRUB_INO_GOTO(sc, ino, bp, type, fs_ok, label) \
+ do { \
+ if (!xfs_scrub_ino_ok((sc), (ino), (bp), (type), (fs_ok), \
+ #fs_ok, __func__, __LINE__)) \
+ goto label; \
+ } while (0)
+
+/* Check for file data block corruption. */
+bool xfs_scrub_data_ok(struct xfs_scrub_context *sc, int whichfork,
+ xfs_fileoff_t offset, const char *type, bool fs_ok,
+ const char *check, const char *func, int line);
+#define XFS_SCRUB_DATA_CHECK(sc, whichfork, offset, type, fs_ok) \
+ xfs_scrub_data_ok((sc), (whichfork), (offset), (type), (fs_ok), \
+ #fs_ok, __func__, __LINE__)
+#define XFS_SCRUB_DATA_GOTO(sc, whichfork, offset, type, fs_ok, label) \
+ do { \
+ if (!xfs_scrub_data_ok((sc), (whichfork), (offset), \
+ (type), (fs_ok), #fs_ok, __func__, __LINE__)) \
+ goto label; \
+ } while (0)
+
+/* Check for file data block non-corruption problems. */
+bool xfs_scrub_data_warn_ok(struct xfs_scrub_context *sc, int whichfork,
+ xfs_fileoff_t offset, const char *type, bool fs_ok,
+ const char *check, const char *func, int line);
+#define XFS_SCRUB_DATA_WARN(sc, whichfork, offset, type, fs_ok) \
+ xfs_scrub_data_warn_ok((sc), (whichfork), (offset), (type), (fs_ok), \
+ #fs_ok, __func__, __LINE__)
+
+/* Signal an incomplete scrub. */
+bool xfs_scrub_incomplete(struct xfs_scrub_context *sc, const char *type,
+ bool fs_ok, const char *check, const char *func,
+ int line);
+#define XFS_SCRUB_INCOMPLETE(sc, type, fs_ok) \
+ xfs_scrub_incomplete((sc), (type), (fs_ok), \
+ #fs_ok, __func__, __LINE__)
+
+/* Setup functions */
+
+#define SETUP_FN(name) int name(struct xfs_scrub_context *sc, struct xfs_inode *ip)
+SETUP_FN(xfs_scrub_setup_fs);
+#undef SETUP_FN
+
+/* Metadata scrubbers */
+
+#define SCRUB_FN(name) int name(struct xfs_scrub_context *sc)
+SCRUB_FN(xfs_scrub_dummy);
+#undef SCRUB_FN
+
+#endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/scrub/xfs_scrub.h b/fs/xfs/scrub/xfs_scrub.h
new file mode 100644
index 0000000..e00e0ea
--- /dev/null
+++ b/fs/xfs/scrub/xfs_scrub.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#ifndef __XFS_SCRUB_H__
+#define __XFS_SCRUB_H__
+
+#ifndef CONFIG_XFS_ONLINE_SCRUB
+# define xfs_scrub_metadata(ip, sm) (-ENOTTY)
+#else
+int xfs_scrub_metadata(struct xfs_inode *ip, struct xfs_scrub_metadata *sm);
+#endif /* CONFIG_XFS_ONLINE_SCRUB */
+
+#endif /* __XFS_SCRUB_H__ */
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index cc00260..87b3874 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -44,6 +44,7 @@
#include "xfs_btree.h"
#include <linux/fsmap.h>
#include "xfs_fsmap.h"
+#include "scrub/xfs_scrub.h"
#include <linux/capability.h>
#include <linux/cred.h>
@@ -1689,6 +1690,30 @@ xfs_ioc_getfsmap(
return 0;
}
+/*
+ * XFS_IOC_SCRUB_METADATA handler.  Requires CAP_SYS_ADMIN.  Copies the
+ * request in from userspace, runs the scrub, and copies the request
+ * back out only if the scrub returned zero.
+ */
+STATIC int
+xfs_ioc_scrub_metadata(
+	struct xfs_inode		*ip,
+	void				__user *arg)
+{
+	struct xfs_scrub_metadata	sm;
+	int				ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	if (copy_from_user(&sm, arg, sizeof(sm)))
+		return -EFAULT;
+
+	ret = xfs_scrub_metadata(ip, &sm);
+	if (ret != 0)
+		return ret;
+
+	return copy_to_user(arg, &sm, sizeof(sm)) ? -EFAULT : 0;
+}
+
int
xfs_ioc_swapext(
xfs_swapext_t *sxp)
@@ -1872,6 +1897,9 @@ xfs_file_ioctl(
case FS_IOC_GETFSMAP:
return xfs_ioc_getfsmap(ip, arg);
+ case XFS_IOC_SCRUB_METADATA:
+ return xfs_ioc_scrub_metadata(ip, arg);
+
case XFS_IOC_FD_TO_HANDLE:
case XFS_IOC_PATH_TO_HANDLE:
case XFS_IOC_PATH_TO_FSHANDLE: {
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index e8b4de3..972d4bd 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -557,6 +557,7 @@ xfs_file_compat_ioctl(
case XFS_IOC_ERROR_CLEARALL:
case FS_IOC_GETFSMAP:
case XFS_IOC_GET_AG_RESBLKS:
+ case XFS_IOC_SCRUB_METADATA:
return xfs_file_ioctl(filp, cmd, p);
#ifndef BROKEN_X86_ALIGNMENT
/* These are handled fine if no alignment issues */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 2e7e193..d4de29b 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3312,7 +3312,7 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
/* scrub */
#define XFS_SCRUB_TYPE_DESC \
- { 0, NULL }
+ { XFS_SCRUB_TYPE_TEST, "dummy" }
DECLARE_EVENT_CLASS(xfs_scrub_class,
TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm,
int error),
@@ -3330,6 +3330,11 @@ DECLARE_EVENT_CLASS(xfs_scrub_class,
TP_fast_assign(
__entry->dev = ip->i_mount->m_super->s_dev;
__entry->ino = ip->i_ino;
+ __entry->type = sm->sm_type;
+ __entry->agno = sm->sm_agno;
+ __entry->inum = sm->sm_ino;
+ __entry->gen = sm->sm_gen;
+ __entry->flags = sm->sm_flags;
__entry->error = error;
),
TP_printk("dev %d:%d ino %llu type %s agno %u inum %llu gen %u flags 0x%x error %d",
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html