From: Darrick J. Wong <darrick.wong@xxxxxxxxxx> Scrub an individual inode's block mappings to make sure they make sense. Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> --- fs/xfs/Makefile | 1 fs/xfs/libxfs/xfs_fs.h | 5 + fs/xfs/scrub/bmap.c | 378 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/common.c | 12 ++ fs/xfs/scrub/common.h | 5 + fs/xfs/xfs_trace.h | 5 + 6 files changed, 404 insertions(+), 2 deletions(-) create mode 100644 fs/xfs/scrub/bmap.c diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 471fc27..73c7189 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -106,6 +106,7 @@ xfs-y += xfs_aops.o \ xfs-$(CONFIG_XFS_DEBUG) += $(addprefix scrub/, \ agheader.o \ alloc.o \ + bmap.o \ btree.o \ common.o \ ialloc.o \ diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 39e6441a..4984582 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -504,7 +504,10 @@ struct xfs_scrub_metadata { #define XFS_SCRUB_TYPE_RMAPBT 9 /* reverse mapping btree */ #define XFS_SCRUB_TYPE_REFCNTBT 10 /* reference count btree */ #define XFS_SCRUB_TYPE_INODE 11 /* inode record */ -#define XFS_SCRUB_TYPE_MAX 11 +#define XFS_SCRUB_TYPE_BMBTD 12 /* data fork block mapping */ +#define XFS_SCRUB_TYPE_BMBTA 13 /* attr fork block mapping */ +#define XFS_SCRUB_TYPE_BMBTC 14 /* CoW fork block mapping */ +#define XFS_SCRUB_TYPE_MAX 14 #define XFS_SCRUB_FLAG_REPAIR 0x01 /* i: repair this metadata */ #define XFS_SCRUB_FLAG_CORRUPT 0x02 /* o: needs repair */ diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c new file mode 100644 index 0000000..e148a49 --- /dev/null +++ b/fs/xfs/scrub/bmap.c @@ -0,0 +1,378 @@ +/* + * Copyright (C) 2017 Oracle. All Rights Reserved. + * + * Author: Darrick J. 
Wong <darrick.wong@xxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_inode_fork.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_rmap.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+
+/*
+ * Set us up with an inode's bmap.
+ *
+ * Calls xfs_scrub_get_inode() and then works on sc->ip (presumably filled
+ * in by that call -- confirm in scrub/common.c).  Order of operations:
+ * IOLOCK_EXCL and MMAPLOCK_EXCL are taken first, then the optional data
+ * flush runs, then the scrub transaction is allocated, and ILOCK_EXCL is
+ * taken last.  Every lock taken is recorded in sc->ilock_flags.
+ *
+ * @flush_data only has an effect on regular files.
+ *
+ * Returns 0 with the locks held and sc->tp allocated.  If
+ * xfs_scrub_get_inode() fails, its error is returned directly; any later
+ * failure drops the locks taken so far, releases sc->ip when it differs
+ * from the caller's @ip, clears sc->ip and returns the error.
+ */
+STATIC int
+__xfs_scrub_setup_inode_bmap(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip,
+	bool				flush_data)
+{
+	struct xfs_mount		*mp = sc->mp;
+	int				error;
+
+	error = xfs_scrub_get_inode(sc, ip);
+	if (error)
+		return error;
+
+	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
+	xfs_ilock(sc->ip, sc->ilock_flags);
+
+	/*
+	 * We don't want any ephemeral data fork updates sitting around
+	 * while we inspect block mappings, so wait for directio to finish
+	 * and flush dirty data if we have delalloc reservations.
+	 *
+	 * Note that the writeback below is unconditional for regular files
+	 * when @flush_data is set, and that the page cache is invalidated
+	 * afterwards as well.
+	 */
+	if (S_ISREG(VFS_I(sc->ip)->i_mode) && flush_data) {
+		inode_dio_wait(VFS_I(sc->ip));
+		error = filemap_write_and_wait(VFS_I(sc->ip)->i_mapping);
+		if (error)
+			goto out_unlock;
+		error = invalidate_inode_pages2(VFS_I(sc->ip)->i_mapping);
+		if (error)
+			goto out_unlock;
+	}
+
+	/*
+	 * Got the inode, lock it and we're ready to go.  The transaction is
+	 * allocated first; ILOCK_EXCL is only taken (and only recorded in
+	 * ilock_flags) once that allocation has succeeded, so the error path
+	 * never tries to drop an ILOCK it does not hold.
+	 */
+	error = xfs_scrub_trans_alloc(sc->sm, mp, &M_RES(mp)->tr_itruncate,
+			0, 0, 0, &sc->tp);
+	if (error)
+		goto out_unlock;
+	sc->ilock_flags |= XFS_ILOCK_EXCL;
+	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
+
+	return 0;
+out_unlock:
+	xfs_iunlock(sc->ip, sc->ilock_flags);
+	if (sc->ip != ip)	/* presumably: only release an inode we grabbed ourselves */
+		IRELE(sc->ip);
+	sc->ip = NULL;
+	return error;
+}
+
+/*
+ * Set us up to scrub the data fork.  Passes flush_data == true, so regular
+ * files have their dirty data written back before the mappings are read.
+ */
+int
+xfs_scrub_setup_inode_bmap_data(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	return __xfs_scrub_setup_inode_bmap(sc, ip, true);
+}
+
+/*
+ * Set us up to scrub the attr or CoW fork.  Same as the data fork setup
+ * except that flush_data == false, i.e. no writeback or invalidation.
+ */
+int
+xfs_scrub_setup_inode_bmap(
+	struct xfs_scrub_context	*sc,
+	struct xfs_inode		*ip)
+{
+	return __xfs_scrub_setup_inode_bmap(sc, ip, false);
+}
+
+/*
+ * Inode fork block mapping (BMBT) scrubber.
+ * More complex than the others because we have to scrub
+ * all the extents regardless of whether or not the fork
+ * is in btree format.
+ *
+ * struct xfs_scrub_bmap_info carries the per-fork state shared between the
+ * btree walk and the incore extent walk.  The XFS_SCRUB_BMAP_* wrappers
+ * defined after it capture locals by name: they expect "info" (a pointer
+ * to this struct) and "bp" to be in scope, and the OP_ERROR variant also
+ * expects "agno" and "error".
+ */
+
+struct xfs_scrub_bmap_info {
+	struct xfs_scrub_context	*sc;		/* scrub context being run */
+	const char			*type;		/* "data fork", "attr fork" or "CoW fork" */
+	xfs_daddr_t			eofs;		/* device size (XFS_FSB_TO_BB units); mappings must end by here */
+	xfs_fileoff_t			lastoff;	/* file offset just past the previous extent checked */
+	bool				is_rt;		/* data fork of a realtime inode */
+	bool				is_shared;	/* data fork of a reflink inode */
+	int				whichfork;	/* XFS_{DATA,ATTR,COW}_FORK */
+};
+
+#define XFS_SCRUB_BMAP_CHECK(fs_ok) \
+	XFS_SCRUB_INO_CHECK(info->sc, info->sc->ip->i_ino, bp, info->type, fs_ok)
+#define XFS_SCRUB_BMAP_GOTO(fs_ok, label) \
+	XFS_SCRUB_INO_GOTO(info->sc, info->sc->ip->i_ino, bp, info->type, fs_ok, label)
+#define XFS_SCRUB_BMAP_OP_ERROR_GOTO(label) \
+	XFS_SCRUB_OP_ERROR_GOTO(info->sc, agno, 0, "bmap", &error, label)
+/* Scrub a single extent record.
*/ /*
+ * Checks one mapping (@irec) against the per-fork state in @info:
+ *  - the file offset must not precede info->lastoff;
+ *  - the start block must be neither HOLESTARTBLOCK nor a null (delalloc)
+ *    start block;
+ *  - the length must be non-zero and at most MAXEXTLEN;
+ *  - the disk range must start before and end at or before info->eofs;
+ *  - an unwritten extent requires the extflgbit superblock feature.
+ *
+ * @cur may be NULL (incore extent walk).  When set, the level-0 btree
+ * buffer is fetched into "bp" so the check macros receive it.  @ip is not
+ * referenced directly; the macros use info->sc->ip.
+ *
+ * info->lastoff is advanced past this extent on every exit path.
+ * Returns the value left in "error": 0, or the result of
+ * xfs_scrub_ag_init() as processed by XFS_SCRUB_BMAP_OP_ERROR_GOTO.
+ */
+STATIC int
+xfs_scrub_bmap_extent(
+	struct xfs_inode		*ip,
+	struct xfs_btree_cur		*cur,
+	struct xfs_scrub_bmap_info	*info,
+	struct xfs_bmbt_irec		*irec)
+{
+	struct xfs_scrub_ag		sa = { 0 };
+	struct xfs_mount		*mp = info->sc->mp;
+	struct xfs_buf			*bp = NULL;
+	xfs_daddr_t			daddr;
+	xfs_daddr_t			dlen;
+	xfs_fsblock_t			bno;
+	xfs_agnumber_t			agno;
+	int				error = 0;
+
+	if (cur)	/* btree walk: hand the leaf buffer to the check macros */
+		xfs_btree_get_block(cur, 0, &bp);
+
+	XFS_SCRUB_BMAP_CHECK(irec->br_startoff >= info->lastoff);
+	XFS_SCRUB_BMAP_CHECK(irec->br_startblock != HOLESTARTBLOCK);
+	XFS_SCRUB_BMAP_CHECK(!isnullstartblock(irec->br_startblock));
+
+	/*
+	 * Actual mapping, so check the block ranges.  Realtime mappings are
+	 * converted with XFS_FSB_TO_BB() and carry no AG number; everything
+	 * else is split into an AG number and an AG block, both of which are
+	 * range-checked against the superblock geometry.  The AG number test
+	 * uses the GOTO variant with the "out" label, so the remaining
+	 * per-AG checks are presumably skipped when it fails.
+	 */
+	if (info->is_rt) {
+		daddr = XFS_FSB_TO_BB(mp, irec->br_startblock);
+		agno = NULLAGNUMBER;
+		bno = irec->br_startblock;
+	} else {
+		daddr = XFS_FSB_TO_DADDR(mp, irec->br_startblock);
+		agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
+		XFS_SCRUB_BMAP_GOTO(agno < mp->m_sb.sb_agcount, out);
+		bno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
+		XFS_SCRUB_BMAP_CHECK(bno < mp->m_sb.sb_agblocks);
+	}
+	dlen = XFS_FSB_TO_BB(mp, irec->br_blockcount);
+	XFS_SCRUB_BMAP_CHECK(irec->br_blockcount > 0);
+	XFS_SCRUB_BMAP_CHECK(irec->br_blockcount <= MAXEXTLEN);
+	XFS_SCRUB_BMAP_CHECK(daddr < info->eofs);
+	XFS_SCRUB_BMAP_CHECK(daddr + dlen <= info->eofs);
+	XFS_SCRUB_BMAP_CHECK(irec->br_state != XFS_EXT_UNWRITTEN ||
+			xfs_sb_version_hasextflgbit(&mp->m_sb));
+	if (error)	/* NOTE(review): nothing visible above assigns error -- confirm macros */
+		goto out;
+
+	/*
+	 * Set ourselves up for cross-referencing later.  Only non-realtime
+	 * mappings have an AG to initialise.  No cross-referencing happens in
+	 * this function yet: the AG state is released again right below.
+	 */
+	if (!info->is_rt) {
+		error = xfs_scrub_ag_init(info->sc, agno, &sa);
+		XFS_SCRUB_BMAP_OP_ERROR_GOTO(out);
+	}
+
+	xfs_scrub_ag_free(info->sc, &sa);
+out:
+	info->lastoff = irec->br_startoff + irec->br_blockcount;
+	return error;
+}
+#undef XFS_SCRUB_BMAP_OP_ERROR_GOTO
+#undef XFS_SCRUB_BMAP_GOTO
+
+/* Scrub a bmbt record.
*/ /*
+ * Per-record callback that xfs_scrub_bmap() passes to xfs_scrub_btree().
+ * @bs->private is used as the struct xfs_scrub_bmap_info (presumably the
+ * last argument given to xfs_scrub_btree() -- confirm in scrub/btree.c)
+ * and the inode is taken from the btree cursor.
+ *
+ * Converts the big-endian on-disk record in @rec into an in-core
+ * xfs_bmbt_irec and returns whatever xfs_scrub_bmap_extent() returns
+ * for it.
+ */
+STATIC int
+xfs_scrub_bmapbt_helper(
+	struct xfs_scrub_btree		*bs,
+	union xfs_btree_rec		*rec)
+{
+	struct xfs_bmbt_rec_host	ihost;
+	struct xfs_bmbt_irec		irec;
+	struct xfs_scrub_bmap_info	*info = bs->private;
+	struct xfs_inode		*ip = bs->cur->bc_private.b.ip;
+	struct xfs_buf			*bp = NULL;
+	struct xfs_btree_block		*block;
+	uint64_t			owner;
+	int				i;
+
+	/*
+	 * Check the owners of the btree blocks up to the level below
+	 * the root since the verifiers don't do that.
+	 *
+	 * This runs only on CRC-enabled filesystems and only when
+	 * bc_ptrs[0] == 1 (presumably the first record of a leaf, so the
+	 * path is examined once per leaf rather than once per record --
+	 * confirm).  Each pass through the loop overwrites "bp", which is
+	 * what the check macro receives.
+	 */
+	if (xfs_sb_version_hascrc(&bs->cur->bc_mp->m_sb) &&
+	    bs->cur->bc_ptrs[0] == 1) {
+		for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
+			block = xfs_btree_get_block(bs->cur, i, &bp);
+			owner = be64_to_cpu(block->bb_u.l.bb_owner);
+			XFS_SCRUB_BMAP_CHECK(owner == ip->i_ino);
+		}
+	}
+
+	/* Set up the in-core record and scrub it. */
+	ihost.l0 = be64_to_cpu(rec->bmbt.l0);
+	ihost.l1 = be64_to_cpu(rec->bmbt.l1);
+	xfs_bmbt_get_all(&ihost, &irec);
+	return xfs_scrub_bmap_extent(ip, bs->cur, info, &irec);
+}
+#undef XFS_SCRUB_BMAP_CHECK
+
+#define XFS_SCRUB_FORK_CHECK(fs_ok) \
+	XFS_SCRUB_INO_CHECK(sc, ip->i_ino, NULL, info.type, fs_ok)
+#define XFS_SCRUB_FORK_GOTO(fs_ok, label) \
+	XFS_SCRUB_INO_GOTO(sc, ip->i_ino, NULL, info.type, fs_ok, label)
+#define XFS_SCRUB_FORK_OP_ERROR_GOTO(label) \
+	XFS_SCRUB_OP_ERROR_GOTO(sc, \
+			XFS_INO_TO_AGNO(mp, ip->i_ino), \
+			XFS_INO_TO_AGBNO(mp, ip->i_ino), \
+			info.type, &error, label)
+/* Scrub an inode fork's block mappings.
*/ +STATIC int +xfs_scrub_bmap( + struct xfs_scrub_context *sc, + int whichfork) +{ + struct xfs_bmbt_irec irec; + struct xfs_scrub_bmap_info info = {0}; + struct xfs_owner_info oinfo; + struct xfs_mount *mp = sc->mp; + struct xfs_inode *ip = sc->ip; + struct xfs_ifork *ifp; + struct xfs_btree_cur *cur; + xfs_fileoff_t endoff; + xfs_extnum_t idx; + bool found; + int error = 0; + int err2 = 0; + + switch (whichfork) { + case XFS_DATA_FORK: + info.type = "data fork"; + break; + case XFS_ATTR_FORK: + info.type = "attr fork"; + break; + case XFS_COW_FORK: + info.type = "CoW fork"; + break; + } + ifp = XFS_IFORK_PTR(ip, whichfork); + + info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip); + info.eofs = XFS_FSB_TO_BB(mp, info.is_rt ? mp->m_sb.sb_rblocks : + mp->m_sb.sb_dblocks); + info.whichfork = whichfork; + info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip); + info.sc = sc; + + switch (whichfork) { + case XFS_COW_FORK: + /* Non-existent CoW forks are ignorable. */ + if (!ifp) + goto out_unlock; + /* No CoW forks on non-reflink inodes/filesystems. */ + XFS_SCRUB_FORK_GOTO(xfs_is_reflink_inode(ip), out_unlock); + break; + case XFS_ATTR_FORK: + if (!ifp) + goto out_unlock; + XFS_SCRUB_FORK_CHECK(xfs_sb_version_hasattr(&mp->m_sb) || + xfs_sb_version_hasattr2(&mp->m_sb)); + break; + } + + /* Check the fork values */ + switch (XFS_IFORK_FORMAT(ip, whichfork)) { + case XFS_DINODE_FMT_UUID: + case XFS_DINODE_FMT_DEV: + case XFS_DINODE_FMT_LOCAL: + /* No mappings to check. */ + goto out_unlock; + case XFS_DINODE_FMT_EXTENTS: + XFS_SCRUB_FORK_GOTO(ifp->if_flags & XFS_IFEXTENTS, out_unlock); + break; + case XFS_DINODE_FMT_BTREE: + XFS_SCRUB_FORK_CHECK(whichfork != XFS_COW_FORK); + /* Scan the btree records. */ + cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork); + xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); + err2 = xfs_scrub_btree(sc, cur, xfs_scrub_bmapbt_helper, + &oinfo, &info); + xfs_btree_del_cursor(cur, err2 ? 
XFS_BTREE_ERROR : + XFS_BTREE_NOERROR); + if (err2 == -EDEADLOCK) + return err2; + else if (err2) + goto out_unlock; + break; + default: + XFS_SCRUB_FORK_GOTO(false, out_unlock); + break; + } + + /* Extent data is in memory, so scrub that. */ + + /* Find the offset of the last extent in the mapping. */ + error = xfs_bmap_last_offset(ip, &endoff, whichfork); + XFS_SCRUB_FORK_OP_ERROR_GOTO(out_unlock); + + /* Scrub extent records. */ + info.lastoff = 0; + ifp = XFS_IFORK_PTR(ip, whichfork); + found = xfs_iext_lookup_extent(ip, ifp, 0, &idx, &irec); + while (found) { + if (xfs_scrub_should_terminate(&error)) + break; + if (isnullstartblock(irec.br_startblock)) + continue; + XFS_SCRUB_FORK_CHECK(irec.br_startoff < endoff); + err2 = xfs_scrub_bmap_extent(ip, NULL, &info, &irec); + if (err2 == -EDEADLOCK) + return err2; + else if (!error && err2) + error = err2; + found = xfs_iext_get_extent(ifp, ++idx, &irec); + } + +out_unlock: + if (error == 0 && err2 != 0) + error = err2; + return error; +} +#undef XFS_SCRUB_FORK_CHECK +#undef XFS_SCRUB_FORK_GOTO + +/* Scrub an inode's data fork. */ +int +xfs_scrub_bmap_data( + struct xfs_scrub_context *sc) +{ + return xfs_scrub_bmap(sc, XFS_DATA_FORK); +} + +/* Scrub an inode's attr fork. */ +int +xfs_scrub_bmap_attr( + struct xfs_scrub_context *sc) +{ + return xfs_scrub_bmap(sc, XFS_ATTR_FORK); +} + +/* Scrub an inode's CoW fork. 
*/ /*
+ * Returns -ENOENT without scrubbing anything when sc->ip is not a reflink
+ * inode; otherwise returns the result of xfs_scrub_bmap() for
+ * XFS_COW_FORK.
+ */
+int
+xfs_scrub_bmap_cow(
+	struct xfs_scrub_context	*sc)
+{
+	if (!xfs_is_reflink_inode(sc->ip))	/* no CoW fork to look at */
+		return -ENOENT;
+
+	return xfs_scrub_bmap(sc, XFS_COW_FORK);
+}
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 4414a29..dafc315 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -762,6 +762,18 @@ static const struct xfs_scrub_meta_fns meta_scrub_fns[] = {
 		.setup	= xfs_scrub_setup_inode,
 		.scrub	= xfs_scrub_inode,
 	},
+	{ /* inode data fork */
+		.setup	= xfs_scrub_setup_inode_bmap_data,
+		.scrub	= xfs_scrub_bmap_data,
+	},
+	{ /* inode attr fork */
+		.setup	= xfs_scrub_setup_inode_bmap,
+		.scrub	= xfs_scrub_bmap_attr,
+	},
+	{ /* inode CoW fork */
+		.setup	= xfs_scrub_setup_inode_bmap,
+		.scrub	= xfs_scrub_bmap_cow,
+	},
 };
 
 /* Dispatch metadata scrubbing. */
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index b4aab69..da4a14b 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -207,6 +207,8 @@ SETUP_FN(xfs_scrub_setup_ag_iallocbt);
 SETUP_FN(xfs_scrub_setup_ag_rmapbt);
 SETUP_FN(xfs_scrub_setup_ag_refcountbt);
 SETUP_FN(xfs_scrub_setup_inode);
+SETUP_FN(xfs_scrub_setup_inode_bmap_data);
+SETUP_FN(xfs_scrub_setup_inode_bmap);
 #undef SETUP_FN
 
 /* Metadata scrubbers */
@@ -223,6 +225,9 @@ SCRUB_FN(xfs_scrub_finobt);
 SCRUB_FN(xfs_scrub_rmapbt);
 SCRUB_FN(xfs_scrub_refcountbt);
 SCRUB_FN(xfs_scrub_inode);
+SCRUB_FN(xfs_scrub_bmap_data);
+SCRUB_FN(xfs_scrub_bmap_attr);
+SCRUB_FN(xfs_scrub_bmap_cow);
 #undef SCRUB_FN
 
 #endif /* __XFS_REPAIR_COMMON_H__ */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ec11889..68a4c87 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3341,7 +3341,10 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
 	{ XFS_SCRUB_TYPE_FINOBT,	"finobt" }, \
 	{ XFS_SCRUB_TYPE_RMAPBT,	"rmapbt" }, \
 	{ XFS_SCRUB_TYPE_REFCNTBT,	"refcountbt" }, \
-	{ XFS_SCRUB_TYPE_INODE,		"inode" }
+	{ XFS_SCRUB_TYPE_INODE,		"inode" }, \
+	{ XFS_SCRUB_TYPE_BMBTD,		"bmapbtd" }, \
+	{ XFS_SCRUB_TYPE_BMBTA,		"bmapbta"
}, \ + { XFS_SCRUB_TYPE_BMBTC, "bmapbtc" } DECLARE_EVENT_CLASS(xfs_scrub_class, TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_metadata *sm, int error), -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html