From: Dave Chinner <david@xxxxxxxxxxxxx> Use the rmap btree to pre-populate the block type information so that when repair iterates the primary metadata, we can confirm the block type. Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> [split patch, add commit message] Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> --- repair/dinode.c | 6 + repair/incore.h | 16 +- repair/scan.c | 336 ++++++++++++++++++++++++++++++++++++++++++++++++--- repair/xfs_repair.c | 2 4 files changed, 331 insertions(+), 29 deletions(-) diff --git a/repair/dinode.c b/repair/dinode.c index f78f907..e81c245 100644 --- a/repair/dinode.c +++ b/repair/dinode.c @@ -744,6 +744,7 @@ _("%s fork in ino %" PRIu64 " claims dup extent, " _("%s fork in ino %" PRIu64 " claims free block %" PRIu64 "\n"), forkname, ino, (__uint64_t) b); /* fall through ... */ + case XR_E_INUSE1: /* seen by rmap */ case XR_E_UNKNOWN: set_bmap_ext(agno, agbno, blen, XR_E_INUSE); break; @@ -751,6 +752,11 @@ _("%s fork in ino %" PRIu64 " claims free block %" PRIu64 "\n"), case XR_E_BAD_STATE: do_error(_("bad state in block map %" PRIu64 "\n"), b); + case XR_E_FS_MAP1: + case XR_E_INO1: + case XR_E_INUSE_FS1: + do_warn(_("rmap claims metadata use!\n")); + /* fall through */ case XR_E_FS_MAP: case XR_E_INO: case XR_E_INUSE_FS: diff --git a/repair/incore.h b/repair/incore.h index c92475e..bc0810b 100644 --- a/repair/incore.h +++ b/repair/incore.h @@ -102,17 +102,11 @@ typedef struct rt_extent_tree_node { #define XR_E_MULT 5 /* extent is multiply referenced */ #define XR_E_INO 6 /* extent used by inodes (inode blocks) */ #define XR_E_FS_MAP 7 /* extent used by fs space/inode maps */ -#define XR_E_BAD_STATE 8 - -/* extent states, in 64 bit word chunks */ -#define XR_E_UNKNOWN_LL 0x0000000000000000LL -#define XR_E_FREE1_LL 0x1111111111111111LL -#define XR_E_FREE_LL 0x2222222222222222LL -#define XR_E_INUSE_LL 0x3333333333333333LL -#define XR_E_INUSE_FS_LL 0x4444444444444444LL -#define XR_E_MULT_LL 0x5555555555555555LL
-#define XR_E_INO_LL 0x6666666666666666LL -#define XR_E_FS_MAP_LL 0x7777777777777777LL +#define XR_E_INUSE1 8 /* used block (marked by rmap btree) */ +#define XR_E_INUSE_FS1 9 /* used by fs ag header or log (rmap btree) */ +#define XR_E_INO1 10 /* used by inodes (marked by rmap btree) */ +#define XR_E_FS_MAP1 11 /* used by fs space/inode maps (rmap btree) */ +#define XR_E_BAD_STATE 12 /* separate state bit, OR'ed into high (4th) bit of ex_state field */ diff --git a/repair/scan.c b/repair/scan.c index 1e7a4da..c1ab6df 100644 --- a/repair/scan.c +++ b/repair/scan.c @@ -44,6 +44,7 @@ struct aghdr_cnts { __uint32_t agicount; __uint32_t agifreecount; __uint64_t fdblocks; + __uint64_t usedblocks; __uint64_t ifreecount; __uint32_t fibtfreecount; }; @@ -308,6 +309,13 @@ _("bad back (left) sibling pointer (saw %llu should be NULL (0))\n" pthread_mutex_lock(&ag_locks[agno].lock); state = get_bmap(agno, agbno); switch (state) { + case XR_E_INUSE1: + /* + * block was claimed as in use data by the rmap + * btree, but has not been found in the data extent + * map for the inode. 
That means this bmbt block hasn't + * yet been claimed as in use, which means -it's ours- + */ case XR_E_UNKNOWN: case XR_E_FREE1: case XR_E_FREE: @@ -763,6 +771,252 @@ ino_issparse( return xfs_inobt_is_sparse_disk(rp, offset); } + +static void +scan_rmapbt( + struct xfs_btree_block *block, + int level, + xfs_agblock_t bno, + xfs_agnumber_t agno, + int suspect, + int isroot, + __uint32_t magic, + void *priv) +{ + struct aghdr_cnts *agcnts = priv; + const char *name = "rmap"; + int i; + xfs_rmap_ptr_t *pp; + struct xfs_rmap_rec *rp; + int hdr_errors = 0; + int numrecs; + int state; + xfs_agblock_t lastblock = 0; + + if (magic != XFS_RMAP_CRC_MAGIC) { + name = "(unknown)"; + assert(0); + } + + if (be32_to_cpu(block->bb_magic) != magic) { + do_warn(_("bad magic # %#x in bt%s block %d/%d\n"), + be32_to_cpu(block->bb_magic), name, agno, bno); + hdr_errors++; + if (suspect) + return; + } + + /* + * All RMAP btree blocks except the roots are freed for a + * fully empty filesystem, thus they are counted towards the + * free data block counter. 
+ */ + if (!isroot) { + agcnts->agfbtreeblks++; + agcnts->fdblocks++; + } + + if (be16_to_cpu(block->bb_level) != level) { + do_warn(_("expected level %d got %d in bt%s block %d/%d\n"), + level, be16_to_cpu(block->bb_level), name, agno, bno); + hdr_errors++; + if (suspect) + return; + } + + /* check for btree blocks multiply claimed */ + state = get_bmap(agno, bno); + if (!(state == XR_E_UNKNOWN || state == XR_E_FS_MAP1)) { + set_bmap(agno, bno, XR_E_MULT); + do_warn( +_("%s rmap btree block claimed (state %d), agno %d, bno %d, suspect %d\n"), + name, state, agno, bno, suspect); + return; + } + set_bmap(agno, bno, XR_E_FS_MAP); + + numrecs = be16_to_cpu(block->bb_numrecs); + if (level == 0) { + if (numrecs > mp->m_rmap_mxr[0]) { + numrecs = mp->m_rmap_mxr[0]; + hdr_errors++; + } + if (isroot == 0 && numrecs < mp->m_rmap_mnr[0]) { + numrecs = mp->m_rmap_mnr[0]; + hdr_errors++; + } + + if (hdr_errors) { + do_warn( + _("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"), + be16_to_cpu(block->bb_numrecs), + mp->m_rmap_mnr[0], mp->m_rmap_mxr[0], + name, agno, bno); + suspect++; + } + + rp = XFS_RMAP_REC_ADDR(block, 1); + for (i = 0; i < numrecs; i++) { + xfs_agblock_t b, end; + xfs_extlen_t len, blen; + int64_t owner; + + b = be32_to_cpu(rp[i].rm_startblock); + len = be32_to_cpu(rp[i].rm_blockcount); + owner = be64_to_cpu(rp[i].rm_owner); + end = b + len; + + if (!verify_agbno(mp, agno, b)) { + do_warn( + _("invalid start block %u in record %u of %s btree block %u/%u\n"), + b, i, name, agno, bno); + continue; + } + if (len == 0 || !verify_agbno(mp, agno, end - 1)) { + do_warn( + _("invalid length %u in record %u of %s btree block %u/%u\n"), + len, i, name, agno, bno); + continue; + } + + /* XXX: range check owner */ + + if (b && b <= lastblock) { + do_warn(_( + "out-of-order rmap btree record %d (%u %u) block %u/%u\n"), + i, b, len, agno, bno); + } else { + lastblock = b; + } + + for ( ; b < end; b += blen) { + state = get_bmap_ext(agno, b, end, &blen); + 
switch (state) { + case XR_E_UNKNOWN: + switch (owner) { + case XFS_RMAP_OWN_FS: + case XFS_RMAP_OWN_LOG: + set_bmap(agno, b, XR_E_INUSE_FS1); + break; + case XFS_RMAP_OWN_AG: + case XFS_RMAP_OWN_INOBT: + set_bmap(agno, b, XR_E_FS_MAP1); + break; + case XFS_RMAP_OWN_INODES: + set_bmap(agno, b, XR_E_INO1); + break; + case XFS_RMAP_OWN_NULL: + /* still unknown */ + break; + default: + /* file data */ + set_bmap(agno, b, XR_E_INUSE1); + break; + } + break; + case XR_E_INUSE_FS: + if (owner == XFS_RMAP_OWN_FS || + owner == XFS_RMAP_OWN_LOG) + break; + do_warn( +_("Static meta block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"), + agno, b, b + blen - 1, + name, state, owner); + break; + case XR_E_FS_MAP: + if (owner == XFS_RMAP_OWN_AG || + owner == XFS_RMAP_OWN_INOBT) + break; + do_warn( +_("AG meta block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"), + agno, b, b + blen - 1, + name, state, owner); + break; + case XR_E_INO: + if (owner == XFS_RMAP_OWN_INODES) + break; + do_warn( +_("inode block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"), + agno, b, b + blen - 1, + name, state, owner); + break; + case XR_E_INUSE: + if (owner >= 0 && + owner < mp->m_sb.sb_dblocks) + break; + do_warn( +_("in use block (%d,%d-%d) mismatch in %s tree, state - %d,%" PRIx64 "\n"), + agno, b, b + blen - 1, + name, state, owner); + break; + case XR_E_FREE1: + case XR_E_FREE: + /* + * May be on the AGFL. If not, they'll + * be caught later. 
+ */ + break; + default: + do_warn( +_("unknown block (%d,%d-%d) mismatch on %s tree, state - %d,%" PRIx64 "\n"), + agno, b, b + blen - 1, + name, state, owner); + break; + } + } + } + return; + } + + /* + * interior record + */ + pp = XFS_RMAP_PTR_ADDR(block, 1, mp->m_rmap_mxr[1]); + + if (numrecs > mp->m_rmap_mxr[1]) { + numrecs = mp->m_rmap_mxr[1]; + hdr_errors++; + } + if (isroot == 0 && numrecs < mp->m_rmap_mnr[1]) { + numrecs = mp->m_rmap_mnr[1]; + hdr_errors++; + } + + /* + * don't pass bogus tree flag down further if this block + * looked ok. bail out if two levels in a row look bad. + */ + if (hdr_errors) { + do_warn( + _("bad btree nrecs (%u, min=%u, max=%u) in bt%s block %u/%u\n"), + be16_to_cpu(block->bb_numrecs), + mp->m_rmap_mnr[1], mp->m_rmap_mxr[1], + name, agno, bno); + if (suspect) + return; + suspect++; + } else if (suspect) { + suspect = 0; + } + + for (i = 0; i < numrecs; i++) { + xfs_agblock_t bno = be32_to_cpu(pp[i]); + + /* + * XXX - put sibling detection right here. + * we know our sibling chain is good. So as we go, + * we check the entry before and after each entry. + * If either of the entries references a different block, + * check the sibling pointer. If there's a sibling + * pointer mismatch, try and extract as much data + * as possible. 
+ */ + if (bno != 0 && verify_agbno(mp, agno, bno)) { + scan_sbtree(bno, level, agno, suspect, scan_rmapbt, 0, + magic, priv, &xfs_rmapbt_buf_ops); + } + } +} /* * The following helpers are to help process and validate individual on-disk @@ -976,20 +1230,27 @@ scan_single_ino_chunk( agbno = XFS_AGINO_TO_AGBNO(mp, ino + j); state = get_bmap(agno, agbno); - if (state == XR_E_UNKNOWN) { - set_bmap(agno, agbno, XR_E_INO); - } else if (state == XR_E_INUSE_FS && agno == 0 && - ino + j >= first_prealloc_ino && - ino + j < last_prealloc_ino) { + switch (state) { + case XR_E_INO: + break; + case XR_E_UNKNOWN: + case XR_E_INO1: /* seen by rmap */ set_bmap(agno, agbno, XR_E_INO); - } else { + break; + case XR_E_INUSE_FS: + case XR_E_INUSE_FS1: + if (agno == 0 && + ino + j >= first_prealloc_ino && + ino + j < last_prealloc_ino) { + set_bmap(agno, agbno, XR_E_INO); + break; + } + /* fall through */ + default: + /* XXX - maybe should mark block a duplicate */ do_warn( _("inode chunk claims used block, inobt block - agno %d, bno %d, inopb %d\n"), agno, agbno, mp->m_sb.sb_inopblock); - /* - * XXX - maybe should mark - * block a duplicate - */ return ++suspect; } } @@ -1099,19 +1360,35 @@ _("sparse inode chunk claims inode block, finobt block - agno %d, bno %d, inopb continue; } - if (state == XR_E_INO) { - continue; - } else if ((state == XR_E_UNKNOWN) || - (state == XR_E_INUSE_FS && agno == 0 && - ino + j >= first_prealloc_ino && - ino + j < last_prealloc_ino)) { + switch (state) { + case XR_E_INO: + break; + case XR_E_INO1: /* seen by rmap */ + set_bmap(agno, agbno, XR_E_INO); + break; + case XR_E_UNKNOWN: do_warn( _("inode chunk claims untracked block, finobt block - agno %d, bno %d, inopb %d\n"), agno, agbno, mp->m_sb.sb_inopblock); set_bmap(agno, agbno, XR_E_INO); suspect++; - } else { + break; + case XR_E_INUSE_FS: + case XR_E_INUSE_FS1: + if (agno == 0 && + ino + j >= first_prealloc_ino && + ino + j < last_prealloc_ino) { + do_warn( +_("inode chunk claims untracked block, 
finobt block - agno %d, bno %d, inopb %d\n"), + agno, agbno, mp->m_sb.sb_inopblock); + + set_bmap(agno, agbno, XR_E_INO); + suspect++; + break; + } + /* fall through */ + default: do_warn( _("inode chunk claims used block, finobt block - agno %d, bno %d, inopb %d\n"), agno, agbno, mp->m_sb.sb_inopblock); @@ -1280,6 +1557,7 @@ scan_inobt( */ state = get_bmap(agno, bno); switch (state) { + case XR_E_FS_MAP1: /* already been seen by an rmap scan */ case XR_E_UNKNOWN: case XR_E_FREE1: case XR_E_FREE: @@ -1420,7 +1698,7 @@ scan_freelist( if (XFS_SB_BLOCK(mp) != XFS_AGFL_BLOCK(mp) && XFS_AGF_BLOCK(mp) != XFS_AGFL_BLOCK(mp) && XFS_AGI_BLOCK(mp) != XFS_AGFL_BLOCK(mp)) - set_bmap(agno, XFS_AGFL_BLOCK(mp), XR_E_FS_MAP); + set_bmap(agno, XFS_AGFL_BLOCK(mp), XR_E_INUSE_FS); if (be32_to_cpu(agf->agf_flcount) == 0) return; @@ -1505,6 +1783,19 @@ validate_agf( bno, agno); } + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { + bno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_RMAP]); + if (bno != 0 && verify_agbno(mp, agno, bno)) { + scan_sbtree(bno, + be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]), + agno, 0, scan_rmapbt, 1, XFS_RMAP_CRC_MAGIC, + agcnts, &xfs_rmapbt_buf_ops); + } else { + do_warn(_("bad agbno %u for rmapbt root, agno %d\n"), + bno, agno); + } + } + if (be32_to_cpu(agf->agf_freeblks) != agcnts->agffreeblks) { do_warn(_("agf_freeblks %u, counted %u in ag %u\n"), be32_to_cpu(agf->agf_freeblks), agcnts->agffreeblks, agno); @@ -1520,6 +1811,7 @@ validate_agf( do_warn(_("agf_btreeblks %u, counted %" PRIu64 " in ag %u\n"), be32_to_cpu(agf->agf_btreeblks), agcnts->agfbtreeblks, agno); } + } static void @@ -1759,6 +2051,7 @@ scan_ags( __uint64_t fdblocks = 0; __uint64_t icount = 0; __uint64_t ifreecount = 0; + __uint64_t usedblocks = 0; xfs_agnumber_t i; work_queue_t wq; @@ -1781,6 +2074,7 @@ scan_ags( fdblocks += agcnts[i].fdblocks; icount += agcnts[i].agicount; ifreecount += agcnts[i].ifreecount; + usedblocks += agcnts[i].usedblocks; } free(agcnts); @@ -1802,5 +2096,11 @@ scan_ags( 
do_warn(_("sb_fdblocks %" PRIu64 ", counted %" PRIu64 "\n"), mp->m_sb.sb_fdblocks, fdblocks); } + + if (usedblocks && + usedblocks != mp->m_sb.sb_dblocks - fdblocks) { + do_warn(_("used blocks %" PRIu64 ", counted %" PRIu64 "\n"), + mp->m_sb.sb_dblocks - fdblocks, usedblocks); + } } diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c index 85a012b..933986a 100644 --- a/repair/xfs_repair.c +++ b/repair/xfs_repair.c @@ -416,6 +416,8 @@ calc_mkfs(xfs_mount_t *mp) fino_bno = inobt_root + (2 * min(2, mp->m_ag_maxlevels)) + 1; if (xfs_sb_version_hasfinobt(&mp->m_sb)) fino_bno++; + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) + fino_bno++; /* * If the log is allocated in the first allocation group we need to _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs