On Thu, Nov 14, 2019 at 10:19:46AM -0800, Darrick J. Wong wrote: > From: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > > Whenever we encounter corrupt directory or extended attribute blocks, we > should report that to the health monitoring system for later reporting. > > Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > --- > fs/xfs/libxfs/xfs_attr_leaf.c | 5 ++++- > fs/xfs/libxfs/xfs_attr_remote.c | 27 ++++++++++++++++----------- > fs/xfs/libxfs/xfs_da_btree.c | 29 ++++++++++++++++++++++++++--- > fs/xfs/libxfs/xfs_dir2.c | 5 ++++- > fs/xfs/libxfs/xfs_dir2_data.c | 2 ++ > fs/xfs/libxfs/xfs_dir2_leaf.c | 3 +++ > fs/xfs/libxfs/xfs_dir2_node.c | 7 +++++++ > fs/xfs/libxfs/xfs_health.h | 3 +++ > fs/xfs/xfs_attr_inactive.c | 4 ++++ > fs/xfs/xfs_attr_list.c | 16 +++++++++++++--- > fs/xfs/xfs_dir2_readdir.c | 6 +++++- > fs/xfs/xfs_health.c | 39 +++++++++++++++++++++++++++++++++++++++ > 12 files changed, 126 insertions(+), 20 deletions(-) > > ... > diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c > index e424b004e3cb..a17622dadf00 100644 > --- a/fs/xfs/libxfs/xfs_da_btree.c > +++ b/fs/xfs/libxfs/xfs_da_btree.c ... > @@ -1589,6 +1593,7 @@ xfs_da3_node_lookup_int( > > if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) { > xfs_buf_corruption_error(blk->bp); > + xfs_da_mark_sick(args); > return -EFSCORRUPTED; > } > > @@ -1604,6 +1609,7 @@ xfs_da3_node_lookup_int( > /* Tree taller than we can handle; bail out! */ > if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) { > xfs_buf_corruption_error(blk->bp); > + xfs_da_mark_sick(args); > return -EFSCORRUPTED; > } > > @@ -1612,6 +1618,7 @@ xfs_da3_node_lookup_int( > expected_level = nodehdr.level - 1; > else if (expected_level != nodehdr.level) { > xfs_buf_corruption_error(blk->bp); > + xfs_da_mark_sick(args); > return -EFSCORRUPTED; > } else > expected_level--; > @@ -1663,12 +1670,16 @@ xfs_da3_node_lookup_int( > } > > /* We can't point back to the root. 
*/ > - if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk)) > + if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk)) { > + xfs_da_mark_sick(args); > return -EFSCORRUPTED; > + } > } > > - if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0)) > + if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0)) { > + xfs_da_mark_sick(args); > return -EFSCORRUPTED; > + } > > /* > * A leaf block that ends in the hashval that we are interested in > @@ -1686,6 +1697,7 @@ xfs_da3_node_lookup_int( > args->blkno = blk->blkno; > } else { > ASSERT(0); > + xfs_da_mark_sick(args); > return -EFSCORRUPTED; > }

I'm just kind of skimming through the rest for general feedback at this point given previous comments, but it might be nice to start using exit labels at some of these places where we're enlarging and duplicating the error path for particular errors. It's not so much about the code in these patches, but rather to hopefully ease maintaining these state bits properly in new code where devs/reviewers might not know much about scrub state or have it in mind. Short of having some kind of generic helper to handle corruption state, it seems to me that the combination of using verifiers where possible and common exit labels anywhere else we generate -EFSCORRUPTED at multiple places within some function could shrink these patches a bit.

Brian

> if (((retval == -ENOENT) || (retval == -ENOATTR)) && > @@ -2250,8 +2262,10 @@ xfs_da3_swap_lastblock( > error = xfs_bmap_last_before(tp, dp, &lastoff, w); > if (error) > return error; > - if (XFS_IS_CORRUPT(mp, lastoff == 0)) > + if (XFS_IS_CORRUPT(mp, lastoff == 0)) { > + xfs_da_mark_sick(args); > return -EFSCORRUPTED; > + } > /* > * Read the last block in the btree space. 
> */ > @@ -2300,6 +2314,7 @@ xfs_da3_swap_lastblock( > if (XFS_IS_CORRUPT(mp, > be32_to_cpu(sib_info->forw) != last_blkno || > sib_info->magic != dead_info->magic)) { > + xfs_da_mark_sick(args); > error = -EFSCORRUPTED; > goto done; > } > @@ -2320,6 +2335,7 @@ xfs_da3_swap_lastblock( > if (XFS_IS_CORRUPT(mp, > be32_to_cpu(sib_info->back) != last_blkno || > sib_info->magic != dead_info->magic)) { > + xfs_da_mark_sick(args); > error = -EFSCORRUPTED; > goto done; > } > @@ -2342,6 +2358,7 @@ xfs_da3_swap_lastblock( > xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node); > if (XFS_IS_CORRUPT(mp, > level >= 0 && level != par_hdr.level + 1)) { > + xfs_da_mark_sick(args); > error = -EFSCORRUPTED; > goto done; > } > @@ -2353,6 +2370,7 @@ xfs_da3_swap_lastblock( > entno++) > continue; > if (XFS_IS_CORRUPT(mp, entno == par_hdr.count)) { > + xfs_da_mark_sick(args); > error = -EFSCORRUPTED; > goto done; > } > @@ -2378,6 +2396,7 @@ xfs_da3_swap_lastblock( > xfs_trans_brelse(tp, par_buf); > par_buf = NULL; > if (XFS_IS_CORRUPT(mp, par_blkno == 0)) { > + xfs_da_mark_sick(args); > error = -EFSCORRUPTED; > goto done; > } > @@ -2387,6 +2406,7 @@ xfs_da3_swap_lastblock( > par_node = par_buf->b_addr; > xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node); > if (XFS_IS_CORRUPT(mp, par_hdr.level != level)) { > + xfs_da_mark_sick(args); > error = -EFSCORRUPTED; > goto done; > } > @@ -2601,6 +2621,7 @@ xfs_dabuf_map( > irecs[i].br_state); > } > } > + xfs_dirattr_mark_sick(dp, whichfork); > error = -EFSCORRUPTED; > goto out; > } > @@ -2693,6 +2714,8 @@ xfs_da_read_buf( > error = xfs_trans_read_buf_map(dp->i_mount, trans, > dp->i_mount->m_ddev_targp, > mapp, nmap, 0, &bp, ops); > + if (xfs_metadata_is_sick(error)) > + xfs_dirattr_mark_sick(dp, whichfork); > if (error) > goto out_free; > > diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c > index 0aa87cbde49e..e1aa411a1b8b 100644 > --- a/fs/xfs/libxfs/xfs_dir2.c > +++ b/fs/xfs/libxfs/xfs_dir2.c > @@ -18,6 +18,7 @@ 
> #include "xfs_errortag.h" > #include "xfs_error.h" > #include "xfs_trace.h" > +#include "xfs_health.h" > > struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; > > @@ -608,8 +609,10 @@ xfs_dir2_isblock( > rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize; > if (XFS_IS_CORRUPT(args->dp->i_mount, > rval != 0 && > - args->dp->i_d.di_size != args->geo->blksize)) > + args->dp->i_d.di_size != args->geo->blksize)) { > + xfs_da_mark_sick(args); > return -EFSCORRUPTED; > + } > *vp = rval; > return 0; > } > diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c > index a6eb71a62b53..80cc9c7ea4e5 100644 > --- a/fs/xfs/libxfs/xfs_dir2_data.c > +++ b/fs/xfs/libxfs/xfs_dir2_data.c > @@ -18,6 +18,7 @@ > #include "xfs_trans.h" > #include "xfs_buf_item.h" > #include "xfs_log.h" > +#include "xfs_health.h" > > static xfs_failaddr_t xfs_dir2_data_freefind_verify( > struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf, > @@ -1170,6 +1171,7 @@ xfs_dir2_data_use_free( > corrupt: > xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, args->dp->i_mount, > hdr, sizeof(*hdr), __FILE__, __LINE__, fa); > + xfs_da_mark_sick(args); > return -EFSCORRUPTED; > } > > diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c > index 73edd96ce0ac..32d17420fff3 100644 > --- a/fs/xfs/libxfs/xfs_dir2_leaf.c > +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c > @@ -19,6 +19,7 @@ > #include "xfs_trace.h" > #include "xfs_trans.h" > #include "xfs_buf_item.h" > +#include "xfs_health.h" > > /* > * Local function declarations. > @@ -1386,8 +1387,10 @@ xfs_dir2_leaf_removename( > bestsp = xfs_dir2_leaf_bests_p(ltp); > if (be16_to_cpu(bestsp[db]) != oldbest) { > xfs_buf_corruption_error(lbp); > + xfs_da_mark_sick(args); > return -EFSCORRUPTED; > } > + > /* > * Mark the former data entry unused. 
> */ > diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c > index 3a8b0625a08b..e0f3ab254a1a 100644 > --- a/fs/xfs/libxfs/xfs_dir2_node.c > +++ b/fs/xfs/libxfs/xfs_dir2_node.c > @@ -20,6 +20,7 @@ > #include "xfs_trans.h" > #include "xfs_buf_item.h" > #include "xfs_log.h" > +#include "xfs_health.h" > > /* > * Function declarations. > @@ -228,6 +229,7 @@ __xfs_dir3_free_read( > if (fa) { > xfs_verifier_error(*bpp, -EFSCORRUPTED, fa); > xfs_trans_brelse(tp, *bpp); > + xfs_dirattr_mark_sick(dp, XFS_DATA_FORK); > return -EFSCORRUPTED; > } > > @@ -440,6 +442,7 @@ xfs_dir2_leaf_to_node( > if (be32_to_cpu(ltp->bestcount) > > (uint)dp->i_d.di_size / args->geo->blksize) { > xfs_buf_corruption_error(lbp); > + xfs_da_mark_sick(args); > return -EFSCORRUPTED; > } > > @@ -514,6 +517,7 @@ xfs_dir2_leafn_add( > */ > if (index < 0) { > xfs_buf_corruption_error(bp); > + xfs_da_mark_sick(args); > return -EFSCORRUPTED; > } > > @@ -733,6 +737,7 @@ xfs_dir2_leafn_lookup_for_addname( > cpu_to_be16(NULLDATAOFF))) { > if (curfdb != newfdb) > xfs_trans_brelse(tp, curbp); > + xfs_da_mark_sick(args); > return -EFSCORRUPTED; > } > curfdb = newfdb; > @@ -801,6 +806,7 @@ xfs_dir2_leafn_lookup_for_entry( > xfs_dir3_leaf_check(dp, bp); > if (leafhdr.count <= 0) { > xfs_buf_corruption_error(bp); > + xfs_da_mark_sick(args); > return -EFSCORRUPTED; > } > > @@ -1737,6 +1743,7 @@ xfs_dir2_node_add_datablk( > } else { > xfs_alert(mp, " ... fblk is NULL"); > } > + xfs_da_mark_sick(args); > return -EFSCORRUPTED; > } > > diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h > index 2049419e9555..d9404cd3d09b 100644 > --- a/fs/xfs/libxfs/xfs_health.h > +++ b/fs/xfs/libxfs/xfs_health.h > @@ -38,6 +38,7 @@ struct xfs_perag; > struct xfs_inode; > struct xfs_fsop_geom; > struct xfs_btree_cur; > +struct xfs_da_args; > > /* Observable health issues for metadata spanning the entire filesystem. 
*/ > #define XFS_SICK_FS_COUNTERS (1 << 0) /* summary counters */ > @@ -141,6 +142,8 @@ void xfs_inode_measure_sickness(struct xfs_inode *ip, unsigned int *sick, > void xfs_health_unmount(struct xfs_mount *mp); > void xfs_bmap_mark_sick(struct xfs_inode *ip, int whichfork); > void xfs_btree_mark_sick(struct xfs_btree_cur *cur); > +void xfs_dirattr_mark_sick(struct xfs_inode *ip, int whichfork); > +void xfs_da_mark_sick(struct xfs_da_args *args); > > /* Now some helpers. */ > > diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c > index a78c501f6fb1..429a97494ffa 100644 > --- a/fs/xfs/xfs_attr_inactive.c > +++ b/fs/xfs/xfs_attr_inactive.c > @@ -23,6 +23,7 @@ > #include "xfs_quota.h" > #include "xfs_dir2.h" > #include "xfs_error.h" > +#include "xfs_health.h" > > /* > * Look at all the extents for this logical region, > @@ -209,6 +210,7 @@ xfs_attr3_node_inactive( > if (level > XFS_DA_NODE_MAXDEPTH) { > xfs_trans_brelse(*trans, bp); /* no locks for later trans */ > xfs_buf_corruption_error(bp); > + xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK); > return -EFSCORRUPTED; > } > > @@ -256,6 +258,7 @@ xfs_attr3_node_inactive( > error = xfs_attr3_leaf_inactive(trans, dp, child_bp); > break; > default: > + xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK); > xfs_buf_corruption_error(child_bp); > xfs_trans_brelse(*trans, child_bp); > error = -EFSCORRUPTED; > @@ -342,6 +345,7 @@ xfs_attr3_root_inactive( > error = xfs_attr3_leaf_inactive(trans, dp, bp); > break; > default: > + xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK); > error = -EFSCORRUPTED; > xfs_buf_corruption_error(bp); > xfs_trans_brelse(*trans, bp); > diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c > index 7a099df88a0c..1a2a3d4ce422 100644 > --- a/fs/xfs/xfs_attr_list.c > +++ b/fs/xfs/xfs_attr_list.c > @@ -21,6 +21,7 @@ > #include "xfs_error.h" > #include "xfs_trace.h" > #include "xfs_dir2.h" > +#include "xfs_health.h" > > STATIC int > xfs_attr_shortform_compare(const void *a, const void *b) > @@ -88,8 
+89,10 @@ xfs_attr_shortform_list( > for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { > if (XFS_IS_CORRUPT(context->dp->i_mount, > !xfs_attr_namecheck(sfe->nameval, > - sfe->namelen))) > + sfe->namelen))) { > + xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK); > return -EFSCORRUPTED; > + } > context->put_listent(context, > sfe->flags, > sfe->nameval, > @@ -131,6 +134,7 @@ xfs_attr_shortform_list( > context->dp->i_mount, sfe, > sizeof(*sfe)); > kmem_free(sbuf); > + xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK); > return -EFSCORRUPTED; > } > > @@ -181,6 +185,7 @@ xfs_attr_shortform_list( > if (XFS_IS_CORRUPT(context->dp->i_mount, > !xfs_attr_namecheck(sbp->name, > sbp->namelen))) { > + xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK); > error = -EFSCORRUPTED; > goto out; > } > @@ -268,8 +273,10 @@ xfs_attr_node_list_lookup( > return 0; > > /* We can't point back to the root. */ > - if (XFS_IS_CORRUPT(mp, cursor->blkno == 0)) > + if (XFS_IS_CORRUPT(mp, cursor->blkno == 0)) { > + xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK); > return -EFSCORRUPTED; > + } > } > > if (expected_level != 0) > @@ -281,6 +288,7 @@ xfs_attr_node_list_lookup( > out_corruptbuf: > xfs_buf_corruption_error(bp); > xfs_trans_brelse(tp, bp); > + xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK); > return -EFSCORRUPTED; > } > > @@ -471,8 +479,10 @@ xfs_attr3_leaf_list_int( > } > > if (XFS_IS_CORRUPT(context->dp->i_mount, > - !xfs_attr_namecheck(name, namelen))) > + !xfs_attr_namecheck(name, namelen))) { > + xfs_dirattr_mark_sick(context->dp, XFS_ATTR_FORK); > return -EFSCORRUPTED; > + } > context->put_listent(context, entry->flags, > name, namelen, valuelen); > if (context->seen_enough) > diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c > index 95bc9ef8f5f9..715ded503334 100644 > --- a/fs/xfs/xfs_dir2_readdir.c > +++ b/fs/xfs/xfs_dir2_readdir.c > @@ -18,6 +18,7 @@ > #include "xfs_bmap.h" > #include "xfs_trans.h" > #include "xfs_error.h" > +#include "xfs_health.h" > > /* > * Directory file type support 
functions > @@ -119,8 +120,10 @@ xfs_dir2_sf_getdents( > ctx->pos = off & 0x7fffffff; > if (XFS_IS_CORRUPT(dp->i_mount, > !xfs_dir2_namecheck(sfep->name, > - sfep->namelen))) > + sfep->namelen))) { > + xfs_dirattr_mark_sick(dp, XFS_DATA_FORK); > return -EFSCORRUPTED; > + } > if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, ino, > xfs_dir3_get_dtype(mp, filetype))) > return 0; > @@ -461,6 +464,7 @@ xfs_dir2_leaf_getdents( > if (XFS_IS_CORRUPT(dp->i_mount, > !xfs_dir2_namecheck(dep->name, > dep->namelen))) { > + xfs_dirattr_mark_sick(dp, XFS_DATA_FORK); > error = -EFSCORRUPTED; > break; > } > diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c > index 1f09027c55ad..c1b6e8fb72ec 100644 > --- a/fs/xfs/xfs_health.c > +++ b/fs/xfs/xfs_health.c > @@ -15,6 +15,8 @@ > #include "xfs_trace.h" > #include "xfs_health.h" > #include "xfs_btree.h" > +#include "xfs_da_format.h" > +#include "xfs_da_btree.h" > > /* > * Warn about metadata corruption that we detected but haven't fixed, and > @@ -517,3 +519,40 @@ xfs_btree_mark_sick( > > xfs_agno_mark_sick(cur->bc_mp, cur->bc_private.a.agno, mask); > } > + > +/* > + * Record observations of dir/attr btree corruption with the health tracking > + * system. > + */ > +void > +xfs_dirattr_mark_sick( > + struct xfs_inode *ip, > + int whichfork) > +{ > + unsigned int mask; > + > + switch (whichfork) { > + case XFS_DATA_FORK: > + mask = XFS_SICK_INO_DIR; > + break; > + case XFS_ATTR_FORK: > + mask = XFS_SICK_INO_XATTR; > + break; > + default: > + ASSERT(0); > + return; > + } > + > + xfs_inode_mark_sick(ip, mask); > +} > + > +/* > + * Record observations of dir/attr btree corruption with the health tracking > + * system. > + */ > +void > +xfs_da_mark_sick( > + struct xfs_da_args *args) > +{ > + xfs_dirattr_mark_sick(args->dp, args->whichfork); > +} >