Re: [PATCH 14/21] xfs: zap broken inode forks

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun, Jun 24, 2018 at 12:24:57PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> 
> Determine if inode fork damage is responsible for the inode being unable
> to pass the ifork verifiers in xfs_iget and zap the fork contents if
> this is true.  Once this is done the fork will be empty but we'll be
> able to construct an in-core inode, and a subsequent call to the inode
> fork repair ioctl will search the rmapbt to rebuild the records that
> were in the fork.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> ---
>  fs/xfs/libxfs/xfs_attr_leaf.c |   32 ++-
>  fs/xfs/libxfs/xfs_attr_leaf.h |    2 
>  fs/xfs/libxfs/xfs_bmap.c      |   21 ++
>  fs/xfs/libxfs/xfs_bmap.h      |    2 
>  fs/xfs/scrub/inode_repair.c   |  399 +++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 437 insertions(+), 19 deletions(-)
> 
> 
> diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
> index b3c19339e1b5..f6c458104934 100644
> --- a/fs/xfs/libxfs/xfs_attr_leaf.c
> +++ b/fs/xfs/libxfs/xfs_attr_leaf.c
> @@ -894,23 +894,16 @@ xfs_attr_shortform_allfit(
>  	return xfs_attr_shortform_bytesfit(dp, bytes);
>  }
>  
> -/* Verify the consistency of an inline attribute fork. */
> +/* Verify the consistency of a raw inline attribute fork. */
>  xfs_failaddr_t
> -xfs_attr_shortform_verify(
> -	struct xfs_inode		*ip)
> +xfs_attr_shortform_verify_struct(
> +	struct xfs_attr_shortform	*sfp,
> +	size_t				size)

The internal structure checking functions in the directory code
use the naming convention xfs_dir3_<type>_check(). I think we should use
the same here. i.e. xfs_attr_sf_check().

> diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
> index b7f094e19bab..b1254e6c17b5 100644
> --- a/fs/xfs/libxfs/xfs_bmap.c
> +++ b/fs/xfs/libxfs/xfs_bmap.c
> @@ -6223,18 +6223,16 @@ xfs_bmap_finish_one(
>  	return error;
>  }
>  
> -/* Check that an inode's extent does not have invalid flags or bad ranges. */
> +/* Check that an extent does not have invalid flags or bad ranges. */
>  xfs_failaddr_t
> -xfs_bmap_validate_extent(
> -	struct xfs_inode	*ip,
> +xfs_bmbt_validate_extent(

xfs_bmbt_ prefixes should only appear in xfs_bmap_btree.c, not
xfs_bmap.c....

So either it needs to get moved or renamed to something like
xfs_bmap_validate_irec()?


> diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c
> index 4ac43c1b1eb0..b941f21d7667 100644
> --- a/fs/xfs/scrub/inode_repair.c
> +++ b/fs/xfs/scrub/inode_repair.c
> @@ -22,11 +22,15 @@
>  #include "xfs_ialloc.h"
>  #include "xfs_da_format.h"
>  #include "xfs_reflink.h"
> +#include "xfs_alloc.h"
>  #include "xfs_rmap.h"
> +#include "xfs_rmap_btree.h"
>  #include "xfs_bmap.h"
> +#include "xfs_bmap_btree.h"
>  #include "xfs_bmap_util.h"
>  #include "xfs_dir2.h"
>  #include "xfs_quota_defs.h"
> +#include "xfs_attr_leaf.h"
>  #include "scrub/xfs_scrub.h"
>  #include "scrub/scrub.h"
>  #include "scrub/common.h"
> @@ -113,7 +117,8 @@ xfs_repair_inode_mode(
>  STATIC void
>  xfs_repair_inode_flags(
>  	struct xfs_scrub_context	*sc,
> -	struct xfs_dinode		*dip)
> +	struct xfs_dinode		*dip,
> +	bool				is_rt_file)
>  {
>  	struct xfs_mount		*mp = sc->mp;
>  	uint64_t			flags2;
> @@ -132,6 +137,10 @@ xfs_repair_inode_flags(
>  		flags2 &= ~XFS_DIFLAG2_REFLINK;
>  	if (flags2 & XFS_DIFLAG2_REFLINK)
>  		flags2 &= ~XFS_DIFLAG2_DAX;
> +	if (is_rt_file)
> +		flags |= XFS_DIFLAG_REALTIME;
> +	else
> +		flags &= ~XFS_DIFLAG_REALTIME;

This needs to be done first. i.e. before we check things like rt vs
reflink flags.

> @@ -210,17 +219,402 @@ xfs_repair_inode_extsize_hints(
>  	}
>  }
>  
> +struct xfs_repair_inode_fork_counters {
> +	struct xfs_scrub_context	*sc;
> +	xfs_rfsblock_t			data_blocks;
> +	xfs_rfsblock_t			rt_blocks;

inode is either data or rt, not both. Why do you need two separate
counters? Oh, bmbt blocks are always data, right? Coment, perhaps?

> +	xfs_rfsblock_t			attr_blocks;
> +	xfs_extnum_t			data_extents;
> +	xfs_extnum_t			rt_extents;

but bmbt blocks are not data extents, so only one counter here?

> +/* Count extents and blocks for a given inode from all rmap data. */
> +STATIC int
> +xfs_repair_inode_count_rmaps(
> +	struct xfs_repair_inode_fork_counters	*rifc)
> +{
> +	xfs_agnumber_t			agno;
> +	int				error;
> +
> +	if (!xfs_sb_version_hasrmapbt(&rifc->sc->mp->m_sb) ||
> +	    xfs_sb_version_hasrealtime(&rifc->sc->mp->m_sb))
> +		return -EOPNOTSUPP;
> +
> +	/* XXX: find rt blocks too */

Ok, needs a comment up front that realtime repair isn't supported,
rather than hiding it down here.

> +	for (agno = 0; agno < rifc->sc->mp->m_sb.sb_agcount; agno++) {
> +		error = xfs_repair_inode_count_ag_rmaps(rifc, agno);
> +		if (error)
> +			return error;
> +	}

	/* rt not supported yet */
	ASSERT(rifc->rt_extents == 0);

> +	/* Can't have extents on both the rt and the data device. */
> +	if (rifc->data_extents && rifc->rt_extents)
> +		return -EFSCORRUPTED;
> +
> +	return 0;
> +}
> +
> +/* Figure out if we need to zap this extents format fork. */
> +STATIC bool
> +xfs_repair_inode_core_check_extents_fork(


Urk. Not sure what this function is supposed to be doing from the
name. xrep_ifork_extent_check()? And then the next function
becomes xrep_ifork_btree_check()?

Also document the return values.

> +	struct xfs_scrub_context	*sc,
> +	struct xfs_dinode		*dip,
> +	int				dfork_size,
> +	int				whichfork)
> +{
> +	struct xfs_bmbt_irec		new;
> +	struct xfs_bmbt_rec		*dp;
> +	bool				isrt;
> +	int				i;
> +	int				nex;
> +	int				fork_size;
> +
> +	nex = XFS_DFORK_NEXTENTS(dip, whichfork);
> +	fork_size = nex * sizeof(struct xfs_bmbt_rec);
> +	if (fork_size < 0 || fork_size > dfork_size)
> +		return true;

Check nex against dip->di_nextents?

> +	dp = (struct xfs_bmbt_rec *)XFS_DFORK_PTR(dip, whichfork);
> +
> +	isrt = dip->di_flags & cpu_to_be16(XFS_DIFLAG_REALTIME);
> +	for (i = 0; i < nex; i++, dp++) {
> +		xfs_failaddr_t	fa;
> +
> +		xfs_bmbt_disk_get_all(dp, &new);
> +		fa = xfs_bmbt_validate_extent(sc->mp, isrt, whichfork, &new);
> +		if (fa)
> +			return true;
> +	}
> +
> +	return false;
> +}
> +
> +/* Figure out if we need to zap this btree format fork. */
> +STATIC bool
> +xfs_repair_inode_core_check_btree_fork(
> +	struct xfs_scrub_context	*sc,
> +	struct xfs_dinode		*dip,
> +	int				dfork_size,
> +	int				whichfork)
> +{
> +	struct xfs_bmdr_block		*dfp;
> +	int				nrecs;
> +	int				level;
> +
> +	if (XFS_DFORK_NEXTENTS(dip, whichfork) <=
> +			dfork_size / sizeof(struct xfs_bmbt_irec))
> +		return true;

check against dip->di_nextents?

> +	dfp = (struct xfs_bmdr_block *)XFS_DFORK_PTR(dip, whichfork);
> +	nrecs = be16_to_cpu(dfp->bb_numrecs);
> +	level = be16_to_cpu(dfp->bb_level);
> +
> +	if (nrecs == 0 || XFS_BMDR_SPACE_CALC(nrecs) > dfork_size)
> +		return true;
> +	if (level == 0 || level > XFS_BTREE_MAXLEVELS)
> +		return true;

Should this visit the bmbt blocks to check the level is actually
correct?

> +	return false;
> +}
> +
> +/*
> + * Check the data fork for things that will fail the ifork verifiers or the
> + * ifork formatters.
> + */
> +STATIC bool
> +xfs_repair_inode_core_check_data_fork(

xrep_ifork_check_data()

> +	struct xfs_scrub_context	*sc,
> +	struct xfs_dinode		*dip,
> +	uint16_t			mode)
> +{
> +	uint64_t			size;
> +	int				dfork_size;
> +
> +	size = be64_to_cpu(dip->di_size);
> +	switch (mode & S_IFMT) {
> +	case S_IFIFO:
> +	case S_IFCHR:
> +	case S_IFBLK:
> +	case S_IFSOCK:
> +		if (XFS_DFORK_FORMAT(dip, XFS_DATA_FORK) != XFS_DINODE_FMT_DEV)
> +			return true;
> +		break;
> +	case S_IFREG:
> +	case S_IFLNK:
> +	case S_IFDIR:
> +		switch (XFS_DFORK_FORMAT(dip, XFS_DATA_FORK)) {
> +		case XFS_DINODE_FMT_LOCAL:

local format is not valid for S_IFREG.

> +		case XFS_DINODE_FMT_EXTENTS:
> +		case XFS_DINODE_FMT_BTREE:
> +			break;
> +		default:
> +			return true;
> +		}
> +		break;
> +	default:
> +		return true;
> +	}
> +	dfork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_DATA_FORK);
> +	switch (XFS_DFORK_FORMAT(dip, XFS_DATA_FORK)) {
> +	case XFS_DINODE_FMT_DEV:
> +		break;
> +	case XFS_DINODE_FMT_LOCAL:
> +		if (size > dfork_size)
> +			return true;
> +		break;
> +	case XFS_DINODE_FMT_EXTENTS:
> +		if (xfs_repair_inode_core_check_extents_fork(sc, dip,
> +				dfork_size, XFS_DATA_FORK))
> +			return true;
> +		break;
> +	case XFS_DINODE_FMT_BTREE:
> +		if (xfs_repair_inode_core_check_btree_fork(sc, dip,
> +				dfork_size, XFS_DATA_FORK))
> +			return true;
> +		break;
> +	default:
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
> +/* Reset the data fork to something sane. */
> +STATIC void
> +xfs_repair_inode_core_zap_data_fork(

xrep_ifork_zap_data()

(you get the idea :P)

> +	struct xfs_scrub_context	*sc,
> +	struct xfs_dinode		*dip,
> +	uint16_t			mode,
> +	struct xfs_repair_inode_fork_counters	*rifc)

(structure names can change, too :)

> +{
> +	char				*p;
> +	const struct xfs_dir_ops	*ops;
> +	struct xfs_dir2_sf_hdr		*sfp;
> +	int				i8count;
> +
> +	/* Special files always get reset to DEV */
> +	switch (mode & S_IFMT) {
> +	case S_IFIFO:
> +	case S_IFCHR:
> +	case S_IFBLK:
> +	case S_IFSOCK:
> +		dip->di_format = XFS_DINODE_FMT_DEV;
> +		dip->di_size = 0;
> +		return;
> +	}
> +
> +	/*
> +	 * If we have data extents, reset to an empty map and hope the user
> +	 * will run the bmapbtd checker next.
> +	 */
> +	if (rifc->data_extents || rifc->rt_extents || S_ISREG(mode)) {
> +		dip->di_format = XFS_DINODE_FMT_EXTENTS;
> +		dip->di_nextents = 0;
> +		return;
> +	}

Does the userspace tool run the bmapbtd checker next?

> +	/* Otherwise, reset the local format to the minimum. */
> +	switch (mode & S_IFMT) {
> +	case S_IFLNK:
> +		/* Blow out symlink; now it points to root dir */
> +		dip->di_format = XFS_DINODE_FMT_LOCAL;
> +		dip->di_size = cpu_to_be64(1);
> +		p = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
> +		*p = '/';

Maybe factor this so zero length symlinks can be set directly to the
same thing? FWIW, would making it point at '.' be better than '/' so
it always points within the same filesystem?

> +		break;
> +	case S_IFDIR:
> +		/*
> +		 * Blow out dir, make it point to the root.  In the
> +		 * future the direction repair will reconstruct this
> +		 * dir for us.
> +		 */

s/the direction//

> +		dip->di_format = XFS_DINODE_FMT_LOCAL;
> +		i8count = sc->mp->m_sb.sb_rootino > XFS_DIR2_MAX_SHORT_INUM;
> +		ops = xfs_dir_get_ops(sc->mp, NULL);
> +		sfp = (struct xfs_dir2_sf_hdr *)XFS_DFORK_PTR(dip,
> +				XFS_DATA_FORK);
> +		sfp->count = 0;
> +		sfp->i8count = i8count;
> +		ops->sf_put_parent_ino(sfp, sc->mp->m_sb.sb_rootino);
> +		dip->di_size = cpu_to_be64(xfs_dir2_sf_hdr_size(i8count));

What happens now if this dir has an ancestor still pointing at it?
Haven't we just screwed the directory structure? How does this
interact with the dentry cache, esp. w.r.t. disconnected dentries
(filehandle lookups)?

> +/*
> + * Check the attr fork for things that will fail the ifork verifiers or the
> + * ifork formatters.
> + */
> +STATIC bool
> +xfs_repair_inode_core_check_attr_fork(
> +	struct xfs_scrub_context	*sc,
> +	struct xfs_dinode		*dip)
> +{
> +	struct xfs_attr_shortform	*sfp;
> +	int				size;
> +
> +	if (XFS_DFORK_BOFF(dip) == 0)
> +		return dip->di_aformat != XFS_DINODE_FMT_EXTENTS ||
> +		       dip->di_anextents != 0;
> +
> +	size = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
> +	switch (XFS_DFORK_FORMAT(dip, XFS_ATTR_FORK)) {
> +	case XFS_DINODE_FMT_LOCAL:
> +		sfp = (struct xfs_attr_shortform *)XFS_DFORK_PTR(dip,
> +				XFS_ATTR_FORK);

As a side note, we should make XFS_DFORK_PTR() return a void * so we
don't need casts like this.

Cheers,

Dave.
-- 
Dave Chinner
david@xxxxxxxxxxxxx
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux