Re: [PATCH 20/25] xfs: scrub directory freespace

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Oct 03, 2017 at 01:42:55PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> 
> Check the free space information in a directory.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> ---
>  fs/xfs/scrub/dir.c |  347 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 347 insertions(+)
> 
> 
> diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
> index e58252b..6ea06c3 100644
> --- a/fs/xfs/scrub/dir.c
> +++ b/fs/xfs/scrub/dir.c
> @@ -239,6 +239,348 @@ xfs_scrub_dir_rec(
>  	return error;
>  }
>  
> +/* Is this free entry either in the bestfree or smaller than all of them? */
> +static inline void
> +xfs_scrub_directory_check_free_entry(
> +	struct xfs_scrub_context	*sc,
> +	xfs_dablk_t			lblk,
> +	struct xfs_dir2_data_free	*bf,
> +	struct xfs_dir2_data_unused	*dup)
> +{
> +	struct xfs_dir2_data_free	*dfp;
> +	unsigned int			smallest;
> +
> +	smallest = -1U;

Urk. That's the same as "smallest = UINT_MAX", and so ......

> +	for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) {
> +		if (dfp->offset &&
> +		    be16_to_cpu(dfp->length) == be16_to_cpu(dup->length))
> +			return;
> +		if (smallest < be16_to_cpu(dfp->length))
> +			smallest = be16_to_cpu(dfp->length);

.... how does this work? Shouldn't it be a ">" check here?


> +	}
> +
> +	if (be16_to_cpu(dup->length) > smallest)
> +		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
> +}
> +
> +/* Check free space info in a directory data block. */
> +STATIC int
> +xfs_scrub_directory_data_bestfree(
> +	struct xfs_scrub_context	*sc,
> +	xfs_dablk_t			lblk,
> +	bool				is_block)
> +{
> +	struct xfs_dir2_data_unused	*dup;
> +	struct xfs_dir2_data_free	*dfp;
> +	struct xfs_buf			*bp;
> +	struct xfs_dir2_data_free	*bf;
> +	struct xfs_mount		*mp = sc->mp;
> +	char				*ptr;
> +	char				*endptr;
> +	u16				tag;
> +	int				newlen;
> +	int				offset;
> +	int				error;
> +
> +	if (is_block) {
> +		/* dir block format */
> +		if (lblk != XFS_B_TO_FSBT(mp, XFS_DIR2_DATA_OFFSET))
> +			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
> +		error = xfs_dir3_block_read(sc->tp, sc->ip, &bp);
> +	} else {
> +		/* dir data format */
> +		error = xfs_dir3_data_read(sc->tp, sc->ip, lblk, -1, &bp);
> +	}
> +	if (!xfs_scrub_fblock_op_ok(sc, XFS_DATA_FORK, lblk, &error))
> +		goto out;
> +
> +	/* Do the bestfrees correspond to actual free space? */
> +	bf = sc->ip->d_ops->data_bestfree_p(bp->b_addr);

With the number of d_ops callouts in this code, a local dops
variable might be in order.

> +	for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) {
> +		offset = be16_to_cpu(dfp->offset);
> +		if (offset == 0)
> +			continue;
> +		if (offset >= BBTOB(bp->b_length)) {
> +			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
> +			continue;
> +		}

Not sure I like all the checks against and calculations using
bp->b_length in this function. it would be more correct to check
against geo->blksize.

> +		dup = (struct xfs_dir2_data_unused *)(bp->b_addr + offset);
> +		tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup));
> +
> +		if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG) ||
> +		    be16_to_cpu(dup->length) != be16_to_cpu(dfp->length) ||
> +		    tag != ((char *)dup - (char *)bp->b_addr))
> +			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
> +	}

Also, count the number of best frees here.

> +
> +	/* Make sure the bestfrees are actually the best free spaces. */
> +	ptr = (char *)sc->ip->d_ops->data_entry_p(bp->b_addr);
> +	if (is_block) {
> +		struct xfs_dir2_block_tail	*btp;
> +
> +		btp = xfs_dir2_block_tail_p(sc->mp->m_dir_geo, bp->b_addr);

mp->m_dir_geo

> +		endptr = (char *)xfs_dir2_block_leaf_p(btp);
> +	} else
> +		endptr = (char *)bp->b_addr + BBTOB(bp->b_length);




> +	while (ptr < endptr) {
> +		dup = (struct xfs_dir2_data_unused *)ptr;
> +		/* Skip real entries */
> +		if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG)) {
> +			struct xfs_dir2_data_entry	*dep;
> +
> +			dep = (struct xfs_dir2_data_entry *)ptr;
> +			newlen = sc->ip->d_ops->data_entsize(dep->namelen);
> +			if (newlen <= 0) {
> +				xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK,
> +						lblk);
> +				goto out_buf;
> +			}
> +			ptr += newlen;
> +			if (endptr < ptr)
> +				xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK,
> +					      lblk);
> +			continue;
> +		}
> +
> +		/* Spot check this free entry */
> +		tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup));
> +		if (tag != ((char *)dup - (char *)bp->b_addr))
> +			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
> +
> +		/*
> +		 * Either this entry is a bestfree or it's smaller than
> +		 * any of the bestfrees.
> +		 */
> +		xfs_scrub_directory_check_free_entry(sc, lblk, bf, dup);

SO this checks if the entry is in the bestfree, but it doesn't
tell us if the bestfree array has the correct number of entries...

> +
> +		/* Move on. */
> +		newlen = be16_to_cpu(dup->length);
> +		if (newlen <= 0) {
> +			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
> +			goto out_buf;
> +		}
> +		ptr += newlen;
> +		if (endptr < ptr)
> +			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);

Count the number of free entries here.

> +	}

And now check that the number of bestfrees vs free entries is
valid. If there's more than 3 free entries in the block, the
bestfrees array should be full...

> +out_buf:
> +	xfs_trans_brelse(sc->tp, bp);
> +out:
> +	return error;
> +}
> +
> +/* Is this the longest free entry in the block? */
> +static inline void
> +xfs_scrub_directory_check_freesp(
> +	struct xfs_scrub_context	*sc,
> +	xfs_dablk_t			lblk,
> +	struct xfs_buf			*dbp,
> +	unsigned int			len)
> +{
> +	struct xfs_dir2_data_free	*bf;
> +	struct xfs_dir2_data_free	*dfp;
> +	unsigned int			longest = 0;
> +	int				offset;
> +
> +	bf = sc->ip->d_ops->data_bestfree_p(dbp->b_addr);
> +	for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) {
> +		offset = be16_to_cpu(dfp->offset);
> +		if (!offset)
> +			continue;
> +		if (longest < be16_to_cpu(dfp->length))
> +			longest = be16_to_cpu(dfp->length);
> +	}
> +
> +	if (longest != len)
> +		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
> +}

This needs a better explanation - it's called to check whether then
freespace length in the freespace index matches the longest
freespace in the data block bests array.

And from that, the data block bests array is supposed to be ordered
from largest to smallest, yes? As per __xfs_dir3_data_check():


	XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[0].length) >=
						be16_to_cpu(bf[1].length));
	XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[1].length) >=
						be16_to_cpu(bf[2].length));

So why doesn't this code just check the first entry in the array?

Hmmm, and now I've remembered that, xfs_scrub_directory_check_free_entry()
probably only needs to do a reverse scan just to find the smallest
non-zero entry...

> +/* Check free space info in a directory leaf1 block. */
> +STATIC int
> +xfs_scrub_directory_leaf1_bestfree(
> +	struct xfs_scrub_context	*sc,
> +	struct xfs_da_args		*args,
> +	xfs_dablk_t			lblk)
> +{
> +	struct xfs_dir2_leaf_tail	*ltp;
> +	struct xfs_buf			*dbp;
> +	struct xfs_buf			*bp;
> +	struct xfs_mount		*mp = sc->mp;
> +	__be16				*bestp;
> +	__u16				best;
> +	int				i;
> +	int				error;
> +
> +	/* Read the free space block */
> +	error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, -1, &bp);
> +	if (!xfs_scrub_fblock_op_ok(sc, XFS_DATA_FORK, lblk, &error))
> +		goto out;
> +
> +	/* Check all the entries. */
> +	ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, bp->b_addr);
> +	bestp = xfs_dir2_leaf_bests_p(ltp);
> +	for (i = 0; i < be32_to_cpu(ltp->bestcount); i++, bestp++) {
> +		best = be16_to_cpu(*bestp);
> +		if (best == NULLDATAOFF)
> +			continue;
> +		error = xfs_dir3_data_read(sc->tp, sc->ip,
> +				i * args->geo->fsbcount, -1, &dbp);
> +		if (!xfs_scrub_fblock_op_ok(sc, XFS_DATA_FORK, lblk, &error))
> +			continue;
> +		xfs_scrub_directory_check_freesp(sc, lblk, dbp, best);
> +		xfs_trans_brelse(sc->tp, dbp);
> +	}
> +out:
> +	return error;

This needs comments to explain what it is not checking because those
checks were done in the verifier. (i.e.  hash index does not overlap
the freespace index, stale entry count is valid).

hmmmm. More philosophical question: should we rerun the verifiers
in the scrubber manually so guarantee that we fully cover whatever
is in memory on cached and modified buffers?

> +
> +/* Check free space info in a directory freespace block. */
> +STATIC int
> +xfs_scrub_directory_free_bestfree(
> +	struct xfs_scrub_context	*sc,
> +	struct xfs_da_args		*args,
> +	xfs_dablk_t			lblk)
> +{
> +	struct xfs_dir3_icfree_hdr	freehdr;
> +	struct xfs_buf			*dbp;
> +	struct xfs_buf			*bp;
> +	__be16				*bestp;
> +	__be16				best;
> +	int				i;
> +	int				error;
> +
> +	/* Read the free space block */
> +	error = xfs_dir2_free_read(sc->tp, sc->ip, lblk, &bp);
> +	if (!xfs_scrub_fblock_op_ok(sc, XFS_DATA_FORK, lblk, &error))
> +		goto out;
> +
> +	/* Check all the entries. */
> +	sc->ip->d_ops->free_hdr_from_disk(&freehdr, bp->b_addr);
> +	bestp = sc->ip->d_ops->free_bests_p(bp->b_addr);
> +	for (i = 0; i < freehdr.nvalid; i++, bestp++) {
> +		best = be16_to_cpu(*bestp);
> +		if (best == NULLDATAOFF)
> +			continue;
> +		error = xfs_dir3_data_read(sc->tp, sc->ip,
> +				(freehdr.firstdb + i) * args->geo->fsbcount,
> +				-1, &dbp);
> +		if (!xfs_scrub_fblock_op_ok(sc, XFS_DATA_FORK, lblk, &error))
> +			continue;
> +		xfs_scrub_directory_check_freesp(sc, lblk, dbp, best);
> +		xfs_trans_brelse(sc->tp, dbp);
> +	}
> +out:
> +	return error;
> +}
> +
> +/* Check free space information in directories. */
> +STATIC int
> +xfs_scrub_directory_blocks(
> +	struct xfs_scrub_context	*sc)
> +{
> +	struct xfs_bmbt_irec		got;
> +	struct xfs_da_args		args;
> +	struct xfs_ifork		*ifp;
> +	struct xfs_mount		*mp = sc->mp;
> +	xfs_fileoff_t			leaf_lblk;
> +	xfs_fileoff_t			free_lblk;
> +	xfs_fileoff_t			lblk;
> +	xfs_extnum_t			idx;
> +	bool				found;
> +	int				is_block = 0;
> +	int				error;
> +
> +	/* Ignore local format directories. */
> +	if (sc->ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
> +	    sc->ip->i_d.di_format != XFS_DINODE_FMT_BTREE)
> +		return 0;
> +
> +	ifp = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK);
> +	lblk = XFS_B_TO_FSB(mp, XFS_DIR2_DATA_OFFSET);
> +	leaf_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_LEAF_OFFSET);
> +	free_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_FREE_OFFSET);
> +
> +	/* Is this a block dir? */
> +	args.dp = sc->ip;
> +	args.geo = mp->m_dir_geo;
> +	args.trans = sc->tp;
> +	error = xfs_dir2_isblock(&args, &is_block);
> +	if (!xfs_scrub_fblock_op_ok(sc, XFS_DATA_FORK, lblk, &error))
> +		goto out;
> +
> +	/* Iterate all the data extents in the directory... */
> +	found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &idx, &got);
> +	while (found) {
> +		/* No more data blocks... */
> +		if (got.br_startoff >= leaf_lblk)
> +			break;

If it's a block dir and got.br_startoff > 0, then it's corrupt?

> +
> +		/* Check each data block's bestfree data */
> +		for (lblk = roundup((xfs_dablk_t)got.br_startoff,
> +				args.geo->fsbcount);
> +		     lblk < got.br_startoff + got.br_blockcount;
> +		     lblk += args.geo->fsbcount) {

This is not obvious as to why it works with discontiguous directory
blocks. I think it's because it grabs the aligned start block of
each directory block and then internally the blocks get mapped
correctly via the directory block read functions, but this
definitely needs a better comment explaining the iteration mechanism
being used here....

> +			error = xfs_scrub_directory_data_bestfree(sc, lblk,
> +					is_block);
> +			if (error)
> +				goto out;
> +		}
> +
> +		found = xfs_iext_get_extent(ifp, ++idx, &got);

As it is, I think this is going to check discontiguous directory
blocks multiple times. It's going to find each extent in a
discontiguous dir block, round it up to the next dirblock and
scan that next dirblock. It then finds the next block in the
current discontig block, rounds it up to the next dirblock, and
scans it again....

I think it would be much better to use xfs_iext_lookup_extent() here
to iterate by expected start block rather than iterating by extent
index.

> +	}
> +
> +	/* Look for a leaf1 block, which has free info. */
> +	if (xfs_iext_lookup_extent(sc->ip, ifp, leaf_lblk, &idx, &got) &&
> +	    got.br_startoff == leaf_lblk &&
> +	    got.br_blockcount == args.geo->fsbcount &&
> +	    !xfs_iext_get_extent(ifp, ++idx, &got)) {
> +		if (is_block) {
> +			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
> +			goto not_leaf1;

Can just abort the scrub at this point.

Cheers,

Dave.

-- 
Dave Chinner
david@xxxxxxxxxxxxx
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux