Re: [PATCH 15/22] xfs: add CRCs to dir2/da node blocks

Ben Myers <bpm@xxxxxxx> · Mon, 22 Apr 2013 13:55:50 -0500

On Wed, Apr 03, 2013 at 04:11:25PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner at redhat.com>
> 
> Signed-off-by: Dave Chinner <dchinner at redhat.com>

A few nits below.

> ---
>  fs/xfs/xfs_attr.c      |   33 +-
>  fs/xfs/xfs_attr_leaf.c |   29 +-
>  fs/xfs/xfs_bmap.c      |    1 +
>  fs/xfs/xfs_da_btree.c  | 1395 +++++++++++++++++++++++++++++-------------------
>  fs/xfs/xfs_da_btree.h  |  106 +++-
>  fs/xfs/xfs_dir2_node.c |   26 +-
>  fs/xfs/xfs_trace.c     |    2 +-
>  7 files changed, 972 insertions(+), 620 deletions(-)
> 
> diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
> index 8886838..e03128c 100644
> --- a/fs/xfs/xfs_attr.c
> +++ b/fs/xfs/xfs_attr.c
> @@ -15,7 +15,6 @@
>   * along with this program; if not, write the Free Software Foundation,
>   * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
>   */
> -
>  #include "xfs.h"
>  #include "xfs_fs.h"
>  #include "xfs_types.h"
> @@ -25,6 +24,7 @@
>  #include "xfs_sb.h"
>  #include "xfs_ag.h"
>  #include "xfs_mount.h"
> +#include "xfs_error.h"
>  #include "xfs_da_btree.h"
>  #include "xfs_bmap_btree.h"
>  #include "xfs_attr_sf.h"
> @@ -35,7 +35,6 @@
>  #include "xfs_bmap.h"
>  #include "xfs_attr.h"
>  #include "xfs_attr_leaf.h"
> -#include "xfs_error.h"

Why did xfs_error.h need to be moved?

> @@ -935,16 +936,16 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
>  	/*
>  	 * Set up the new root node.
>  	 */
> -	error = xfs_da_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK);
> +	error = xfs_da3_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK);
>  	if (error)
>  		goto out;
>  	node = bp1->b_addr;
>  	leaf = bp2->b_addr;
>  	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
>  	/* both on-disk, don't endian-flip twice */
> -	node->btree[0].hashval =
> -		leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval;
> -	node->btree[0].before = cpu_to_be32(blkno);
> +	btree = xfs_da3_node_tree_p(node);
> +	btree[0].hashval = leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval;
								       ^

Extra space.

> +static bool
> +xfs_da3_node_verify(
>  	struct xfs_buf		*bp)
>  {
>  	struct xfs_mount	*mp = bp->b_target->bt_mount;
> -	struct xfs_da_node_hdr *hdr = bp->b_addr;
> -	int			block_ok = 0;
> -
> -	block_ok = hdr->info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC);
> -	block_ok = block_ok &&
> -			be16_to_cpu(hdr->level) > 0 &&
> -			be16_to_cpu(hdr->count) > 0 ;
> -	if (!block_ok) {
> -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
> -		xfs_buf_ioerror(bp, EFSCORRUPTED);
> +	struct xfs_da_intnode	*hdr = bp->b_addr;
> +	struct xfs_da3_icnode_hdr ichdr;
> +
> +	xfs_da3_node_hdr_from_disk(&ichdr, hdr);
> +
> +	if (xfs_sb_version_hascrc(&mp->m_sb)) {
> +		struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
> +
> +		if (ichdr.magic != XFS_DA3_NODE_MAGIC)
> +			return false;
> +
> +		if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid))
> +			return false;
> +		if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
> +			return false;
> +	} else {
> +		if (ichdr.magic != XFS_DA_NODE_MAGIC)
> +			return false;
>  	}
> +	if (ichdr.level == 0)
> +		return false;
> +	if (ichdr.level > XFS_DA_NODE_MAXDEPTH)
> +		return false;
> +	if (ichdr.count == 0)
> +		return false;
> +
> +	/*
> +	 * we don't know if the node is for and attribute or directory tree,
> +	 * so only fail if the count is outside both bounds
> +	 */
> +	if (ichdr.count > mp->m_dir_node_ents &&
> +	    ichdr.count > mp->m_attr_node_ents)
> +		return false;
> +
> +	/* XXX: hash order check? */

Still planning to add that in a subsequent patch?

> @@ -178,33 +314,45 @@ xfs_da_node_read(
>   * Create the initial contents of an intermediate node.
>   */
>  int
> -xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
> -				 struct xfs_buf **bpp, int whichfork)
> +xfs_da3_node_create(
> +	struct xfs_da_args	*args,
> +	xfs_dablk_t		blkno,
> +	int			level,
> +	struct xfs_buf		**bpp,
> +	int			whichfork)
>  {
> -	xfs_da_intnode_t *node;
> -	struct xfs_buf *bp;
> -	int error;
> -	xfs_trans_t *tp;
> +	struct xfs_da_intnode	*node;
> +	struct xfs_trans	*tp = args->trans;
> +	struct xfs_mount	*mp = tp->t_mountp;
> +	struct xfs_da3_icnode_hdr ichdr = {0};
> +	struct xfs_buf		*bp;
> +	int			error;
>  
>  	trace_xfs_da_node_create(args);
> +	ASSERT(level <= XFS_DA_NODE_MAXDEPTH);
>  
> -	tp = args->trans;
>  	error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);

Looks like this function can return 0 with a null bp.  We should probably check
for that.

> @@ -320,8 +474,10 @@ xfs_da_split(xfs_da_state_t *state)
>  	 * just got bumped because of the addition of a new root node.
>  	 * There might be three blocks involved if a double split occurred,
>  	 * and the original block 0 could be at any position in the list.
> +	 *
> +	 * Note: the info structures being modified here for both v2 and v3 da
> +	 * headers, so we can do this linkage just using the v2 structures.

I had a hard time parsing that the first few times.  How about something like:

Note:  The xfs_da_blkinfo fields being modified here are shared by both
xfs_da_node_hdr and xfs_da3_intnode, and occur before the additional fields
added in xfs_da3_intnode, so this linkage can be done using just the v2
structures.

> @@ -591,12 +801,12 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
>  		 * Move the req'd B-tree elements from high in node1 to
>  		 * low in node2.
>  		 */
> -		be16_add_cpu(&node2->hdr.count, count);
> +		nodehdr2.count += count;
>  		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
> -		btree_s = &node1->btree[be16_to_cpu(node1->hdr.count) - count];
> -		btree_d = &node2->btree[0];
> +		btree_s = &btree1[nodehdr1.count- count];
						^ 
Add a space.

> @@ -926,35 +1172,34 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
>  	 * We prefer coalescing with the lower numbered sibling so as
>  	 * to shrink a directory over time.
>  	 */
> +	count  = state->node_ents;
> +	count -= state->node_ents >> 2;
> +	count -= nodehdr.count;
> +
>  	/* start with smaller blk num */
> -	forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back));
> +	forward = nodehdr.forw < nodehdr.back;
>  	for (i = 0; i < 2; forward = !forward, i++) {
>  		if (forward)
> -			blkno = be32_to_cpu(info->forw);
> +			blkno = nodehdr.forw;
>  		else
> -			blkno = be32_to_cpu(info->back);
> +			blkno = nodehdr.back;
>  		if (blkno == 0)
>  			continue;
> -		error = xfs_da_node_read(state->args->trans, state->args->dp,
> +		error = xfs_da3_node_read(state->args->trans, state->args->dp,
>  					blkno, -1, &bp, state->args->whichfork);
>  		if (error)
>  			return(error);
> -		ASSERT(bp != NULL);
>  
> -		node = (xfs_da_intnode_t *)info;
> -		count  = state->node_ents;
> -		count -= state->node_ents >> 2;
> -		count -= be16_to_cpu(node->hdr.count);

At first initializing count outside the loop looked like a problem to me, but
upon a closer inspection now I believe it's ok.

>  /*
> - * Unbalance the btree elements between two intermediate nodes,
> + * Unbalance the elements between two intermediate nodes,
>   * move all Btree elements from one node into another.
>   */
>  STATIC void
> -xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
> -				     xfs_da_state_blk_t *save_blk)
> +xfs_da3_node_unbalance(
> +	struct xfs_da_state	*state,
> +	struct xfs_da_state_blk	*drop_blk,
> +	struct xfs_da_state_blk	*save_blk)
>  {
> -	xfs_da_intnode_t *drop_node, *save_node;
> -	xfs_da_node_entry_t *btree;
> -	int tmp;
> -	xfs_trans_t *tp;
> +	struct xfs_da_intnode	*drop_node;
> +	struct xfs_da_intnode	*save_node;
> +	struct xfs_da_node_entry *dbtree;
> +	struct xfs_da_node_entry *sbtree;
> +	struct xfs_da3_icnode_hdr dhdr;
> +	struct xfs_da3_icnode_hdr shdr;

Would be better as drop and save.

> +	struct xfs_trans	*tp;
> +	int			sindex;
> +	int			tmp;
>  
>  	trace_xfs_da_node_unbalance(state->args);
>  
>  	drop_node = drop_blk->bp->b_addr;
>  	save_node = save_blk->bp->b_addr;
> -	ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
> -	ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
> +	xfs_da3_node_hdr_from_disk(&dhdr, drop_node);
> +	xfs_da3_node_hdr_from_disk(&shdr, save_node);
> +	dbtree = xfs_da3_node_tree_p(drop_node);
> +	sbtree = xfs_da3_node_tree_p(save_node);
>  	tp = state->args->trans;
>  
>  	/*
>  	 * If the dying block has lower hashvals, then move all the
>  	 * elements in the remaining block up to make a hole.
>  	 */
> -	if ((be32_to_cpu(drop_node->btree[0].hashval) < be32_to_cpu(save_node->btree[ 0 ].hashval)) ||
> -	    (be32_to_cpu(drop_node->btree[be16_to_cpu(drop_node->hdr.count)-1].hashval) <
> -	     be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval)))
> -	{
> -		btree = &save_node->btree[be16_to_cpu(drop_node->hdr.count)];
> -		tmp = be16_to_cpu(save_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
> -		memmove(btree, &save_node->btree[0], tmp);
> -		btree = &save_node->btree[0];
> +	if ((be32_to_cpu(dbtree[0].hashval) < be32_to_cpu(sbtree[ 0 ].hashval)) ||
								 ^ ^

Extra spaces.

> +	    (be32_to_cpu(dbtree[dhdr.count - 1].hashval) <
> +				be32_to_cpu(sbtree[shdr.count - 1].hashval))) {
> +		/* XXX: check this - is memmove dst correct? */
> +		tmp = shdr.count * (uint)sizeof(xfs_da_node_entry_t);
> +		memmove(&sbtree[dhdr.count], &sbtree[0], tmp);

I believe the destination in this memmove is correct.  We're moving items from
the beginning of the save_node toward the end of save_node so that later we can
copy from drop_node into the beginning of save_node.  Making room.

> +
> +		sindex = 0;
>  		xfs_trans_log_buf(tp, save_blk->bp,
> -			XFS_DA_LOGRANGE(save_node, btree,
> -				(be16_to_cpu(save_node->hdr.count) + be16_to_cpu(drop_node->hdr.count)) *
> -				sizeof(xfs_da_node_entry_t)));
> +			XFS_DA_LOGRANGE(save_node, &sbtree[0],
> +				(shdr.count + dhdr.count) *
> +						sizeof(xfs_da_node_entry_t)));

Seems like here we are logging more of the node than is strictly necessary.  So
far we have only modified save_node at shdr.count up to shdr.count +
dhdr.count, there is no need to log the beginning of save_node until after
we've copied from drop_node into it.  Not a big deal I guess.

>  	} else {
> -		btree = &save_node->btree[be16_to_cpu(save_node->hdr.count)];
> +		sindex = shdr.count;
>  		xfs_trans_log_buf(tp, save_blk->bp,
> -			XFS_DA_LOGRANGE(save_node, btree,
> -				be16_to_cpu(drop_node->hdr.count) *
> -				sizeof(xfs_da_node_entry_t)));
> +			XFS_DA_LOGRANGE(save_node, &sbtree[sindex],
> +				dhdr.count * sizeof(xfs_da_node_entry_t)));

And here it seems that we're logging save_node before we've made any
modification to it.  Maybe I'm missing something.

>  	}
>  
>  	/*
>  	 * Move all the B-tree elements from drop_blk to save_blk.
>  	 */
> -	tmp = be16_to_cpu(drop_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
> -	memcpy(btree, &drop_node->btree[0], tmp);
> -	be16_add_cpu(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count));
> +	tmp = dhdr.count * (uint)sizeof(xfs_da_node_entry_t);
> +	memcpy(&sbtree[sindex], &dbtree[0], tmp);
> +	shdr.count += dhdr.count;
> +	xfs_da3_node_hdr_to_disk(save_node, &shdr);
>  	xfs_trans_log_buf(tp, save_blk->bp,
>  		XFS_DA_LOGRANGE(save_node, &save_node->hdr,
> -			sizeof(save_node->hdr)));
> +				xfs_da3_node_hdr_size(save_node)));
>  

And then after modifying save_node by copying stuff in from drop_blk we only
log the header, and not the entries.  I guess I'd prefer to see the entries
logged after they've been modified.  It's not a bug, looks like.. just not what
I'm used to seeing.

> @@ -1190,66 +1478,73 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
>  		}
>  		curr = blk->bp->b_addr;
>  		blk->magic = be16_to_cpu(curr->magic);
> -		ASSERT(blk->magic == XFS_DA_NODE_MAGIC ||
> -		       blk->magic == XFS_DIR2_LEAFN_MAGIC ||
> -		       blk->magic == XFS_ATTR_LEAF_MAGIC);
> +
> +		if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
> +			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
> +			break;
> +		}
> +
> +		if (blk->magic == XFS_DIR2_LEAFN_MAGIC ||
> +		    blk->magic == XFS_DIR3_LEAFN_MAGIC) {
> +			blk->magic = XFS_DIR2_LEAFN_MAGIC;
> +			blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
> +			break;
> +		}

Something like this would be better for this rearrangement:

	if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
		...
		break;
	} else if (blk->magic == XFS_DIR2_LEAFN_MAGIC ||
		   blk->magic == XFS_DIR3_LEAFN_MAGIC) {
		...
		break;
	} else if (blk->magic != XFS_DA_NODE_MAGIC) {
		return XFS_ERROR(EFSCORRUPTED);
	}

So that we're sure what kind of block we have.

> +
> +		blk->magic = XFS_DA_NODE_MAGIC;
> +

Why was that assignment necessary?

> @@ -1316,9 +1647,6 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
>  	ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
>  	       old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
>  	       old_blk->magic == XFS_ATTR_LEAF_MAGIC);

Seems like these asserts need to be updated.

> -	ASSERT(old_blk->magic == be16_to_cpu(old_info->magic));
> -	ASSERT(new_blk->magic == be16_to_cpu(new_info->magic));
> -	ASSERT(old_blk->magic == new_blk->magic);

These seem like good asserts.

> @@ -1449,8 +1738,6 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
>  	ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
>  	       save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
>  	       save_blk->magic == XFS_ATTR_LEAF_MAGIC);

Seems like these asserts need to be updated.

> -	ASSERT(save_blk->magic == be16_to_cpu(save_info->magic));
> -	ASSERT(drop_blk->magic == be16_to_cpu(drop_info->magic));

These two seem like useful asserts.

> @@ -1856,17 +2176,22 @@ xfs_da_swap_lastblock(
>  		dead_level = 0;
>  		dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval);
>  	} else {
> -		ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
> +		struct xfs_da3_icnode_hdr deadhdr;
> +
> +		ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
> +		       dead_info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));

I think these are covered by asserts in xfs_da3_node_hdr_from_disk and can be removed.

> @@ -1912,31 +2237,31 @@ xfs_da_swap_lastblock(
>  	 * Walk down the tree looking for the parent of the moved block.
>  	 */
>  	for (;;) {
> -		error = xfs_da_node_read(tp, ip, par_blkno, -1, &par_buf, w);
> +		error = xfs_da3_node_read(tp, ip, par_blkno, -1, &par_buf, w);
>  		if (error)
>  			goto done;
>  		par_node = par_buf->b_addr;
> -		if (unlikely(par_node->hdr.info.magic !=
> -		    cpu_to_be16(XFS_DA_NODE_MAGIC) ||
> -		    (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) {
> +		xfs_da3_node_hdr_from_disk(&par_hdr, par_node);
> +		if (level >= 0 && level != par_hdr.level + 1) {

Do we need to test par_node magic here like we were before? 

>  			XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
>  					 XFS_ERRLEVEL_LOW, mp);
>  			error = XFS_ERROR(EFSCORRUPTED);
>  			goto done;
>  		}

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs