Re: [PATCH 2/9] xfs: add support for large btree blocks

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Jan 22, 2013 at 12:25:53AM +1100, Dave Chinner wrote:
> From: Christoph Hellwig <hch@xxxxxx>
> 
> Add support for larger btree blocks that contains a CRC32C checksum,
> a filesystem uuid and block number for detecting filesystem
> consistency and out of place writes.
> 
> [dchinner@xxxxxxxxxx] Also include an owner field to allow reverse
> mappings to be implemented for improved repairability and a LSN
> field to so that log recovery can easily determine the last
> modification that made it to disk for each buffer.
> 
> [dchinner@xxxxxxxxxx] Add buffer log format flags to indicate the
> type of buffer to recovery so that we don't have to do blind magic
> number tests to determine what the buffer is.
> 
> [dchinner@xxxxxxxxxx] Modified to fit into the verifier structure.

This patch is far too large for a good review.  It needs to be split up into
its various ideas which you outlined in patch 0.  If you need to add dead code
in each piece and then enable it at the end, that's fine with me.

Some comments below.

> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> ---
>  fs/xfs/xfs_alloc_btree.c  |  105 +++++++++++++------
>  fs/xfs/xfs_alloc_btree.h  |   12 ++-
>  fs/xfs/xfs_attr_leaf.c    |    2 +-
>  fs/xfs/xfs_bmap.c         |   42 +++++---
>  fs/xfs/xfs_bmap_btree.c   |  110 +++++++++++++------
>  fs/xfs/xfs_bmap_btree.h   |   19 ++--
>  fs/xfs/xfs_btree.c        |  256 +++++++++++++++++++++++++++++++++++++--------
>  fs/xfs/xfs_btree.h        |   64 ++++++++++--
>  fs/xfs/xfs_buf_item.h     |   24 ++++-
>  fs/xfs/xfs_dinode.h       |    4 +-
>  fs/xfs/xfs_fsops.c        |   23 +++-
>  fs/xfs/xfs_ialloc_btree.c |   87 ++++++++++-----
>  fs/xfs/xfs_ialloc_btree.h |    9 +-
>  fs/xfs/xfs_inode.c        |   33 +++---
>  fs/xfs/xfs_log_recover.c  |   28 +++++
>  fs/xfs/xfs_trans.h        |    2 +
>  fs/xfs/xfs_trans_buf.c    |   29 +++--
>  17 files changed, 643 insertions(+), 206 deletions(-)
> 
> diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
> index b1ddef6..30c4c14 100644
> --- a/fs/xfs/xfs_alloc_btree.c
> +++ b/fs/xfs/xfs_alloc_btree.c
> @@ -33,6 +33,7 @@
>  #include "xfs_extent_busy.h"
>  #include "xfs_error.h"
>  #include "xfs_trace.h"
> +#include "xfs_cksum.h"
>  
>  
>  STATIC struct xfs_btree_cur *
> @@ -272,7 +273,7 @@ xfs_allocbt_key_diff(
>  	return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
>  }
>  
> -static void
> +static bool
>  xfs_allocbt_verify(
>  	struct xfs_buf		*bp)
>  {
> @@ -280,66 +281,103 @@ xfs_allocbt_verify(
>  	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
>  	struct xfs_perag	*pag = bp->b_pag;
>  	unsigned int		level;
> -	int			sblock_ok; /* block passes checks */
>  
>  	/*
>  	 * magic number and level verification
>  	 *
> -	 * During growfs operations, we can't verify the exact level as the
> -	 * perag is not fully initialised and hence not attached to the buffer.
> -	 * In this case, check against the maximum tree depth.
> +	 * During growfs operations, we can't verify the exact level or owner as
> +	 * the perag is not fully initialised and hence not attached to the
> +	 * buffer.  In this case, check against the maximum tree depth.
> +	 *
> +	 * Similarly, during log recovery we will have a perag structure
> +	 * attached, but the agf information will not yet have been initialised
> +	 * from the on disk AGF. Again, we can only check against maximum limits
> +	 * in this case.
>  	 */
>  	level = be16_to_cpu(block->bb_level);
>  	switch (block->bb_magic) {
> +	case cpu_to_be32(XFS_ABTB_CRC_MAGIC):
> +		if (!xfs_sb_version_hascrc(&mp->m_sb))
> +			return false;
> +		if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
> +			return false;
> +		if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
> +			return false;
> +		if (pag &&
> +		    be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
> +			return false;
> +		/* fall through */
>  	case cpu_to_be32(XFS_ABTB_MAGIC):
> -		if (pag)
> -			sblock_ok = level < pag->pagf_levels[XFS_BTNUM_BNOi];
> -		else
> -			sblock_ok = level < mp->m_ag_maxlevels;
> +		if (pag && pag->pagf_init) {
> +			if (level >= pag->pagf_levels[XFS_BTNUM_BNOi])
> +				return false;
> +		} else if (level >= mp->m_ag_maxlevels)
> +			return false;
>  		break;
> +	case cpu_to_be32(XFS_ABTC_CRC_MAGIC):
> +		if (!xfs_sb_version_hascrc(&mp->m_sb))
> +			return false;
> +		if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
> +			return false;
> +		if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
> +			return false;
> +		if (pag &&
> +		    be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
> +			return false;
> +		/* fall through */
>  	case cpu_to_be32(XFS_ABTC_MAGIC):
> -		if (pag)
> -			sblock_ok = level < pag->pagf_levels[XFS_BTNUM_CNTi];
> -		else
> -			sblock_ok = level < mp->m_ag_maxlevels;
> +		if (pag && pag->pagf_init) {
> +			if (level >= pag->pagf_levels[XFS_BTNUM_CNTi])
> +				return false;
> +		} else if (level >= mp->m_ag_maxlevels)
> +			return false;
>  		break;
>  	default:
> -		sblock_ok = 0;
> -		break;
> +		return false;
>  	}
>  
>  	/* numrecs verification */
> -	sblock_ok = sblock_ok &&
> -		be16_to_cpu(block->bb_numrecs) <= mp->m_alloc_mxr[level != 0];
> +	if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0])
> +		return false;
>  
>  	/* sibling pointer verification */
> -	sblock_ok = sblock_ok &&
> -		(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
> -		 be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) &&
> -		block->bb_u.s.bb_leftsib &&
> -		(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
> -		 be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) &&
> -		block->bb_u.s.bb_rightsib;
> -
> -	if (!sblock_ok) {
> -		trace_xfs_btree_corrupt(bp, _RET_IP_);
> -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
> -		xfs_buf_ioerror(bp, EFSCORRUPTED);
> -	}
> +	if (!block->bb_u.s.bb_leftsib ||
> +	    (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
> +	     block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
> +		return false;
> +	if (!block->bb_u.s.bb_rightsib ||
> +	    (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
> +	     block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
> +		return false;
> +
> +	return true;
>  }
>  
>  static void
>  xfs_allocbt_read_verify(
>  	struct xfs_buf	*bp)
>  {
> -	xfs_allocbt_verify(bp);
> +	if (!(xfs_btree_sblock_verify_crc(bp) &&
> +	      xfs_allocbt_verify(bp))) {
> +		trace_xfs_btree_corrupt(bp, _RET_IP_);
> +		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
> +				     bp->b_target->bt_mount, bp->b_addr);
> +		xfs_buf_ioerror(bp, EFSCORRUPTED);
> +	}
>  }
>  
>  static void
>  xfs_allocbt_write_verify(
>  	struct xfs_buf	*bp)
>  {
> -	xfs_allocbt_verify(bp);
> +	if (!xfs_allocbt_verify(bp)) {
> +		trace_xfs_btree_corrupt(bp, _RET_IP_);
> +		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
> +				     bp->b_target->bt_mount, bp->b_addr);
> +		xfs_buf_ioerror(bp, EFSCORRUPTED);
> +	}
> +	xfs_btree_sblock_calc_crc(bp);
> +
>  }
>  
>  const struct xfs_buf_ops xfs_allocbt_buf_ops = {
> @@ -444,6 +482,9 @@ xfs_allocbt_init_cursor(
>  	cur->bc_private.a.agbp = agbp;
>  	cur->bc_private.a.agno = agno;
>  
> +	if (xfs_sb_version_hascrc(&mp->m_sb))
> +		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
> +
>  	return cur;
>  }
>  
> diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h
> index 7e89a2b..087465b 100644
> --- a/fs/xfs/xfs_alloc_btree.h
> +++ b/fs/xfs/xfs_alloc_btree.h
> @@ -31,8 +31,10 @@ struct xfs_mount;
>   * by blockcount and blockno.  All blocks look the same to make the code
>   * simpler; if we have time later, we'll make the optimizations.
>   */
> -#define	XFS_ABTB_MAGIC	0x41425442	/* 'ABTB' for bno tree */
> -#define	XFS_ABTC_MAGIC	0x41425443	/* 'ABTC' for cnt tree */
> +#define	XFS_ABTB_MAGIC		0x41425442	/* 'ABTB' for bno tree */
> +#define	XFS_ABTB_CRC_MAGIC	0x4142544a
> +#define	XFS_ABTC_MAGIC		0x41425443	/* 'ABTC' for cnt tree */
> +#define	XFS_ABTC_CRC_MAGIC	0x4142544b

/* 'ABTJ' and 'ABTK' */

Add comment.

>  /*
>   * Data record/key structure
> @@ -59,10 +61,10 @@ typedef __be32 xfs_alloc_ptr_t;
>  
>  /*
>   * Btree block header size depends on a superblock flag.
> - *
> - * (not quite yet, but soon)
>   */
> -#define XFS_ALLOC_BLOCK_LEN(mp)	XFS_BTREE_SBLOCK_LEN
> +#define XFS_ALLOC_BLOCK_LEN(mp) \
> +	(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
> +		XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN)

The allocbt changes seem to be a good fit for their own patch.

>  
>  /*
>   * Record, key, and pointer address macros for btree blocks.
> diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
> index f96a734..aa4765f 100644
> --- a/fs/xfs/xfs_attr_leaf.c
> +++ b/fs/xfs/xfs_attr_leaf.c
> @@ -232,7 +232,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
>  				return 0;
>  			return dp->i_d.di_forkoff;
>  		}
> -		dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot);
> +		dsize = XFS_BMAP_BROOT_SPACE(mp, dp->i_df.if_broot);

Changes to XFS_BMAP_BROOT_SPACE are a good candidate for a separate patch, just
as with LITINO.

>  		break;
>  	}
>  
> diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
> index f338012..821f599 100644
> --- a/fs/xfs/xfs_bmap.c
> +++ b/fs/xfs/xfs_bmap.c
> @@ -3053,6 +3053,7 @@ xfs_bmap_extents_to_btree(
>  	xfs_extnum_t		nextents;	/* number of file extents */
>  	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
>  
> +	mp = ip->i_mount;
>  	ifp = XFS_IFORK_PTR(ip, whichfork);
>  	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
>  
> @@ -3066,16 +3067,18 @@ xfs_bmap_extents_to_btree(
>  	 * Fill in the root.
>  	 */
>  	block = ifp->if_broot;
> -	block->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
> -	block->bb_level = cpu_to_be16(1);
> -	block->bb_numrecs = cpu_to_be16(1);
> -	block->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
> -	block->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
> +	if (xfs_sb_version_hascrc(&mp->m_sb))
> +		xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
> +				 XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino,
> +				 XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
> +	else
> +		xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
> +				 XFS_BMAP_MAGIC, 1, 1, ip->i_ino,
> +				 XFS_BTREE_LONG_PTRS);
>  
>  	/*
>  	 * Need a cursor.  Can't allocate until bb_level is filled in.
>  	 */
> -	mp = ip->i_mount;
>  	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
>  	cur->bc_private.b.firstblock = *firstblock;
>  	cur->bc_private.b.flist = flist;
> @@ -3124,10 +3127,15 @@ xfs_bmap_extents_to_btree(
>  	 */
>  	abp->b_ops = &xfs_bmbt_buf_ops;
>  	ablock = XFS_BUF_TO_BLOCK(abp);
> -	ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
> -	ablock->bb_level = 0;
> -	ablock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
> -	ablock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
> +	if (xfs_sb_version_hascrc(&mp->m_sb))
> +		xfs_btree_init_block_int(mp, ablock, abp->b_bn,
> +				XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
> +				XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
> +	else
> +		xfs_btree_init_block_int(mp, ablock, abp->b_bn,
> +				XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
> +				XFS_BTREE_LONG_PTRS);
> +
>  	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
>  	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
>  	for (cnt = i = 0; i < nextents; i++) {
> @@ -3155,8 +3163,8 @@ xfs_bmap_extents_to_btree(
>  	 * Do all this logging at the end so that
>  	 * the root is at the right level.
>  	 */
> -	xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
>  	xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
> +	xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);

Huh.  Why was that necessary?

>  	ASSERT(*curp == NULL);
>  	*curp = cur;
>  	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
> @@ -3268,8 +3276,12 @@ xfs_bmap_local_to_extents(
>  		*firstblock = args.fsbno;
>  		bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
>  		bp->b_ops = &xfs_bmbt_buf_ops;
> +
>  		memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);

Conflicts now due to 1e82379b0

> +
> +		xfs_trans_buf_set_type(tp, bp, XFS_BLF_BTREE_BUF);

The xfs_trans_buf_set_type changes are a good candidate for a separate patch.

>  		xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
> +
>  		xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
>  		xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
>  		xfs_iext_add(ifp, 0, 1);
> @@ -4023,11 +4035,15 @@ xfs_bmap_sanity_check(
>  {
>  	struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
>  
> -	if (block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) ||
> -	    be16_to_cpu(block->bb_level) != level ||
> +	if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) &&
> +	    block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC))
> +		return 0;
> +
> +	if (be16_to_cpu(block->bb_level) != level ||
>  	    be16_to_cpu(block->bb_numrecs) == 0 ||
>  	    be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
>  		return 0;
> +
>  	return 1;
>  }
>  
> diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
> index 061b45c..3a86c3f 100644
> --- a/fs/xfs/xfs_bmap_btree.c
> +++ b/fs/xfs/xfs_bmap_btree.c
> @@ -37,6 +37,7 @@
>  #include "xfs_error.h"
>  #include "xfs_quota.h"
>  #include "xfs_trace.h"
> +#include "xfs_cksum.h"
>  
>  /*
>   * Determine the extent state.
> @@ -59,24 +60,31 @@ xfs_extent_state(
>   */
>  void
>  xfs_bmdr_to_bmbt(
> -	struct xfs_mount	*mp,
> +	struct xfs_inode	*ip,
>  	xfs_bmdr_block_t	*dblock,
>  	int			dblocklen,
>  	struct xfs_btree_block	*rblock,
>  	int			rblocklen)
>  {
> +	struct xfs_mount	*mp = ip->i_mount;
>  	int			dmxr;
>  	xfs_bmbt_key_t		*fkp;
>  	__be64			*fpp;
>  	xfs_bmbt_key_t		*tkp;
>  	__be64			*tpp;
>  
> -	rblock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC);
> +	if (xfs_sb_version_hascrc(&mp->m_sb))
> +		xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
> +				 XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
> +				 XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
> +	else
> +		xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
> +				 XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
> +				 XFS_BTREE_LONG_PTRS);
> +
>  	rblock->bb_level = dblock->bb_level;
>  	ASSERT(be16_to_cpu(rblock->bb_level) > 0);
>  	rblock->bb_numrecs = dblock->bb_numrecs;
> -	rblock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
> -	rblock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
>  	dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
>  	fkp = XFS_BMDR_KEY_ADDR(dblock, 1);
>  	tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);
> @@ -424,7 +432,13 @@ xfs_bmbt_to_bmdr(
>  	xfs_bmbt_key_t		*tkp;
>  	__be64			*tpp;
>  
> -	ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
> +	if (xfs_sb_version_hascrc(&mp->m_sb)) {
> +		ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_CRC_MAGIC));
> +		ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid));
> +		ASSERT(rblock->bb_u.l.bb_blkno ==
> +		       cpu_to_be64(XFS_BUF_DADDR_NULL));
> +	} else
> +		ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
>  	ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO));
>  	ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO));
>  	ASSERT(rblock->bb_level != 0);
> @@ -708,59 +722,89 @@ xfs_bmbt_key_diff(
>  				      cur->bc_rec.b.br_startoff;
>  }
>  
> -static void
> +static int
>  xfs_bmbt_verify(
>  	struct xfs_buf		*bp)
>  {
>  	struct xfs_mount	*mp = bp->b_target->bt_mount;
>  	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
>  	unsigned int		level;
> -	int			lblock_ok; /* block passes checks */
>  
> -	/* magic number and level verification.
> +	switch (block->bb_magic) {
> +	case cpu_to_be32(XFS_BMAP_CRC_MAGIC):
> +		if (!xfs_sb_version_hascrc(&mp->m_sb))
> +			return false;
> +		if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid))
> +			return false;
> +		if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn)
> +			return false;
> +		/*
> +		 * XXX: need a better way of verifying the owner here. Right now
> +		 * just make sure there has been one set.
> +		 */
> +		if (be64_to_cpu(block->bb_u.l.bb_owner) == 0)
> +			return false;
> +		/* fall through */
> +	case cpu_to_be32(XFS_BMAP_MAGIC):
> +		break;
> +	default:
> +		return false;
> +	}
> +
> +	/*
> +	 * numrecs and level verification.
>  	 *
> -	 * We don't know waht fork we belong to, so just verify that the level
> +	 * We don't know what fork we belong to, so just verify that the level
>  	 * is less than the maximum of the two. Later checks will be more
>  	 * precise.
>  	 */
>  	level = be16_to_cpu(block->bb_level);
> -	lblock_ok = block->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC) &&
> -		    level < max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]);
> -
> -	/* numrecs verification */
> -	lblock_ok = lblock_ok &&
> -		be16_to_cpu(block->bb_numrecs) <= mp->m_bmap_dmxr[level != 0];
> +	if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]))
> +		return false;
> +	if (be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
> +		return false;
>  
>  	/* sibling pointer verification */
> -	lblock_ok = lblock_ok &&
> -		block->bb_u.l.bb_leftsib &&
> -		(block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
> -		 XFS_FSB_SANITY_CHECK(mp,
> -			be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
> -		block->bb_u.l.bb_rightsib &&
> -		(block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
> -		 XFS_FSB_SANITY_CHECK(mp,
> -			be64_to_cpu(block->bb_u.l.bb_rightsib)));
> -
> -	if (!lblock_ok) {
> -		trace_xfs_btree_corrupt(bp, _RET_IP_);
> -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
> -		xfs_buf_ioerror(bp, EFSCORRUPTED);
> -	}
> +	if (!block->bb_u.l.bb_leftsib ||
> +	    (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLDFSBNO) &&
> +	     !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))))
> +		return false;
> +	if (!block->bb_u.l.bb_rightsib ||
> +	    (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLDFSBNO) &&
> +	     !XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))))
> +		return false;
> +
> +	return true;
> +
>  }
>  
>  static void
>  xfs_bmbt_read_verify(
>  	struct xfs_buf	*bp)
>  {
> -	xfs_bmbt_verify(bp);
> +	if (!(xfs_btree_lblock_verify_crc(bp) &&
> +	      xfs_bmbt_verify(bp))) {
> +		trace_xfs_btree_corrupt(bp, _RET_IP_);
> +		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
> +				     bp->b_target->bt_mount, bp->b_addr);
> +		xfs_buf_ioerror(bp, EFSCORRUPTED);
> +	}
> +
>  }
>  
>  static void
>  xfs_bmbt_write_verify(
>  	struct xfs_buf	*bp)
>  {
> -	xfs_bmbt_verify(bp);
> +	if (!xfs_bmbt_verify(bp)) {
> +		xfs_warn(bp->b_target->bt_mount, "bmbt daddr 0x%llx failed", bp->b_bn);
> +		trace_xfs_btree_corrupt(bp, _RET_IP_);
> +		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
> +				     bp->b_target->bt_mount, bp->b_addr);
> +		xfs_buf_ioerror(bp, EFSCORRUPTED);
> +		return;
> +	}
> +	xfs_btree_lblock_calc_crc(bp);
>  }
>  
>  const struct xfs_buf_ops xfs_bmbt_buf_ops = {
> @@ -838,6 +882,8 @@ xfs_bmbt_init_cursor(
>  
>  	cur->bc_ops = &xfs_bmbt_ops;
>  	cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;
> +	if (xfs_sb_version_hascrc(&mp->m_sb))
> +		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
>  
>  	cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
>  	cur->bc_private.b.ip = ip;
> diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
> index 88469ca..1b2f3e7 100644
> --- a/fs/xfs/xfs_bmap_btree.h
> +++ b/fs/xfs/xfs_bmap_btree.h
> @@ -18,7 +18,8 @@
>  #ifndef __XFS_BMAP_BTREE_H__
>  #define __XFS_BMAP_BTREE_H__
>  
> -#define XFS_BMAP_MAGIC	0x424d4150	/* 'BMAP' */
> +#define XFS_BMAP_MAGIC		0x424d4150	/* 'BMAP' */
> +#define XFS_BMAP_CRC_MAGIC	0x424d4158

'BMAX'

Add a comment.

>  struct xfs_btree_cur;
>  struct xfs_btree_block;
> @@ -136,10 +137,10 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
>  
>  /*
>   * Btree block header size depends on a superblock flag.
> - *
> - * (not quite yet, but soon)
>   */
> -#define XFS_BMBT_BLOCK_LEN(mp)	XFS_BTREE_LBLOCK_LEN
> +#define XFS_BMBT_BLOCK_LEN(mp) \
> +	(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
> +		XFS_BTREE_LBLOCK_CRC_LEN : XFS_BTREE_LBLOCK_LEN)
>  
>  #define XFS_BMBT_REC_ADDR(mp, block, index) \
>  	((xfs_bmbt_rec_t *) \
> @@ -186,12 +187,12 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
>  #define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \
>  	XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0))
>  
> -#define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \
> -	(int)(XFS_BTREE_LBLOCK_LEN + \
> +#define XFS_BMAP_BROOT_SPACE_CALC(mp, nrecs) \
> +	(int)(XFS_BMBT_BLOCK_LEN(mp) + \
>  	       ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))
>  
> -#define XFS_BMAP_BROOT_SPACE(bb) \
> -	(XFS_BMAP_BROOT_SPACE_CALC(be16_to_cpu((bb)->bb_numrecs)))
> +#define XFS_BMAP_BROOT_SPACE(mp, bb) \
> +	(XFS_BMAP_BROOT_SPACE_CALC(mp, be16_to_cpu((bb)->bb_numrecs)))
>  #define XFS_BMDR_SPACE_CALC(nrecs) \
>  	(int)(sizeof(xfs_bmdr_block_t) + \
>  	       ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t))))

Are these large broots going to force attributes out of line?

> @@ -204,7 +205,7 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;
>  /*
>   * Prototypes for xfs_bmap.c to call.
>   */
> -extern void xfs_bmdr_to_bmbt(struct xfs_mount *, xfs_bmdr_block_t *, int,
> +extern void xfs_bmdr_to_bmbt(struct xfs_inode *, xfs_bmdr_block_t *, int,
>  			struct xfs_btree_block *, int);
>  extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s);
>  extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r);
> diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
> index db01040..9e8fcad 100644
> --- a/fs/xfs/xfs_btree.c
> +++ b/fs/xfs/xfs_btree.c
> @@ -30,9 +30,11 @@
>  #include "xfs_dinode.h"
>  #include "xfs_inode.h"
>  #include "xfs_inode_item.h"
> +#include "xfs_buf_item.h"
>  #include "xfs_btree.h"
>  #include "xfs_error.h"
>  #include "xfs_trace.h"
> +#include "xfs_cksum.h"
>  
>  /*
>   * Cursor allocation zone.
> @@ -42,9 +44,13 @@ kmem_zone_t	*xfs_btree_cur_zone;
>  /*
>   * Btree magic numbers.
>   */
> -const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
> -	XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
> +static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
> +	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC },
> +	{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
> +	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC }
>  };
> +#define xfs_btree_magic(cur) \
> +	xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
>  
>  
>  STATIC int				/* error (0 or EFSCORRUPTED) */
> @@ -54,30 +60,38 @@ xfs_btree_check_lblock(
>  	int			level,	/* level of the btree block */
>  	struct xfs_buf		*bp)	/* buffer for block, if any */
>  {
> -	int			lblock_ok; /* block passes checks */
> +	int			lblock_ok = 1; /* block passes checks */
>  	struct xfs_mount	*mp;	/* file system mount point */
>  
>  	mp = cur->bc_mp;
> -	lblock_ok =
> -		be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
> +
> +	if (xfs_sb_version_hascrc(&mp->m_sb)) {
> +		lblock_ok = lblock_ok &&
> +			uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) &&
> +			block->bb_u.l.bb_blkno == cpu_to_be64(
> +				bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
> +	}
> +
> +	lblock_ok = lblock_ok &&
> +		be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
>  		be16_to_cpu(block->bb_level) == level &&
>  		be16_to_cpu(block->bb_numrecs) <=
>  			cur->bc_ops->get_maxrecs(cur, level) &&
>  		block->bb_u.l.bb_leftsib &&
>  		(block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
>  		 XFS_FSB_SANITY_CHECK(mp,
> -		 	be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
> +			be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
>  		block->bb_u.l.bb_rightsib &&
>  		(block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
>  		 XFS_FSB_SANITY_CHECK(mp,
> -		 	be64_to_cpu(block->bb_u.l.bb_rightsib)));
> +			be64_to_cpu(block->bb_u.l.bb_rightsib)));
> +
>  	if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
>  			XFS_ERRTAG_BTREE_CHECK_LBLOCK,
>  			XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
>  		if (bp)
>  			trace_xfs_btree_corrupt(bp, _RET_IP_);
> -		XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
> -				 mp);
> +		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
>  		return XFS_ERROR(EFSCORRUPTED);
>  	}
>  	return 0;
> @@ -90,16 +104,26 @@ xfs_btree_check_sblock(
>  	int			level,	/* level of the btree block */
>  	struct xfs_buf		*bp)	/* buffer containing block */
>  {
> +	struct xfs_mount	*mp;	/* file system mount point */
>  	struct xfs_buf		*agbp;	/* buffer for ag. freespace struct */
>  	struct xfs_agf		*agf;	/* ag. freespace structure */
>  	xfs_agblock_t		agflen;	/* native ag. freespace length */
> -	int			sblock_ok; /* block passes checks */
> +	int			sblock_ok = 1; /* block passes checks */
>  
> +	mp = cur->bc_mp;
>  	agbp = cur->bc_private.a.agbp;
>  	agf = XFS_BUF_TO_AGF(agbp);
>  	agflen = be32_to_cpu(agf->agf_length);
> -	sblock_ok =
> -		be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
> +
> +	if (xfs_sb_version_hascrc(&mp->m_sb)) {
> +		sblock_ok = sblock_ok &&
> +			uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) &&
> +			block->bb_u.s.bb_blkno == cpu_to_be64(
> +				bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
> +	}
> +
> +	sblock_ok = sblock_ok &&
> +		be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
>  		be16_to_cpu(block->bb_level) == level &&
>  		be16_to_cpu(block->bb_numrecs) <=
>  			cur->bc_ops->get_maxrecs(cur, level) &&
> @@ -109,13 +133,13 @@ xfs_btree_check_sblock(
>  		(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
>  		 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
>  		block->bb_u.s.bb_rightsib;
> -	if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
> +
> +	if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
>  			XFS_ERRTAG_BTREE_CHECK_SBLOCK,
>  			XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
>  		if (bp)
>  			trace_xfs_btree_corrupt(bp, _RET_IP_);
> -		XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
> -			XFS_ERRLEVEL_LOW, cur->bc_mp, block);
> +		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
>  		return XFS_ERROR(EFSCORRUPTED);
>  	}
>  	return 0;
> @@ -194,6 +218,72 @@ xfs_btree_check_ptr(
>  #endif
>  
>  /*
> + * Calculate CRC on the whole btree block and stuff it into the
> + * long-form btree header.
> + *
> + * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
	       calculating

> + * it into the buffer so recovery knows what the last modifcation was that made
> + * it to disk.
> + */
> +void
> +xfs_btree_lblock_calc_crc(
> +	struct xfs_buf		*bp)
> +{
> +	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
> +	struct xfs_buf_log_item	*bip = bp->b_fspriv;
> +
> +	if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
> +		return;
> +	if (bip)
> +		block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
> +	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
> +			 XFS_BTREE_LBLOCK_CRC_OFF);
> +}
> +
> +bool
> +xfs_btree_lblock_verify_crc(
> +	struct xfs_buf		*bp)
> +{
> +	if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
> +		return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
> +					XFS_BTREE_LBLOCK_CRC_OFF);
> +	return true;
> +}
> +
> +/*
> + * Calculate CRC on the whole btree block and stuff it into the
> + * short-form btree header.
> + *
> + * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
> + * it into the buffer so recovery knows what the last modifcation was that made
> + * it to disk.
> + */
> +void
> +xfs_btree_sblock_calc_crc(
> +	struct xfs_buf		*bp)
> +{
> +	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
> +	struct xfs_buf_log_item	*bip = bp->b_fspriv;
> +
> +	if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
> +		return;
> +	if (bip)
> +		block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
> +	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
> +			 XFS_BTREE_SBLOCK_CRC_OFF);
> +}
> +
> +bool
> +xfs_btree_sblock_verify_crc(
> +	struct xfs_buf		*bp)
> +{
> +	if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
> +		return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
> +					XFS_BTREE_SBLOCK_CRC_OFF);
> +	return true;
> +}
> +
> +/*
>   * Delete the btree cursor.
>   */
>  void
> @@ -277,10 +367,8 @@ xfs_btree_dup_cursor(
>  				*ncur = NULL;
>  				return error;
>  			}
> -			new->bc_bufs[i] = bp;
> -			ASSERT(!xfs_buf_geterror(bp));
> -		} else
> -			new->bc_bufs[i] = NULL;
> +		}
> +		new->bc_bufs[i] = bp;

Why remove the assert?

>  	}
>  	*ncur = new;
>  	return 0;
> @@ -321,9 +409,14 @@ xfs_btree_dup_cursor(
>   */
>  static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
>  {
> -	return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
> -		XFS_BTREE_LBLOCK_LEN :
> -		XFS_BTREE_SBLOCK_LEN;
> +	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
> +		if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
> +			return XFS_BTREE_LBLOCK_CRC_LEN;
> +		return XFS_BTREE_LBLOCK_LEN;
> +	}
> +	if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
> +		return XFS_BTREE_SBLOCK_CRC_LEN;
> +	return XFS_BTREE_SBLOCK_LEN;
>  }
>  
>  /*
> @@ -863,43 +956,85 @@ xfs_btree_set_sibling(
>  }
>  
>  void
> +xfs_btree_init_block_int(
> +	struct xfs_mount	*mp,
> +	struct xfs_btree_block	*buf,
> +	xfs_daddr_t		blkno,
> +	__u32			magic,
> +	__u16			level,
> +	__u16			numrecs,
> +	__u64			owner,
> +	unsigned int		flags)
> +{
> +	buf->bb_magic = cpu_to_be32(magic);
> +	buf->bb_level = cpu_to_be16(level);
> +	buf->bb_numrecs = cpu_to_be16(numrecs);
> +
> +	if (flags & XFS_BTREE_LONG_PTRS) {
> +		buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
> +		buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
> +		if (flags & XFS_BTREE_CRC_BLOCKS) {
> +			buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
> +			buf->bb_u.l.bb_owner = cpu_to_be64(owner);
> +			uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid);
> +			buf->bb_u.l.bb_pad = 0;
> +		}
> +	} else {
> +		/* owner is a 32 bit value on short blocks */
> +		__u32 __owner = (__u32)owner;
> +
> +		buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
> +		buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
> +		if (flags & XFS_BTREE_CRC_BLOCKS) {
> +			buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
> +			buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
> +			uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid);
> +		}
> +	}
> +}
> +

xfs_btree_init_block_int might be good in its own patch.

> +void
>  xfs_btree_init_block(
>  	struct xfs_mount *mp,
>  	struct xfs_buf	*bp,
>  	__u32		magic,
>  	__u16		level,
>  	__u16		numrecs,
> +	__u64		owner,
>  	unsigned int	flags)
>  {
> -	struct xfs_btree_block	*new = XFS_BUF_TO_BLOCK(bp);
> -
> -	new->bb_magic = cpu_to_be32(magic);
> -	new->bb_level = cpu_to_be16(level);
> -	new->bb_numrecs = cpu_to_be16(numrecs);
> -
> -	if (flags & XFS_BTREE_LONG_PTRS) {
> -		new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
> -		new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
> -	} else {
> -		new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
> -		new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
> -	}
> +	xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
> +				 magic, level, numrecs, owner, flags);
>  }
>  
>  STATIC void
>  xfs_btree_init_block_cur(
>  	struct xfs_btree_cur	*cur,
> +	struct xfs_buf		*bp,
>  	int			level,
> -	int			numrecs,
> -	struct xfs_buf		*bp)
> +	int			numrecs)

Why rearrange the order of the args here?

>  {
> -	xfs_btree_init_block(cur->bc_mp, bp, xfs_magics[cur->bc_btnum],
> -			       level, numrecs, cur->bc_flags);
> +	__u64 owner;
> +
> +	/*
> +	 * we can pull the owner from the cursor right now as the different
> +	 * owners align directly with the pointer size of the btree. This may
> +	 * change in future, but is safe for current users of the generic btree
> +	 * code.
> +	 */
> +	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
> +		owner = cur->bc_private.b.ip->i_ino;
> +	else
> +		owner = cur->bc_private.a.agno;

This is something I can look into a bit later... but I'm a little flummoxed by
this one.  Apparently only inodes use long pointers?

> +
> +	xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
> +				 xfs_btree_magic(cur), level, numrecs,
> +				 owner, cur->bc_flags);
>  }
>  
>  /*
>   * Return true if ptr is the last record in the btree and
> - * we need to track updateѕ to this record.  The decision
> + * we need to track updates to this record.  The decision

I can't find a change in that line.  Time for new glasses?

>   * will be further refined in the update_lastrec method.
>   */
>  STATIC int
> @@ -1147,6 +1282,7 @@ xfs_btree_log_keys(
>  	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
>  
>  	if (bp) {
> +		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLF_BTREE_BUF);
>  		xfs_trans_log_buf(cur->bc_tp, bp,
>  				  xfs_btree_key_offset(cur, first),
>  				  xfs_btree_key_offset(cur, last + 1) - 1);
> @@ -1171,6 +1307,7 @@ xfs_btree_log_recs(
>  	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
>  	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
>  
> +	xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLF_BTREE_BUF);
>  	xfs_trans_log_buf(cur->bc_tp, bp,
>  			  xfs_btree_rec_offset(cur, first),
>  			  xfs_btree_rec_offset(cur, last + 1) - 1);
> @@ -1195,6 +1332,7 @@ xfs_btree_log_ptrs(
>  		struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
>  		int			level = xfs_btree_get_level(block);
>  
> +		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLF_BTREE_BUF);
>  		xfs_trans_log_buf(cur->bc_tp, bp,
>  				xfs_btree_ptr_offset(cur, first, level),
>  				xfs_btree_ptr_offset(cur, last + 1, level) - 1);
> @@ -1223,7 +1361,12 @@ xfs_btree_log_block(
>  		offsetof(struct xfs_btree_block, bb_numrecs),
>  		offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
>  		offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
> -		XFS_BTREE_SBLOCK_LEN
> +		offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
> +		offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
> +		offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
> +		offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
> +		offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
> +		XFS_BTREE_SBLOCK_CRC_LEN
>  	};
>  	static const short	loffsets[] = {	/* table of offsets (long) */
>  		offsetof(struct xfs_btree_block, bb_magic),
> @@ -1231,17 +1374,40 @@ xfs_btree_log_block(
>  		offsetof(struct xfs_btree_block, bb_numrecs),
>  		offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
>  		offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
> -		XFS_BTREE_LBLOCK_LEN
> +		offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
> +		offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
> +		offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
> +		offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
> +		offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
> +		offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
> +		XFS_BTREE_LBLOCK_CRC_LEN
>  	};
>  
>  	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
>  	XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
>  
>  	if (bp) {
> +		int nbits;
> +
> +		if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
> +			/*
> +			 * We don't log the CRC when updating a btree
> +			 * block but instead recreate it during log
> +			 * recovery.  As the log buffers have checksums
> +			 * of their this is safe and avoids logging a crc
				   own

> +			 * update in a lot of places.
> +			 */
> +			if (fields == XFS_BB_ALL_BITS)
> +				fields = XFS_BB_ALL_BITS_CRC;
> +			nbits = XFS_BB_NUM_BITS_CRC;
> +		} else {
> +			nbits = XFS_BB_NUM_BITS;
> +		}
>  		xfs_btree_offsets(fields,
>  				  (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
>  					loffsets : soffsets,
> -				  XFS_BB_NUM_BITS, &first, &last);
> +				  nbits, &first, &last);
> +		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLF_BTREE_BUF);
>  		xfs_trans_log_buf(cur->bc_tp, bp, first, last);
>  	} else {
>  		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
> @@ -2204,7 +2370,7 @@ xfs_btree_split(
>  		goto error0;
>  
>  	/* Fill in the btree header for the new right block. */
> -	xfs_btree_init_block_cur(cur, xfs_btree_get_level(left), 0, rbp);
> +	xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
>  
>  	/*
>  	 * Split the entries between the old and the new block evenly.
> @@ -2513,7 +2679,7 @@ xfs_btree_new_root(
>  		nptr = 2;
>  	}
>  	/* Fill in the new block's btree header and log it. */
> -	xfs_btree_init_block_cur(cur, cur->bc_nlevels, 2, nbp);
> +	xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
>  	xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
>  	ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
>  			!xfs_btree_ptr_is_null(cur, &rptr));
> diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
> index f932897..6e6c915 100644
> --- a/fs/xfs/xfs_btree.h
> +++ b/fs/xfs/xfs_btree.h
> @@ -42,11 +42,15 @@ extern kmem_zone_t	*xfs_btree_cur_zone;
>   * Generic btree header.
>   *
>   * This is a combination of the actual format used on disk for short and long
> - * format btrees.  The first three fields are shared by both format, but
> - * the pointers are different and should be used with care.
> + * format btrees.  The first three fields are shared by both format, but the
> + * pointers are different and should be used with care.
>   *
> - * To get the size of the actual short or long form headers please use
> - * the size macros below.  Never use sizeof(xfs_btree_block).
> + * To get the size of the actual short or long form headers please use the size
> + * macros below.  Never use sizeof(xfs_btree_block).
> + *
> + * The blkno, crc, lsn, owner and uuid fields are only available in filesystems
> + * with the crc feature bit, and all accesses to them must be conditional on
> + * that flag.
>   */
>  struct xfs_btree_block {
>  	__be32		bb_magic;	/* magic number for block type */
> @@ -56,10 +60,23 @@ struct xfs_btree_block {
>  		struct {
>  			__be32		bb_leftsib;
>  			__be32		bb_rightsib;
> +
> +			__be64		bb_blkno;
> +			__be64		bb_lsn;
> +			uuid_t		bb_uuid;
> +			__be32		bb_owner;
> +			__le32		bb_crc;
>  		} s;			/* short form pointers */
>  		struct	{
>  			__be64		bb_leftsib;
>  			__be64		bb_rightsib;
> +
> +			__be64		bb_blkno;
> +			__be64		bb_lsn;
> +			uuid_t		bb_uuid;
> +			__be64		bb_owner;
> +			__le32		bb_crc;
> +			__be32		bb_pad; /* padding for alignment */
>  		} l;			/* long form pointers */
>  	} bb_u;				/* rest */
>  };

struct xfs_btree_block {
        __be32          bb_magic;       /* magic number for block type */
        __be16          bb_level;       /* 0 is a leaf */
        __be16          bb_numrecs;     /* current # of data records */
        union {
                struct {
                        __be32          bb_leftsib;
                        __be32          bb_rightsib;
		} s;			/* short form pointers */
                struct {
                        __be32          bb_leftsib;
                        __be32          bb_rightsib;
                        __be64          bb_blkno;
                        __be64          bb_lsn;
                        uuid_t          bb_uuid;
                        __be32          bb_owner;
                        __le32          bb_crc;
                } s_crc;		/* short form pointers with crcs */
                struct  {
                        __be64          bb_leftsib;
                        __be64          bb_rightsib;
		} l;			/* long form pointers */
                struct  {
                        __be64          bb_leftsib;
                        __be64          bb_rightsib;
                        __be64          bb_blkno;
                        __be64          bb_lsn;
                        uuid_t          bb_uuid;
                        __be64          bb_owner;
                        __le32          bb_crc;
                        __be32          bb_pad; /* padding for alignment */
                } l_crc;		/* long form pointers with crcs */
        } bb_u;                         /* rest */
};

> @@ -67,6 +84,16 @@ struct xfs_btree_block {
>  #define XFS_BTREE_SBLOCK_LEN	16	/* size of a short form block */
>  #define XFS_BTREE_LBLOCK_LEN	24	/* size of a long form block */
>  
> +/* sizes of CRC enabled btree blocks */
> +#define XFS_BTREE_SBLOCK_CRC_LEN	(XFS_BTREE_SBLOCK_LEN + 40)
> +#define XFS_BTREE_LBLOCK_CRC_LEN	(XFS_BTREE_LBLOCK_LEN + 48)
> +
> +
> +#define XFS_BTREE_SBLOCK_CRC_OFF \
> +	offsetof(struct xfs_btree_block, bb_u.s.bb_crc)
> +#define XFS_BTREE_LBLOCK_CRC_OFF \
> +	offsetof(struct xfs_btree_block, bb_u.l.bb_crc)
> +
>  
>  /*
>   * Generic key, ptr and record wrapper structures.
> @@ -101,13 +128,11 @@ union xfs_btree_rec {
>  #define	XFS_BB_NUMRECS		0x04
>  #define	XFS_BB_LEFTSIB		0x08
>  #define	XFS_BB_RIGHTSIB		0x10
> +#define	XFS_BB_BLKNO		0x20
>  #define	XFS_BB_NUM_BITS		5
>  #define	XFS_BB_ALL_BITS		((1 << XFS_BB_NUM_BITS) - 1)

Did XFS_BB_BLKNO sneak in from a subsequent patch?

> -
> -/*
> - * Magic numbers for btree blocks.
> - */
> -extern const __uint32_t	xfs_magics[];
> +#define	XFS_BB_NUM_BITS_CRC	8
> +#define	XFS_BB_ALL_BITS_CRC	((1 << XFS_BB_NUM_BITS_CRC) - 1)

I'm a little confused here.  We were using 5 bits, you added BB_BLKNO for 6,
and now you're setting NUM_BITS_CRC to 8?

Probably more readable with the (1<<0) idiom, as below.
 
>  /*
>   * Generic stats interface
> @@ -256,6 +281,7 @@ typedef struct xfs_btree_cur
>  #define XFS_BTREE_LONG_PTRS		(1<<0)	/* pointers are 64bits long */
>  #define XFS_BTREE_ROOT_IN_INODE		(1<<1)	/* root may be variable size */
>  #define XFS_BTREE_LASTREC_UPDATE	(1<<2)	/* track last rec externally */
> +#define XFS_BTREE_CRC_BLOCKS		(1<<3)	/* uses extended btree blocks */
>  
>  
>  #define	XFS_BTREE_NOERROR	0
> @@ -393,8 +419,20 @@ xfs_btree_init_block(
>  	__u32		magic,
>  	__u16		level,
>  	__u16		numrecs,
> +	__u64		owner,
>  	unsigned int	flags);
>  
> +void
> +xfs_btree_init_block_int(
> +	struct xfs_mount	*mp,
> +	struct xfs_btree_block	*buf,
> +	xfs_daddr_t		blkno,
> +	__u32			magic,
> +	__u16			level,
> +	__u16			numrecs,
> +	__u64			owner,
> +	unsigned int		flags);
> +
>  /*
>   * Common btree core entry points.
>   */
> @@ -408,6 +446,14 @@ int xfs_btree_delete(struct xfs_btree_cur *, int *);
>  int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
>  
>  /*
> + * btree block CRC helpers
> + */
> +void xfs_btree_lblock_calc_crc(struct xfs_buf *);
> +bool xfs_btree_lblock_verify_crc(struct xfs_buf *);
> +void xfs_btree_sblock_calc_crc(struct xfs_buf *);
> +bool xfs_btree_sblock_verify_crc(struct xfs_buf *);
> +
> +/*
>   * Internal btree helpers also used by xfs_bmap.c.
>   */
>  void xfs_btree_log_block(struct xfs_btree_cur *, struct xfs_buf *, int);
> diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
> index ee36c88..101ef83 100644
> --- a/fs/xfs/xfs_buf_item.h
> +++ b/fs/xfs/xfs_buf_item.h
> @@ -24,19 +24,33 @@ extern kmem_zone_t	*xfs_buf_item_zone;
>   * This flag indicates that the buffer contains on disk inodes
>   * and requires special recovery handling.
>   */
> -#define	XFS_BLF_INODE_BUF	0x1
> +#define	XFS_BLF_INODE_BUF	(1<<0)
>  /*
>   * This flag indicates that the buffer should not be replayed
>   * during recovery because its blocks are being freed.
>   */
> -#define	XFS_BLF_CANCEL		0x2
> +#define	XFS_BLF_CANCEL		(1<<1)
> +
>  /*
>   * This flag indicates that the buffer contains on disk
>   * user or group dquots and may require special recovery handling.
>   */
> -#define	XFS_BLF_UDQUOT_BUF	0x4
> -#define XFS_BLF_PDQUOT_BUF	0x8
> -#define	XFS_BLF_GDQUOT_BUF	0x10
> +#define	XFS_BLF_UDQUOT_BUF	(1<<2)
> +#define XFS_BLF_PDQUOT_BUF	(1<<3)
> +#define	XFS_BLF_GDQUOT_BUF	(1<<4)
> +
> +/*
> + * all buffers now need flags to tell recovery where the magic number
> + * is so that it can verify and calculate the CRCs on the buffer correctly
> + * once the changes have been replayed into the buffer.
> + */
> +#define XFS_BLF_BTREE_BUF	(1<<5)
> +
> +#define XFS_BLF_TYPE_MASK	\
> +		(XFS_BLF_UDQUOT_BUF | \
> +		 XFS_BLF_PDQUOT_BUF | \
> +		 XFS_BLF_GDQUOT_BUF | \
> +		 XFS_BLF_BTREE_BUF)
>  
>  #define	XFS_BLF_CHUNK		128
>  #define	XFS_BLF_SHIFT		7
> diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
> index 88a3368..6b5bd17 100644
> --- a/fs/xfs/xfs_dinode.h
> +++ b/fs/xfs/xfs_dinode.h
> @@ -107,8 +107,8 @@ typedef enum xfs_dinode_fmt {
>  #define XFS_LITINO(mp, version) \
>  	((int)(((mp)->m_sb.sb_inodesize) - sizeof(struct xfs_dinode)))
>  
> -#define	XFS_BROOT_SIZE_ADJ	\
> -	(XFS_BTREE_LBLOCK_LEN - sizeof(xfs_bmdr_block_t))
> +#define XFS_BROOT_SIZE_ADJ(ip) \
> +	(XFS_BMBT_BLOCK_LEN((ip)->i_mount) - sizeof(xfs_bmdr_block_t))

A good candidate for a separate patch.

>  
>  /*
>   * Inode data & attribute fork sizes, per inode.
> diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
> index 94eaeed..50c43ec 100644
> --- a/fs/xfs/xfs_fsops.c
> +++ b/fs/xfs/xfs_fsops.c
> @@ -316,7 +316,13 @@ xfs_growfs_data_private(
>  			goto error0;
>  		}
>  
> -		xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1, 0);
> +		if (xfs_sb_version_hascrc(&mp->m_sb))
> +			xfs_btree_init_block(mp, bp, XFS_ABTB_CRC_MAGIC, 0, 1,
> +						agno, XFS_BTREE_CRC_BLOCKS);
> +		else
> +			xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1,
> +						agno, 0);
> +
>  		arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
>  		arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
>  		arec->ar_blockcount = cpu_to_be32(
> @@ -339,7 +345,13 @@ xfs_growfs_data_private(
>  			goto error0;
>  		}
>  
> -		xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1, 0);
> +		if (xfs_sb_version_hascrc(&mp->m_sb))
> +			xfs_btree_init_block(mp, bp, XFS_ABTC_CRC_MAGIC, 0, 1,
> +						agno, XFS_BTREE_CRC_BLOCKS);
> +		else
> +			xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1,
> +						agno, 0);
> +
>  		arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
>  		arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
>  		arec->ar_blockcount = cpu_to_be32(
> @@ -363,7 +375,12 @@ xfs_growfs_data_private(
>  			goto error0;
>  		}
>  
> -		xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0, 0);
> +		if (xfs_sb_version_hascrc(&mp->m_sb))
> +			xfs_btree_init_block(mp, bp, XFS_IBT_CRC_MAGIC, 0, 0,
> +						agno, XFS_BTREE_CRC_BLOCKS);
> +		else
> +			xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0,
> +						agno, 0);
>  
>  		error = xfs_bwrite(bp);
>  		xfs_buf_relse(bp);
> diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
> index bec344b..c82ac88 100644
> --- a/fs/xfs/xfs_ialloc_btree.c
> +++ b/fs/xfs/xfs_ialloc_btree.c
> @@ -34,6 +34,7 @@
>  #include "xfs_alloc.h"
>  #include "xfs_error.h"
>  #include "xfs_trace.h"
> +#include "xfs_cksum.h"
>  
>  
>  STATIC int
> @@ -182,52 +183,88 @@ xfs_inobt_key_diff(
>  			  cur->bc_rec.i.ir_startino;
>  }
>  
> -void
> +static int
>  xfs_inobt_verify(
>  	struct xfs_buf		*bp)
>  {
>  	struct xfs_mount	*mp = bp->b_target->bt_mount;
>  	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
> +	struct xfs_perag	*pag = bp->b_pag;
>  	unsigned int		level;
> -	int			sblock_ok; /* block passes checks */
>  
> -	/* magic number and level verification */
> -	level = be16_to_cpu(block->bb_level);
> -	sblock_ok = block->bb_magic == cpu_to_be32(XFS_IBT_MAGIC) &&
> -		    level < mp->m_in_maxlevels;
> +	/*
> +	 * During growfs operations, we can't verify the exact owner as the
> +	 * perag is not fully initialised and hence not attached to the buffer.
> +	 *
> +	 * Similarly, during log recovery we will have a perag structure
> +	 * attached, but the agi information will not yet have been initialised
> +	 * from the on disk AGI. We don't currently use any of this information,
> +	 * but beware of the landmine (i.e. need to check pag->pagi_init) if we
> +	 * ever do.
> +	 */
> +	switch (block->bb_magic) {
> +	case cpu_to_be32(XFS_IBT_CRC_MAGIC):
> +		if (!xfs_sb_version_hascrc(&mp->m_sb))
> +			return false;
> +		if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
> +			return false;
> +		if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
> +			return false;
> +		if (pag &&
> +		    be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
> +			return false;
> +		/* fall through */
> +	case cpu_to_be32(XFS_IBT_MAGIC):
> +		break;
> +	default:
> +		return 0;
> +	}
>  
> -	/* numrecs verification */
> -	sblock_ok = sblock_ok &&
> -		be16_to_cpu(block->bb_numrecs) <= mp->m_inobt_mxr[level != 0];
> +	/* numrecs and level verification */
> +	level = be16_to_cpu(block->bb_level);
> +	if (level >= mp->m_in_maxlevels)
> +		return false;
> +	if (be16_to_cpu(block->bb_numrecs) > mp->m_inobt_mxr[level != 0])
> +		return false;
>  
>  	/* sibling pointer verification */
> -	sblock_ok = sblock_ok &&
> -		(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
> -		 be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) &&
> -		block->bb_u.s.bb_leftsib &&
> -		(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
> -		 be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) &&
> -		block->bb_u.s.bb_rightsib;
> -
> -	if (!sblock_ok) {
> -		trace_xfs_btree_corrupt(bp, _RET_IP_);
> -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
> -		xfs_buf_ioerror(bp, EFSCORRUPTED);
> -	}
> +	if (!block->bb_u.s.bb_leftsib ||
> +	    (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
> +	     block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
> +		return false;
> +	if (!block->bb_u.s.bb_rightsib ||
> +	    (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
> +	     block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
> +		return false;
> +
> +	return true;
>  }
>  
>  static void
>  xfs_inobt_read_verify(
>  	struct xfs_buf	*bp)
>  {
> -	xfs_inobt_verify(bp);
> +	if (!(xfs_btree_sblock_verify_crc(bp) &&
> +	      xfs_inobt_verify(bp))) {
> +		trace_xfs_btree_corrupt(bp, _RET_IP_);
> +		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
> +				     bp->b_target->bt_mount, bp->b_addr);
> +		xfs_buf_ioerror(bp, EFSCORRUPTED);
> +	}
>  }
>  
>  static void
>  xfs_inobt_write_verify(
>  	struct xfs_buf	*bp)
>  {
> -	xfs_inobt_verify(bp);
> +	if (!xfs_inobt_verify(bp)) {
> +		trace_xfs_btree_corrupt(bp, _RET_IP_);
> +		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
> +				     bp->b_target->bt_mount, bp->b_addr);
> +		xfs_buf_ioerror(bp, EFSCORRUPTED);
> +	}
> +	xfs_btree_sblock_calc_crc(bp);
> +
>  }
>  
>  const struct xfs_buf_ops xfs_inobt_buf_ops = {
> @@ -301,6 +338,8 @@ xfs_inobt_init_cursor(
>  	cur->bc_blocklog = mp->m_sb.sb_blocklog;
>  
>  	cur->bc_ops = &xfs_inobt_ops;
> +	if (xfs_sb_version_hascrc(&mp->m_sb))
> +		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
>  
>  	cur->bc_private.a.agbp = agbp;
>  	cur->bc_private.a.agno = agno;
> diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
> index 25c0239..78dfd1e 100644
> --- a/fs/xfs/xfs_ialloc_btree.h
> +++ b/fs/xfs/xfs_ialloc_btree.h
> @@ -29,7 +29,8 @@ struct xfs_mount;
>  /*
>   * There is a btree for the inode map per allocation group.
>   */
> -#define	XFS_IBT_MAGIC	0x49414254	/* 'IABT' */
> +#define	XFS_IBT_MAGIC		0x49414254	/* 'IABT' */
> +#define	XFS_IBT_CRC_MAGIC	0x4941425c

/* 'IAB?' */

>  
>  typedef	__uint64_t	xfs_inofree_t;
>  #define	XFS_INODES_PER_CHUNK		(NBBY * sizeof(xfs_inofree_t))
> @@ -76,10 +77,10 @@ typedef __be32 xfs_inobt_ptr_t;
>  
>  /*
>   * Btree block header size depends on a superblock flag.
> - *
> - * (not quite yet, but soon)
>   */
> -#define XFS_INOBT_BLOCK_LEN(mp)	XFS_BTREE_SBLOCK_LEN
> +#define XFS_INOBT_BLOCK_LEN(mp) \
> +	(xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
> +		XFS_BTREE_SBLOCK_CRC_LEN : XFS_BTREE_SBLOCK_LEN)
>  
>  /*
>   * Record, key, and pointer address macros for btree blocks.
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index 4f20165..202ce37 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -786,6 +786,7 @@ xfs_iformat_btree(
>  	xfs_dinode_t		*dip,
>  	int			whichfork)
>  {
> +	struct xfs_mount	*mp = ip->i_mount;
>  	xfs_bmdr_block_t	*dfp;
>  	xfs_ifork_t		*ifp;
>  	/* REFERENCED */
> @@ -794,7 +795,7 @@ xfs_iformat_btree(
>  
>  	ifp = XFS_IFORK_PTR(ip, whichfork);
>  	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
> -	size = XFS_BMAP_BROOT_SPACE(dfp);
> +	size = XFS_BMAP_BROOT_SPACE(mp, dfp);
>  	nrecs = be16_to_cpu(dfp->bb_numrecs);
>  
>  	/*
> @@ -805,14 +806,14 @@ xfs_iformat_btree(
>  	 * blocks.
>  	 */
>  	if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
> -			XFS_IFORK_MAXEXT(ip, whichfork) ||
> +					XFS_IFORK_MAXEXT(ip, whichfork) ||
>  		     XFS_BMDR_SPACE_CALC(nrecs) >
> -			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) ||
> +					XFS_DFORK_SIZE(dip, mp, whichfork) ||
>  		     XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
> -		xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).",
> -			(unsigned long long) ip->i_ino);
> +		xfs_warn(mp, "corrupt inode %Lu (btree).",
> +					(unsigned long long) ip->i_ino);
>  		XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
> -				 ip->i_mount, dip);
> +					 mp, dip);
>  		return XFS_ERROR(EFSCORRUPTED);
>  	}
>  
> @@ -823,8 +824,7 @@ xfs_iformat_btree(
>  	 * Copy and convert from the on-disk structure
>  	 * to the in-memory structure.
>  	 */
> -	xfs_bmdr_to_bmbt(ip->i_mount, dfp,
> -			 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
> +	xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
>  			 ifp->if_broot, size);

Changing xfs_bmdr_to_bmbt to take an inode pointer could be its own patch.

>  	ifp->if_flags &= ~XFS_IFEXTENTS;
>  	ifp->if_flags |= XFS_IFBROOT;
> @@ -2037,7 +2037,7 @@ xfs_iroot_realloc(
>  		 * allocate it now and get out.
>  		 */
>  		if (ifp->if_broot_bytes == 0) {
> -			new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff);
> +			new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
>  			ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
>  			ifp->if_broot_bytes = (int)new_size;
>  			return;
> @@ -2051,9 +2051,9 @@ xfs_iroot_realloc(
>  		 */
>  		cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
>  		new_max = cur_max + rec_diff;
> -		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
> +		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
>  		ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
> -				(size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */
> +				XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
>  				KM_SLEEP | KM_NOFS);
>  		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
>  						     ifp->if_broot_bytes);
> @@ -2061,7 +2061,7 @@ xfs_iroot_realloc(
>  						     (int)new_size);
>  		ifp->if_broot_bytes = (int)new_size;
>  		ASSERT(ifp->if_broot_bytes <=
> -			XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
> +			XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip));
>  		memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
>  		return;
>  	}
> @@ -2076,7 +2076,7 @@ xfs_iroot_realloc(
>  	new_max = cur_max + rec_diff;
>  	ASSERT(new_max >= 0);
>  	if (new_max > 0)
> -		new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max);
> +		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
>  	else
>  		new_size = 0;
>  	if (new_size > 0) {
> @@ -2084,7 +2084,8 @@ xfs_iroot_realloc(
>  		/*
>  		 * First copy over the btree block header.
>  		 */
> -		memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN);
> +		memcpy(new_broot, ifp->if_broot,
> +			XFS_BMBT_BLOCK_LEN(ip->i_mount));
>  	} else {
>  		new_broot = NULL;
>  		ifp->if_flags &= ~XFS_IFBROOT;
> @@ -2114,7 +2115,7 @@ xfs_iroot_realloc(
>  	ifp->if_broot = new_broot;
>  	ifp->if_broot_bytes = (int)new_size;
>  	ASSERT(ifp->if_broot_bytes <=
> -		XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ);
> +		XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip));
>  	return;
>  }
>  
> @@ -2427,7 +2428,7 @@ xfs_iflush_fork(
>  			ASSERT(ifp->if_broot != NULL);
>  			ASSERT(ifp->if_broot_bytes <=
>  			       (XFS_IFORK_SIZE(ip, whichfork) +
> -				XFS_BROOT_SIZE_ADJ));
> +				XFS_BROOT_SIZE_ADJ(ip)));
>  			xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
>  				(xfs_bmdr_block_t *)cp,
>  				XFS_DFORK_SIZE(dip, mp, whichfork));
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index 96fcbb8..c57a987 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -29,6 +29,7 @@
>  #include "xfs_bmap_btree.h"
>  #include "xfs_alloc_btree.h"
>  #include "xfs_ialloc_btree.h"
> +#include "xfs_btree.h"
>  #include "xfs_dinode.h"
>  #include "xfs_inode.h"
>  #include "xfs_inode_item.h"
> @@ -1929,6 +1930,33 @@ xlog_recover_do_reg_buffer(
>  
>  	/* Shouldn't be any more regions */
>  	ASSERT(i == item->ri_total);
> +
> +	switch (buf_f->blf_flags & XFS_BLF_TYPE_MASK) {
> +	case XFS_BLF_BTREE_BUF:
> +		switch (be32_to_cpu(*(__be32 *)bp->b_addr)) {
> +		case XFS_ABTB_CRC_MAGIC:
> +		case XFS_ABTC_CRC_MAGIC:
> +		case XFS_ABTB_MAGIC:
> +		case XFS_ABTC_MAGIC:
> +			bp->b_ops = &xfs_allocbt_buf_ops;
> +			break;
> +		case XFS_IBT_CRC_MAGIC:
> +		case XFS_IBT_MAGIC:
> +			bp->b_ops = &xfs_inobt_buf_ops;
> +			break;
> +		case XFS_BMAP_CRC_MAGIC:
> +		case XFS_BMAP_MAGIC:
> +			bp->b_ops = &xfs_bmbt_buf_ops;
> +			break;
> +		default:
> +			xfs_warn(mp, "Bad btree block magic!");
> +			ASSERT(0);
> +			break;
> +		}
> +		break;
> +	default:
> +		break;
> +	}
>  }
>  
>  /*
> diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
> index c6c0601..932de22 100644
> --- a/fs/xfs/xfs_trans.h
> +++ b/fs/xfs/xfs_trans.h
> @@ -503,6 +503,8 @@ void		xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
>  void		xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
>  void		xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
>  void		xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
> +void		xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *,
> +				       uint);
>  void		xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
>  void		xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint);
>  void		xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
> diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
> index 3edf5db..f950edd 100644
> --- a/fs/xfs/xfs_trans_buf.c
> +++ b/fs/xfs/xfs_trans_buf.c
> @@ -659,6 +659,7 @@ xfs_trans_binval(
>  		ASSERT(XFS_BUF_ISSTALE(bp));
>  		ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
>  		ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF));
> +		ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_TYPE_MASK));
>  		ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
>  		ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY);
>  		ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
> @@ -671,6 +672,7 @@ xfs_trans_binval(
>  	bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY);
>  	bip->__bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
>  	bip->__bli_format.blf_flags |= XFS_BLF_CANCEL;
> +	bip->__bli_format.blf_flags &= ~XFS_BLF_TYPE_MASK;
>  	for (i = 0; i < bip->bli_format_count; i++) {
>  		memset(bip->bli_formats[i].blf_data_map, 0,
>  		       (bip->bli_formats[i].blf_map_size * sizeof(uint)));
> @@ -751,6 +753,26 @@ xfs_trans_inode_alloc_buf(
>  	bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
>  }
>  
> +/*
> + * Set the type of the buffer for log recovery so that it can correctly identify
> + * and hence attach the correct buffer ops to the buffer after replay.
> + */
> +void
> +xfs_trans_buf_set_type(
> +	struct xfs_trans	*tp,
> +	struct xfs_buf		*bp,
> +	uint			type)
> +{
> +	struct xfs_buf_log_item	*bip = bp->b_fspriv;
> +
> +	ASSERT(bp->b_transp == tp);
> +	ASSERT(bip != NULL);
> +	ASSERT(atomic_read(&bip->bli_refcount) > 0);
> +	ASSERT((type & XFS_BLF_TYPE_MASK) != 0);
> +
> +	bip->__bli_format.blf_flags &= ~XFS_BLF_TYPE_MASK;
> +	bip->__bli_format.blf_flags |= type;
> +}
>  
>  /*
>   * Similar to xfs_trans_inode_buf(), this marks the buffer as a cluster of
> @@ -769,14 +791,9 @@ xfs_trans_dquot_buf(
>  	xfs_buf_t	*bp,
>  	uint		type)
>  {
> -	xfs_buf_log_item_t	*bip = bp->b_fspriv;
> -
> -	ASSERT(bp->b_transp == tp);
> -	ASSERT(bip != NULL);
>  	ASSERT(type == XFS_BLF_UDQUOT_BUF ||
>  	       type == XFS_BLF_PDQUOT_BUF ||
>  	       type == XFS_BLF_GDQUOT_BUF);
> -	ASSERT(atomic_read(&bip->bli_refcount) > 0);
>  
> -	bip->__bli_format.blf_flags |= type;
> +	xfs_trans_buf_set_type(tp, bp, type);
>  }
> -- 
> 1.7.10
> 
> _______________________________________________
> xfs mailing list
> xfs@xxxxxxxxxxx
> http://oss.sgi.com/mailman/listinfo/xfs

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs



[Index of Archives]     [Linux XFS Devel]     [Linux Filesystem Development]     [Filesystem Testing]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux