Re: [PATCH 032/119] xfs: add rmap btree operations

Brian Foster <bfoster@xxxxxxxxxx> · Fri, 8 Jul 2016 14:33:47 -0400

On Thu, Jun 16, 2016 at 06:21:17PM -0700, Darrick J. Wong wrote:
> From: Dave Chinner <dchinner@xxxxxxxxxx>
> 
> Implement the generic btree operations needed to manipulate rmap
> btree blocks. This is very similar to the per-ag freespace btree
> implementation, and uses the AGFL for allocation and freeing of
> blocks.
> 
> Adapt the rmap btree to store owner offsets within each rmap record,
> and to handle the primary key being redefined as the tuple
> [agblk, owner, offset].  The expansion of the primary key is crucial
> to allowing multiple owners per extent.
> 
> [darrick: adapt the btree ops to deal with offsets]
> [darrick: remove init_rec_from_key]
> [darrick: move unwritten bit to rm_offset]
> 
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> Reviewed-by: Dave Chinner <dchinner@xxxxxxxxxx>
> Signed-off-by: Dave Chinner <david@xxxxxxxxxxxxx>
> ---
>  fs/xfs/libxfs/xfs_btree.h      |    1 
>  fs/xfs/libxfs/xfs_rmap.c       |   96 ++++++++++++++++
>  fs/xfs/libxfs/xfs_rmap_btree.c |  243 ++++++++++++++++++++++++++++++++++++++++
>  fs/xfs/libxfs/xfs_rmap_btree.h |    9 +
>  fs/xfs/xfs_trace.h             |    3 
>  5 files changed, 352 insertions(+)
> 
> 
> diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
> index 90ea2a7..9963c48 100644
> --- a/fs/xfs/libxfs/xfs_btree.h
> +++ b/fs/xfs/libxfs/xfs_btree.h
> @@ -216,6 +216,7 @@ union xfs_btree_irec {
>  	xfs_alloc_rec_incore_t		a;
>  	xfs_bmbt_irec_t			b;
>  	xfs_inobt_rec_incore_t		i;
> +	struct xfs_rmap_irec		r;
>  };
>  
>  /*
> diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
> index d1fd471..c6a5a0b 100644
> --- a/fs/xfs/libxfs/xfs_rmap.c
> +++ b/fs/xfs/libxfs/xfs_rmap.c
> @@ -37,6 +37,102 @@
>  #include "xfs_error.h"
>  #include "xfs_extent_busy.h"
>  
...
> +/*
> + * Update the record referred to by cur to the value given
> + * by [bno, len, owner, offset].
> + * This either works (return 0) or gets an EFSCORRUPTED error.
> + */
> +STATIC int
> +xfs_rmap_update(

This throws an unused warning, but I assume it will be used later.

> +	struct xfs_btree_cur	*cur,
> +	struct xfs_rmap_irec	*irec)
> +{
> +	union xfs_btree_rec	rec;
> +
> +	rec.rmap.rm_startblock = cpu_to_be32(irec->rm_startblock);
> +	rec.rmap.rm_blockcount = cpu_to_be32(irec->rm_blockcount);
> +	rec.rmap.rm_owner = cpu_to_be64(irec->rm_owner);
> +	rec.rmap.rm_offset = cpu_to_be64(
> +			xfs_rmap_irec_offset_pack(irec));
> +	return xfs_btree_update(cur, &rec);
> +}
> +
...
>  int
>  xfs_rmap_free(
>  	struct xfs_trans	*tp,
> diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
> index 7a35c78..c50c725 100644
> --- a/fs/xfs/libxfs/xfs_rmap_btree.c
> +++ b/fs/xfs/libxfs/xfs_rmap_btree.c
...
> @@ -43,6 +68,173 @@ xfs_rmapbt_dup_cursor(
>  			cur->bc_private.a.agbp, cur->bc_private.a.agno);
>  }
>  
> +STATIC void
> +xfs_rmapbt_set_root(
> +	struct xfs_btree_cur	*cur,
> +	union xfs_btree_ptr	*ptr,
> +	int			inc)
> +{
> +	struct xfs_buf		*agbp = cur->bc_private.a.agbp;
> +	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);
> +	xfs_agnumber_t		seqno = be32_to_cpu(agf->agf_seqno);
> +	int			btnum = cur->bc_btnum;
> +	struct xfs_perag	*pag = xfs_perag_get(cur->bc_mp, seqno);
> +
> +	ASSERT(ptr->s != 0);
> +
> +	agf->agf_roots[btnum] = ptr->s;
> +	be32_add_cpu(&agf->agf_levels[btnum], inc);
> +	pag->pagf_levels[btnum] += inc;
> +	xfs_perag_put(pag);
> +
> +	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
> +}
> +
> +STATIC int
> +xfs_rmapbt_alloc_block(
> +	struct xfs_btree_cur	*cur,
> +	union xfs_btree_ptr	*start,
> +	union xfs_btree_ptr	*new,
> +	int			*stat)
> +{
> +	int			error;
> +	xfs_agblock_t		bno;
> +
> +	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
> +
> +	/* Allocate the new block from the freelist. If we can't, give up.  */
> +	error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
> +				       &bno, 1);
> +	if (error) {
> +		XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
> +		return error;
> +	}
> +
> +	trace_xfs_rmapbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno,
> +			bno, 1);
> +	if (bno == NULLAGBLOCK) {
> +		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
> +		*stat = 0;
> +		return 0;
> +	}
> +
> +	xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1,
> +			false);
> +
> +	xfs_trans_agbtree_delta(cur->bc_tp, 1);
> +	new->s = cpu_to_be32(bno);
> +
> +	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
> +	*stat = 1;
> +	return 0;
> +}
> +
> +STATIC int
> +xfs_rmapbt_free_block(
> +	struct xfs_btree_cur	*cur,
> +	struct xfs_buf		*bp)
> +{
> +	struct xfs_buf		*agbp = cur->bc_private.a.agbp;
> +	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);
> +	xfs_agblock_t		bno;
> +	int			error;
> +
> +	bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
> +	trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_private.a.agno,
> +			bno, 1);
> +	error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
> +	if (error)
> +		return error;
> +
> +	xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
> +			      XFS_EXTENT_BUSY_SKIP_DISCARD);
> +	xfs_trans_agbtree_delta(cur->bc_tp, -1);
> +
> +	xfs_trans_binval(cur->bc_tp, bp);

This is handled in the generic btree code.

> +	return 0;
> +}
> +
...
> @@ -117,12 +309,63 @@ const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
>  	.verify_write		= xfs_rmapbt_write_verify,
>  };
>  
> +#if defined(DEBUG) || defined(XFS_WARN)
> +STATIC int
> +xfs_rmapbt_keys_inorder(
> +	struct xfs_btree_cur	*cur,
> +	union xfs_btree_key	*k1,
> +	union xfs_btree_key	*k2)
> +{
> +	if (be32_to_cpu(k1->rmap.rm_startblock) <
> +	    be32_to_cpu(k2->rmap.rm_startblock))
> +		return 1;
> +	if (be64_to_cpu(k1->rmap.rm_owner) <
> +	    be64_to_cpu(k2->rmap.rm_owner))
> +		return 1;
> +	if (XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset)) <=
> +	    XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset)))
> +		return 1;
> +	return 0;

I might just not be familiar enough with the rmapbt ordering rules, but
this doesn't look right. If the rm_startblock values are out of order
(k1 startblock > k2 startblock), but either of the owner or offset
values are in-order, then we call the keys in order. Is that intentional
or should (k1->rmap.rm_startblock > k2->rmap.rm_startblock) always
return 0?

> +}
> +
> +STATIC int
> +xfs_rmapbt_recs_inorder(
> +	struct xfs_btree_cur	*cur,
> +	union xfs_btree_rec	*r1,
> +	union xfs_btree_rec	*r2)
> +{
> +	if (be32_to_cpu(r1->rmap.rm_startblock) <
> +	    be32_to_cpu(r2->rmap.rm_startblock))
> +		return 1;
> +	if (XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset)) <
> +	    XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset)))
> +		return 1;
> +	if (be64_to_cpu(r1->rmap.rm_owner) <=
> +	    be64_to_cpu(r2->rmap.rm_owner))
> +		return 1;
> +	return 0;
> +}

Same question here.

Brian

> +#endif	/* DEBUG */
> +
>  static const struct xfs_btree_ops xfs_rmapbt_ops = {
>  	.rec_len		= sizeof(struct xfs_rmap_rec),
>  	.key_len		= sizeof(struct xfs_rmap_key),
>  
>  	.dup_cursor		= xfs_rmapbt_dup_cursor,
> +	.set_root		= xfs_rmapbt_set_root,
> +	.alloc_block		= xfs_rmapbt_alloc_block,
> +	.free_block		= xfs_rmapbt_free_block,
> +	.get_minrecs		= xfs_rmapbt_get_minrecs,
> +	.get_maxrecs		= xfs_rmapbt_get_maxrecs,
> +	.init_key_from_rec	= xfs_rmapbt_init_key_from_rec,
> +	.init_rec_from_cur	= xfs_rmapbt_init_rec_from_cur,
> +	.init_ptr_from_cur	= xfs_rmapbt_init_ptr_from_cur,
> +	.key_diff		= xfs_rmapbt_key_diff,
>  	.buf_ops		= &xfs_rmapbt_buf_ops,
> +#if defined(DEBUG) || defined(XFS_WARN)
> +	.keys_inorder		= xfs_rmapbt_keys_inorder,
> +	.recs_inorder		= xfs_rmapbt_recs_inorder,
> +#endif
>  };
>  
>  /*
> diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h
> index 462767f..17fa383 100644
> --- a/fs/xfs/libxfs/xfs_rmap_btree.h
> +++ b/fs/xfs/libxfs/xfs_rmap_btree.h
> @@ -52,6 +52,15 @@ struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp,
>  int xfs_rmapbt_maxrecs(struct xfs_mount *mp, int blocklen, int leaf);
>  extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp);
>  
> +int xfs_rmap_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno,
> +		xfs_extlen_t len, uint64_t owner, uint64_t offset,
> +		unsigned int flags, int *stat);
> +int xfs_rmap_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t bno,
> +		xfs_extlen_t len, uint64_t owner, uint64_t offset,
> +		unsigned int flags, int *stat);
> +int xfs_rmap_get_rec(struct xfs_btree_cur *cur, struct xfs_rmap_irec *irec,
> +		int *stat);
> +
>  int xfs_rmap_alloc(struct xfs_trans *tp, struct xfs_buf *agbp,
>  		   xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len,
>  		   struct xfs_owner_info *oinfo);
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index b4ee9c8..28bd991 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -2470,6 +2470,9 @@ DEFINE_RMAP_EVENT(xfs_rmap_alloc_extent);
>  DEFINE_RMAP_EVENT(xfs_rmap_alloc_extent_done);
>  DEFINE_RMAP_EVENT(xfs_rmap_alloc_extent_error);
>  
> +DEFINE_BUSY_EVENT(xfs_rmapbt_alloc_block);
> +DEFINE_BUSY_EVENT(xfs_rmapbt_free_block);
> +
>  #endif /* _TRACE_XFS_H */
>  
>  #undef TRACE_INCLUDE_PATH
> 
> _______________________________________________
> xfs mailing list
> xfs@xxxxxxxxxxx
> http://oss.sgi.com/mailman/listinfo/xfs
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html