Re: [PATCH 01/14] xfs: create a per-AG btree to track reference counts

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Jun 25, 2015 at 04:39:16PM -0700, Darrick J. Wong wrote:
> Create a per-AG btree to track the reference counts of physical blocks
> to support reflink.

Few things from a quick glance...

> --- a/fs/xfs/libxfs/xfs_alloc.c
> +++ b/fs/xfs/libxfs/xfs_alloc.c
> @@ -54,6 +54,8 @@ xfs_extlen_t
>  xfs_prealloc_blocks(
>  	struct xfs_mount	*mp)
>  {
> +	if (xfs_sb_version_hasreflink(&mp->m_sb))
> +		return XFS_RL_BLOCK(mp) + 1;

Should introduce the sb version stuff as a separate patch perhaps
with the basic infrastructure defines (see how I did the first rmap
btree patch).

> @@ -1117,6 +1118,9 @@ xfs_btree_set_refs(
>  	case XFS_BTNUM_RMAP:
>  		xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF);
>  		break;
> +	case XFS_BTNUM_RL:

Probably better to call it XFS_BTNUM_REFLINK

> diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
> index 48ab2b1..a3f8661 100644
> --- a/fs/xfs/libxfs/xfs_btree.h
> +++ b/fs/xfs/libxfs/xfs_btree.h
> @@ -43,6 +43,7 @@ union xfs_btree_key {
>  	xfs_alloc_key_t			alloc;
>  	struct xfs_inobt_key		inobt;
>  	struct xfs_rmap_key		rmap;
> +	xfs_reflink_key_t		reflink;

No new typedefs. struct xfs_reflink_key...

(only say this once, but applies many times ;)

> diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
> index 9cff517..e4954ab 100644
> --- a/fs/xfs/libxfs/xfs_format.h
> +++ b/fs/xfs/libxfs/xfs_format.h
> @@ -446,9 +446,11 @@ xfs_sb_has_compat_feature(
>  
>  #define XFS_SB_FEAT_RO_COMPAT_FINOBT   (1 << 0)		/* free inode btree */
>  #define XFS_SB_FEAT_RO_COMPAT_RMAPBT   (1 << 1)		/* reverse map btree */
> +#define XFS_SB_FEAT_RO_COMPAT_REFLINK  (1 << 2)		/* reflink btree */
>  #define XFS_SB_FEAT_RO_COMPAT_ALL \
>  		(XFS_SB_FEAT_RO_COMPAT_FINOBT | \
> -		 XFS_SB_FEAT_RO_COMPAT_RMAPBT)
> +		 XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
> +		 XFS_SB_FEAT_RO_COMPAT_REFLINK)

The XFS_SB_FEAT_RO_COMPAT_REFLINK flag shoul dbe added as a separate
patch and put last in the series so it is only enabled once
everything is complete.


>  #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN	~XFS_SB_FEAT_RO_COMPAT_ALL
>  static inline bool
>  xfs_sb_has_ro_compat_feature(
> @@ -522,6 +524,12 @@ static inline bool xfs_sb_version_hasrmapbt(struct xfs_sb *sbp)
>  		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT);
>  }
>  
> +static inline int xfs_sb_version_hasreflink(xfs_sb_t *sbp)

bool.

> @@ -1338,6 +1349,50 @@ typedef __be32 xfs_rmap_ptr_t;
>  	 XFS_IBT_BLOCK(mp) + 1)
>  
>  /*
> + * reflink Btree format definitions
> + *
> + */
> +#define	XFS_RLBT_CRC_MAGIC	0x524C4233	/* 'RLB3' */

#define        XFS_RMAP_CRC_MAGIC      0x524d4233      /* 'RMB3' */

Only one bit difference in the magic numbers, which means they are
too similar. "RFL3" might be better or maybe "R3FL"...


> +/*
> + * Data record/key structure
> + */
> +typedef struct xfs_reflink_rec {
> +	__be32		rr_startblock;	/* starting block number */
> +	__be32		rr_blockcount;	/* count of blocks */
> +	__be32		rr_nlinks;	/* number of inodes linked here */
> +} xfs_reflink_rec_t;
> +
> +typedef struct xfs_reflink_key {
> +	__be32		rr_startblock;	/* starting block number */
> +} xfs_reflink_key_t;
> +
> +typedef struct xfs_reflink_rec_incore {
> +	xfs_agblock_t	rr_startblock;	/* starting block number */
> +	xfs_extlen_t	rr_blockcount;	/* count of free blocks */
> +	xfs_nlink_t	rr_nlinks;	/* number of inodes linked here */
> +} xfs_reflink_rec_incore_t;

We have being using "irec" as shorthand for "in-core record". i.e:
struct xfs_reflink_irec.

(kill typedefs)

> +
> +/*
> + * When a block hits MAXRLCOUNT references, it becomes permanently
> + * stuck in CoW mode, because who knows how many times it's really
> + * referenced.
> + */
> +#define MAXRLCOUNT	((xfs_nlink_t)~0U)
> +#define MAXRLEXTLEN	((xfs_extlen_t)~0U)

I'd suggest that if we hit the maximum count, we just abort the
reflink operation.

> +/* btree pointer type */
> +typedef __be32 xfs_reflink_ptr_t;
> +
> +#define	XFS_RL_BLOCK(mp) \
> +	(xfs_sb_version_hasrmapbt(&((mp)->m_sb)) ? \
> +	 XFS_RMAP_BLOCK(mp) + 1 : \
> +	 (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
> +	  XFS_FIBT_BLOCK(mp) + 1 : \
> +	  XFS_IBT_BLOCK(mp) + 1))

That's getting unwieldy. It's large enough for a function....

> +#ifdef REFLINK_DEBUG
> +# define dbg_printk(f, a...)  do {printk(KERN_ERR f, ## a); } while (0)
> +#else
> +# define dbg_printk(f, a...)
> +#endif

xfs_debug() is your friend.

> +#define CHECK_AG_NUMBER(mp, agno) \
> +	do { \
> +		ASSERT((agno) != NULLAGNUMBER); \
> +		ASSERT((agno) < (mp)->m_sb.sb_agcount); \
> +	} while(0);

Ugh. Used once, just open code.

> +#define CHECK_AG_EXTENT(mp, agbno, len) \
> +	do { \
> +		ASSERT((agbno) != NULLAGBLOCK); \
> +		ASSERT((len) > 0); \
> +		ASSERT((unsigned long long)(agbno) + (len) <= \
> +				(mp)->m_sb.sb_agblocks); \
> +	} while(0);

These are really used in places where corruption checks are
warranted, or the extent has already been checked....

> +#define XFS_WANT_CORRUPTED_RLEXT_GOTO(mp, have, agbno, len, nr, label) \
> +	do { \
> +		XFS_WANT_CORRUPTED_GOTO((mp), (have) == 1, label); \
> +		XFS_WANT_CORRUPTED_GOTO((mp), (len) > 0, label); \
> +		XFS_WANT_CORRUPTED_GOTO((mp), (nr) >= 2, label); \
> +		XFS_WANT_CORRUPTED_GOTO((mp), (unsigned long long)(agbno) + \
> +				(len) <= (mp)->m_sb.sb_agblocks, label); \
> +	} while(0);

Unused.

> +
> +STATIC int
> +xfs_reflinkbt_alloc_block(
> +	struct xfs_btree_cur	*cur,
> +	union xfs_btree_ptr	*start,
> +	union xfs_btree_ptr	*new,
> +	int			*stat)
> +{
> +	int			error;
> +	xfs_agblock_t		bno;
> +
> +	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
> +
> +	/* Allocate the new block from the freelist. If we can't, give up.  */
> +	error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
> +				       &bno, 1);
> +	if (error) {
> +		XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
> +		return error;
> +	}

Why does the reflink btree use the free list? Why can't it use
block allocation like the BMBT tree?

> +/*
> + * Allocate a new reflink btree cursor.
> + */
> +struct xfs_btree_cur *			/* new reflink btree cursor */
> +xfs_reflinkbt_init_cursor(
> +	struct xfs_mount	*mp,		/* file system mount point */
> +	struct xfs_trans	*tp,		/* transaction pointer */
> +	struct xfs_buf		*agbp,		/* buffer for agf structure */
> +	xfs_agnumber_t		agno)		/* allocation group number */

No real need for these comments on the variables. They are redundant
as the code documents what they are just fine.

> +{
> +	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);
> +	struct xfs_btree_cur	*cur;
> +
> +	CHECK_AG_NUMBER(mp, agno);
> +	cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
> +
> +	cur->bc_tp = tp;
> +	cur->bc_mp = mp;
> +	cur->bc_btnum = XFS_BTNUM_RL;
> +	cur->bc_blocklog = mp->m_sb.sb_blocklog;
> +	cur->bc_ops = &xfs_reflinkbt_ops;
> +
> +	cur->bc_nlevels = be32_to_cpu(agf->agf_reflink_level);
> +
> +	cur->bc_private.a.agbp = agbp;
> +	cur->bc_private.a.agno = agno;
> +
> +	if (xfs_sb_version_hascrc(&mp->m_sb))
> +		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;

Can be set unconditionally.

The next set of functions normally go into a different file. i.e the
"xfs_foo_btree.c" file contains the functions required by the
generic btree abstraction to implement the "foo" btree format.  The
file "xfs_foo.c" then contains the code/logic that provides the
external foo API, manages the information inthe foo btree, and calls
the generic btree functions to manage the btree. This logic isn't
present in the patch, so really it shoul dbe added by the patch that
starts implementing the reflink API....

> +/*
> + * Get the data from the pointed-to record.
> + */
> +int					/* error */
> +xfs_reflink_get_rec(
> +	struct xfs_btree_cur	*cur,	/* btree cursor */
> +	xfs_agblock_t		*bno,	/* output: starting block of extent */
> +	xfs_extlen_t		*len,	/* output: length of extent */
> +	xfs_nlink_t		*nlink,	/* output: number of links */
> +	int			*stat)	/* output: success/failure */
> +{
> +	union xfs_btree_rec	*rec;
> +	int			error;
> +
> +	error = xfs_btree_get_rec(cur, &rec, stat);
> +	if (!error && *stat == 1) {
> +		CHECK_AG_EXTENT(cur->bc_mp,
> +			be32_to_cpu(rec->reflink.rr_startblock),
> +			be32_to_cpu(rec->reflink.rr_blockcount));
> +		*bno = be32_to_cpu(rec->reflink.rr_startblock);
> +		*len = be32_to_cpu(rec->reflink.rr_blockcount);
> +		*nlink = be32_to_cpu(rec->reflink.rr_nlinks);
> +	}
> +	return error;

	if (error || !*stat)
		return error;
	.....
	return 0;

> +	error = xfs_reflink_get_rec(cur, &bno, &len, &nr, &x);
> +	if (error)
> +		return error;
> +	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, x == 1, error0);
> +	error = xfs_btree_delete(cur, i);
> +	if (error)
> +		return error;
> +	error = xfs_reflink_lookup_ge(cur, bno, &x);
> +error0:

New code should use sane jump labels. e.g. "out_error" is a pretty
standard jump label name for this...

> diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
> index 88efbb4..d1de74e 100644
> --- a/fs/xfs/libxfs/xfs_shared.h
> +++ b/fs/xfs/libxfs/xfs_shared.h
> @@ -216,6 +216,7 @@ int	xfs_log_calc_minimum_size(struct xfs_mount *);
>  #define	XFS_INO_REF		2
>  #define	XFS_ATTR_BTREE_REF	1
>  #define	XFS_DQUOT_REF		1
> +#define XFS_REFLINK_BTREE_REF	1

whitespace.

> @@ -315,6 +317,9 @@ typedef struct xfs_perag {
>  	/* for rcu-safe freeing */
>  	struct rcu_head	rcu_head;
>  	int		pagb_count;	/* pagb slots in use */
> +
> +	/* reflink */
> +	__uint8_t	pagf_reflink_level;

May as well just make it the same as what is on disk (i.e.
uint32_t).

> +++ b/fs/xfs/xfs_stats.c
> @@ -61,6 +61,7 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v)
>  		{ "ibt2",		XFSSTAT_END_IBT_V2		},
>  		{ "fibt2",		XFSSTAT_END_FIBT_V2		},
>  		{ "rmapbt",		XFSSTAT_END_RMAP_V2		},
> +		{ "rlbt2",		XFSSTAT_END_RLBT_V2		},

"reflinkbt". No need for the "2", as there is only one set of
reflink btree stats.

Cheers,

Dave.
-- 
Dave Chinner
david@xxxxxxxxxxxxx

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs



[Index of Archives]     [Linux XFS Devel]     [Linux Filesystem Development]     [Filesystem Testing]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux