Re: [PATCH 06/63] xfs: define the on-disk refcount btree format

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Sep 27, 2016 at 07:54:08PM -0700, Darrick J. Wong wrote:
> Start constructing the refcount btree implementation by establishing
> the on-disk format and everything needed to read, write, and
> manipulate the refcount btree blocks.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> Reviewed-by: Christoph Hellwig <hch@xxxxxx>
> ---
>  fs/xfs/Makefile                    |    1 
>  fs/xfs/libxfs/xfs_btree.c          |    3 +
>  fs/xfs/libxfs/xfs_btree.h          |   12 ++
>  fs/xfs/libxfs/xfs_format.h         |   32 ++++++
>  fs/xfs/libxfs/xfs_refcount_btree.c |  178 ++++++++++++++++++++++++++++++++++++
>  fs/xfs/libxfs/xfs_refcount_btree.h |   67 ++++++++++++++
>  fs/xfs/libxfs/xfs_sb.c             |    9 ++
>  fs/xfs/libxfs/xfs_shared.h         |    2 
>  fs/xfs/libxfs/xfs_trans_resv.c     |    2 
>  fs/xfs/libxfs/xfs_trans_resv.h     |    1 
>  fs/xfs/xfs_mount.c                 |    2 
>  fs/xfs/xfs_mount.h                 |    3 +
>  fs/xfs/xfs_ondisk.h                |    3 +
>  fs/xfs/xfs_trace.h                 |   11 --
>  14 files changed, 315 insertions(+), 11 deletions(-)
>  create mode 100644 fs/xfs/libxfs/xfs_refcount_btree.c
>  create mode 100644 fs/xfs/libxfs/xfs_refcount_btree.h
> 
> 
...
> diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
> index 622055b..0c194fc 100644
> --- a/fs/xfs/libxfs/xfs_format.h
> +++ b/fs/xfs/libxfs/xfs_format.h
> @@ -1457,6 +1457,38 @@ typedef __be32 xfs_rmap_ptr_t;
>  
>  unsigned int xfs_refc_block(struct xfs_mount *mp);
>  
> +/*
> + * Data record/key structure
> + *
> + * Each record associates a range of physical blocks (starting at
> + * rc_startblock and ending rc_blockcount blocks later) with a
> + * reference count (rc_refcount).  A record is only stored in the
> + * btree if the refcount is > 2.  An entry in the free block btree

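Should this be >= 2? The rest of this comment says that no entry anywhere
means the refcount is 1, so an extent with a refcount of exactly 2 must
also have a record in the btree.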

Brian

> + * means that the refcount is 0, and no entries anywhere means that
> + * the refcount is 1, as was true in XFS before reflinking.
> + */
> +struct xfs_refcount_rec {
> +	__be32		rc_startblock;	/* starting block number */
> +	__be32		rc_blockcount;	/* count of blocks */
> +	__be32		rc_refcount;	/* number of inodes linked here */
> +};
> +
> +struct xfs_refcount_key {
> +	__be32		rc_startblock;	/* starting block number */
> +};
> +
> +struct xfs_refcount_irec {
> +	xfs_agblock_t	rc_startblock;	/* starting block number */
> +	xfs_extlen_t	rc_blockcount;	/* count of free blocks */
> +	xfs_nlink_t	rc_refcount;	/* number of inodes linked here */
> +};
> +
> +#define MAXREFCOUNT	((xfs_nlink_t)~0U)
> +#define MAXREFCEXTLEN	((xfs_extlen_t)~0U)
> +
> +/* btree pointer type */
> +typedef __be32 xfs_refcount_ptr_t;
> +
>  
>  /*
>   * BMAP Btree format definitions
> diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
> new file mode 100644
> index 0000000..359cf0c
> --- /dev/null
> +++ b/fs/xfs/libxfs/xfs_refcount_btree.c
> @@ -0,0 +1,178 @@
> +/*
> + * Copyright (C) 2016 Oracle.  All Rights Reserved.
> + *
> + * Author: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it would be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write the Free Software Foundation,
> + * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
> + */
> +#include "xfs.h"
> +#include "xfs_fs.h"
> +#include "xfs_shared.h"
> +#include "xfs_format.h"
> +#include "xfs_log_format.h"
> +#include "xfs_trans_resv.h"
> +#include "xfs_sb.h"
> +#include "xfs_mount.h"
> +#include "xfs_btree.h"
> +#include "xfs_bmap.h"
> +#include "xfs_refcount_btree.h"
> +#include "xfs_alloc.h"
> +#include "xfs_error.h"
> +#include "xfs_trace.h"
> +#include "xfs_cksum.h"
> +#include "xfs_trans.h"
> +#include "xfs_bit.h"
> +
> +static struct xfs_btree_cur *
> +xfs_refcountbt_dup_cursor(
> +	struct xfs_btree_cur	*cur)
> +{
> +	return xfs_refcountbt_init_cursor(cur->bc_mp, cur->bc_tp,
> +			cur->bc_private.a.agbp, cur->bc_private.a.agno,
> +			cur->bc_private.a.dfops);
> +}
> +
> +STATIC bool
> +xfs_refcountbt_verify(
> +	struct xfs_buf		*bp)
> +{
> +	struct xfs_mount	*mp = bp->b_target->bt_mount;
> +	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
> +	struct xfs_perag	*pag = bp->b_pag;
> +	unsigned int		level;
> +
> +	if (block->bb_magic != cpu_to_be32(XFS_REFC_CRC_MAGIC))
> +		return false;
> +
> +	if (!xfs_sb_version_hasreflink(&mp->m_sb))
> +		return false;
> +	if (!xfs_btree_sblock_v5hdr_verify(bp))
> +		return false;
> +
> +	level = be16_to_cpu(block->bb_level);
> +	if (pag && pag->pagf_init) {
> +		if (level >= pag->pagf_refcount_level)
> +			return false;
> +	} else if (level >= mp->m_refc_maxlevels)
> +		return false;
> +
> +	return xfs_btree_sblock_verify(bp, mp->m_refc_mxr[level != 0]);
> +}
> +
> +STATIC void
> +xfs_refcountbt_read_verify(
> +	struct xfs_buf	*bp)
> +{
> +	if (!xfs_btree_sblock_verify_crc(bp))
> +		xfs_buf_ioerror(bp, -EFSBADCRC);
> +	else if (!xfs_refcountbt_verify(bp))
> +		xfs_buf_ioerror(bp, -EFSCORRUPTED);
> +
> +	if (bp->b_error) {
> +		trace_xfs_btree_corrupt(bp, _RET_IP_);
> +		xfs_verifier_error(bp);
> +	}
> +}
> +
> +STATIC void
> +xfs_refcountbt_write_verify(
> +	struct xfs_buf	*bp)
> +{
> +	if (!xfs_refcountbt_verify(bp)) {
> +		trace_xfs_btree_corrupt(bp, _RET_IP_);
> +		xfs_buf_ioerror(bp, -EFSCORRUPTED);
> +		xfs_verifier_error(bp);
> +		return;
> +	}
> +	xfs_btree_sblock_calc_crc(bp);
> +
> +}
> +
> +const struct xfs_buf_ops xfs_refcountbt_buf_ops = {
> +	.name			= "xfs_refcountbt",
> +	.verify_read		= xfs_refcountbt_read_verify,
> +	.verify_write		= xfs_refcountbt_write_verify,
> +};
> +
> +static const struct xfs_btree_ops xfs_refcountbt_ops = {
> +	.rec_len		= sizeof(struct xfs_refcount_rec),
> +	.key_len		= sizeof(struct xfs_refcount_key),
> +
> +	.dup_cursor		= xfs_refcountbt_dup_cursor,
> +	.buf_ops		= &xfs_refcountbt_buf_ops,
> +};
> +
> +/*
> + * Allocate a new refcount btree cursor.
> + */
> +struct xfs_btree_cur *
> +xfs_refcountbt_init_cursor(
> +	struct xfs_mount	*mp,
> +	struct xfs_trans	*tp,
> +	struct xfs_buf		*agbp,
> +	xfs_agnumber_t		agno,
> +	struct xfs_defer_ops	*dfops)
> +{
> +	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);
> +	struct xfs_btree_cur	*cur;
> +
> +	ASSERT(agno != NULLAGNUMBER);
> +	ASSERT(agno < mp->m_sb.sb_agcount);
> +	cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS);
> +
> +	cur->bc_tp = tp;
> +	cur->bc_mp = mp;
> +	cur->bc_btnum = XFS_BTNUM_REFC;
> +	cur->bc_blocklog = mp->m_sb.sb_blocklog;
> +	cur->bc_ops = &xfs_refcountbt_ops;
> +
> +	cur->bc_nlevels = be32_to_cpu(agf->agf_refcount_level);
> +
> +	cur->bc_private.a.agbp = agbp;
> +	cur->bc_private.a.agno = agno;
> +	cur->bc_private.a.dfops = dfops;
> +	cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
> +
> +	cur->bc_private.a.priv.refc.nr_ops = 0;
> +	cur->bc_private.a.priv.refc.shape_changes = 0;
> +
> +	return cur;
> +}
> +
> +/*
> + * Calculate the number of records in a refcount btree block.
> + */
> +int
> +xfs_refcountbt_maxrecs(
> +	struct xfs_mount	*mp,
> +	int			blocklen,
> +	bool			leaf)
> +{
> +	blocklen -= XFS_REFCOUNT_BLOCK_LEN;
> +
> +	if (leaf)
> +		return blocklen / sizeof(struct xfs_refcount_rec);
> +	return blocklen / (sizeof(struct xfs_refcount_key) +
> +			   sizeof(xfs_refcount_ptr_t));
> +}
> +
> +/* Compute the maximum height of a refcount btree. */
> +void
> +xfs_refcountbt_compute_maxlevels(
> +	struct xfs_mount		*mp)
> +{
> +	mp->m_refc_maxlevels = xfs_btree_compute_maxlevels(mp,
> +			mp->m_refc_mnr, mp->m_sb.sb_agblocks);
> +}
> diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h
> new file mode 100644
> index 0000000..9e9ad7c
> --- /dev/null
> +++ b/fs/xfs/libxfs/xfs_refcount_btree.h
> @@ -0,0 +1,67 @@
> +/*
> + * Copyright (C) 2016 Oracle.  All Rights Reserved.
> + *
> + * Author: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it would be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write the Free Software Foundation,
> + * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
> + */
> +#ifndef __XFS_REFCOUNT_BTREE_H__
> +#define	__XFS_REFCOUNT_BTREE_H__
> +
> +/*
> + * Reference Count Btree on-disk structures
> + */
> +
> +struct xfs_buf;
> +struct xfs_btree_cur;
> +struct xfs_mount;
> +
> +/*
> + * Btree block header size
> + */
> +#define XFS_REFCOUNT_BLOCK_LEN	XFS_BTREE_SBLOCK_CRC_LEN
> +
> +/*
> + * Record, key, and pointer address macros for btree blocks.
> + *
> + * (note that some of these may appear unused, but they are used in userspace)
> + */
> +#define XFS_REFCOUNT_REC_ADDR(block, index) \
> +	((struct xfs_refcount_rec *) \
> +		((char *)(block) + \
> +		 XFS_REFCOUNT_BLOCK_LEN + \
> +		 (((index) - 1) * sizeof(struct xfs_refcount_rec))))
> +
> +#define XFS_REFCOUNT_KEY_ADDR(block, index) \
> +	((struct xfs_refcount_key *) \
> +		((char *)(block) + \
> +		 XFS_REFCOUNT_BLOCK_LEN + \
> +		 ((index) - 1) * sizeof(struct xfs_refcount_key)))
> +
> +#define XFS_REFCOUNT_PTR_ADDR(block, index, maxrecs) \
> +	((xfs_refcount_ptr_t *) \
> +		((char *)(block) + \
> +		 XFS_REFCOUNT_BLOCK_LEN + \
> +		 (maxrecs) * sizeof(struct xfs_refcount_key) + \
> +		 ((index) - 1) * sizeof(xfs_refcount_ptr_t)))
> +
> +extern struct xfs_btree_cur *xfs_refcountbt_init_cursor(struct xfs_mount *mp,
> +		struct xfs_trans *tp, struct xfs_buf *agbp, xfs_agnumber_t agno,
> +		struct xfs_defer_ops *dfops);
> +extern int xfs_refcountbt_maxrecs(struct xfs_mount *mp, int blocklen,
> +		bool leaf);
> +extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp);
> +
> +#endif	/* __XFS_REFCOUNT_BTREE_H__ */
> diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
> index 4aecc5f..a70aec9 100644
> --- a/fs/xfs/libxfs/xfs_sb.c
> +++ b/fs/xfs/libxfs/xfs_sb.c
> @@ -38,6 +38,8 @@
>  #include "xfs_ialloc_btree.h"
>  #include "xfs_log.h"
>  #include "xfs_rmap_btree.h"
> +#include "xfs_bmap.h"
> +#include "xfs_refcount_btree.h"
>  
>  /*
>   * Physical superblock buffer manipulations. Shared with libxfs in userspace.
> @@ -737,6 +739,13 @@ xfs_sb_mount_common(
>  	mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2;
>  	mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2;
>  
> +	mp->m_refc_mxr[0] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize,
> +			true);
> +	mp->m_refc_mxr[1] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize,
> +			false);
> +	mp->m_refc_mnr[0] = mp->m_refc_mxr[0] / 2;
> +	mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2;
> +
>  	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
>  	mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
>  					sbp->sb_inopblock);
> diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
> index 0c5b30b..c6f4eb4 100644
> --- a/fs/xfs/libxfs/xfs_shared.h
> +++ b/fs/xfs/libxfs/xfs_shared.h
> @@ -39,6 +39,7 @@ extern const struct xfs_buf_ops xfs_agf_buf_ops;
>  extern const struct xfs_buf_ops xfs_agfl_buf_ops;
>  extern const struct xfs_buf_ops xfs_allocbt_buf_ops;
>  extern const struct xfs_buf_ops xfs_rmapbt_buf_ops;
> +extern const struct xfs_buf_ops xfs_refcountbt_buf_ops;
>  extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops;
>  extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
>  extern const struct xfs_buf_ops xfs_bmbt_buf_ops;
> @@ -122,6 +123,7 @@ int	xfs_log_calc_minimum_size(struct xfs_mount *);
>  #define	XFS_INO_REF		2
>  #define	XFS_ATTR_BTREE_REF	1
>  #define	XFS_DQUOT_REF		1
> +#define	XFS_REFC_BTREE_REF	1
>  
>  /*
>   * Flags for xfs_trans_ichgtime().
> diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
> index 301ef2f..7c840e1 100644
> --- a/fs/xfs/libxfs/xfs_trans_resv.c
> +++ b/fs/xfs/libxfs/xfs_trans_resv.c
> @@ -73,7 +73,7 @@ xfs_calc_buf_res(
>   *
>   * Keep in mind that max depth is calculated separately for each type of tree.
>   */
> -static uint
> +uint
>  xfs_allocfree_log_count(
>  	struct xfs_mount *mp,
>  	uint		num_ops)
> diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h
> index 0eb46ed..36a1511 100644
> --- a/fs/xfs/libxfs/xfs_trans_resv.h
> +++ b/fs/xfs/libxfs/xfs_trans_resv.h
> @@ -102,5 +102,6 @@ struct xfs_trans_resv {
>  #define	XFS_ATTRRM_LOG_COUNT		3
>  
>  void xfs_trans_resv_calc(struct xfs_mount *mp, struct xfs_trans_resv *resp);
> +uint xfs_allocfree_log_count(struct xfs_mount *mp, uint num_ops);
>  
>  #endif	/* __XFS_TRANS_RESV_H__ */
> diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> index 56e85a6..3f64615 100644
> --- a/fs/xfs/xfs_mount.c
> +++ b/fs/xfs/xfs_mount.c
> @@ -43,6 +43,7 @@
>  #include "xfs_icache.h"
>  #include "xfs_sysfs.h"
>  #include "xfs_rmap_btree.h"
> +#include "xfs_refcount_btree.h"
>  
>  
>  static DEFINE_MUTEX(xfs_uuid_table_mutex);
> @@ -684,6 +685,7 @@ xfs_mountfs(
>  	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
>  	xfs_ialloc_compute_maxlevels(mp);
>  	xfs_rmapbt_compute_maxlevels(mp);
> +	xfs_refcountbt_compute_maxlevels(mp);
>  
>  	xfs_set_maxicount(mp);
>  
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index 8fab496..0be14a7 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -124,10 +124,13 @@ typedef struct xfs_mount {
>  	uint			m_inobt_mnr[2];	/* min inobt btree records */
>  	uint			m_rmap_mxr[2];	/* max rmap btree records */
>  	uint			m_rmap_mnr[2];	/* min rmap btree records */
> +	uint			m_refc_mxr[2];	/* max refc btree records */
> +	uint			m_refc_mnr[2];	/* min refc btree records */
>  	uint			m_ag_maxlevels;	/* XFS_AG_MAXLEVELS */
>  	uint			m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
>  	uint			m_in_maxlevels;	/* max inobt btree levels. */
>  	uint			m_rmap_maxlevels; /* max rmap btree levels */
> +	uint			m_refc_maxlevels; /* max refcount btree level */
>  	xfs_extlen_t		m_ag_prealloc_blocks; /* reserved ag blocks */
>  	uint			m_alloc_set_aside; /* space we can't use */
>  	uint			m_ag_max_usable; /* max space per AG */
> diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
> index 69e2986..0c381d7 100644
> --- a/fs/xfs/xfs_ondisk.h
> +++ b/fs/xfs/xfs_ondisk.h
> @@ -49,6 +49,8 @@ xfs_check_ondisk_structs(void)
>  	XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr,		56);
>  	XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key,		4);
>  	XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec,		16);
> +	XFS_CHECK_STRUCT_SIZE(struct xfs_refcount_key,		4);
> +	XFS_CHECK_STRUCT_SIZE(struct xfs_refcount_rec,		12);
>  	XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_key,		20);
>  	XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_rec,		24);
>  	XFS_CHECK_STRUCT_SIZE(struct xfs_timestamp,		8);
> @@ -56,6 +58,7 @@ xfs_check_ondisk_structs(void)
>  	XFS_CHECK_STRUCT_SIZE(xfs_alloc_ptr_t,			4);
>  	XFS_CHECK_STRUCT_SIZE(xfs_alloc_rec_t,			8);
>  	XFS_CHECK_STRUCT_SIZE(xfs_inobt_ptr_t,			4);
> +	XFS_CHECK_STRUCT_SIZE(xfs_refcount_ptr_t,		4);
>  	XFS_CHECK_STRUCT_SIZE(xfs_rmap_ptr_t,			4);
>  
>  	/* dir/attr trees */
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index ee439e5..631c114 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -39,16 +39,7 @@ struct xfs_buf_log_format;
>  struct xfs_inode_log_format;
>  struct xfs_bmbt_irec;
>  struct xfs_btree_cur;
> -
> -#ifndef XFS_REFCOUNT_IREC_PLACEHOLDER
> -#define XFS_REFCOUNT_IREC_PLACEHOLDER
> -/* Placeholder definition to avoid breaking bisectability. */
> -struct xfs_refcount_irec {
> -	xfs_agblock_t	rc_startblock;	/* starting block number */
> -	xfs_extlen_t	rc_blockcount;	/* count of free blocks */
> -	xfs_nlink_t	rc_refcount;	/* number of inodes linked here */
> -};
> -#endif
> +struct xfs_refcount_irec;
>  
>  DECLARE_EVENT_CLASS(xfs_attr_list_class,
>  	TP_PROTO(struct xfs_attr_list_context *ctx),
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux