On Wed, Sep 28, 2016 at 12:20:01PM -0400, Brian Foster wrote: > On Tue, Sep 27, 2016 at 07:54:08PM -0700, Darrick J. Wong wrote: > > Start constructing the refcount btree implementation by establishing > > the on-disk format and everything needed to read, write, and > > manipulate the refcount btree blocks. > > > > Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > > Reviewed-by: Christoph Hellwig <hch@xxxxxx> > > --- > > fs/xfs/Makefile | 1 > > fs/xfs/libxfs/xfs_btree.c | 3 + > > fs/xfs/libxfs/xfs_btree.h | 12 ++ > > fs/xfs/libxfs/xfs_format.h | 32 ++++++ > > fs/xfs/libxfs/xfs_refcount_btree.c | 178 ++++++++++++++++++++++++++++++++++++ > > fs/xfs/libxfs/xfs_refcount_btree.h | 67 ++++++++++++++ > > fs/xfs/libxfs/xfs_sb.c | 9 ++ > > fs/xfs/libxfs/xfs_shared.h | 2 > > fs/xfs/libxfs/xfs_trans_resv.c | 2 > > fs/xfs/libxfs/xfs_trans_resv.h | 1 > > fs/xfs/xfs_mount.c | 2 > > fs/xfs/xfs_mount.h | 3 + > > fs/xfs/xfs_ondisk.h | 3 + > > fs/xfs/xfs_trace.h | 11 -- > > 14 files changed, 315 insertions(+), 11 deletions(-) > > create mode 100644 fs/xfs/libxfs/xfs_refcount_btree.c > > create mode 100644 fs/xfs/libxfs/xfs_refcount_btree.h > > > > > ... > > diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h > > index 622055b..0c194fc 100644 > > --- a/fs/xfs/libxfs/xfs_format.h > > +++ b/fs/xfs/libxfs/xfs_format.h > > @@ -1457,6 +1457,38 @@ typedef __be32 xfs_rmap_ptr_t; > > > > unsigned int xfs_refc_block(struct xfs_mount *mp); > > > > +/* > > + * Data record/key structure > > + * > > + * Each record associates a range of physical blocks (starting at > > + * rc_startblock and ending rc_blockcount blocks later) with a > > + * reference count (rc_refcount). A record is only stored in the > > + * btree if the refcount is > 2. An entry in the free block btree > > Should this be >= 2? Actually, it's just plain wrong since I started tracking CoW staging extents in the refcount btree. 
The comment should read:

/*
 * Data record/key structure
 *
 * Each record associates a range of physical blocks (starting at
 * rc_startblock and ending rc_blockcount blocks later) with a reference
 * count (rc_refcount). Extents that are being used to stage a copy on
 * write (CoW) operation are recorded in the refcount btree with a
 * refcount of 1. All other records must have a refcount > 1 and must
 * track an extent mapped only by file data forks.
 *
 * Extents with a single owner (attributes, metadata, non-shared file
 * data) are not tracked here. Free space is also not tracked here.
 * This is consistent with pre-reflink XFS.
 */

--D

>
> Brian
>
> > + * means that the refcount is 0, and no entries anywhere means that
> > + * the refcount is 1, as was true in XFS before reflinking.
> > + */
> > +struct xfs_refcount_rec {
> > +	__be32 rc_startblock; /* starting block number */
> > +	__be32 rc_blockcount; /* count of blocks */
> > +	__be32 rc_refcount; /* number of inodes linked here */
> > +};
> > +
> > +struct xfs_refcount_key {
> > +	__be32 rc_startblock; /* starting block number */
> > +};
> > +
> > +struct xfs_refcount_irec {
> > +	xfs_agblock_t rc_startblock; /* starting block number */
> > +	xfs_extlen_t rc_blockcount; /* count of free blocks */
> > +	xfs_nlink_t rc_refcount; /* number of inodes linked here */
> > +};
> > +
> > +#define MAXREFCOUNT ((xfs_nlink_t)~0U)
> > +#define MAXREFCEXTLEN ((xfs_extlen_t)~0U)
> > +
> > +/* btree pointer type */
> > +typedef __be32 xfs_refcount_ptr_t;
> > +
> >
> >  /*
> >   * BMAP Btree format definitions
> > diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
> > new file mode 100644
> > index 0000000..359cf0c
> > --- /dev/null
> > +++ b/fs/xfs/libxfs/xfs_refcount_btree.c
> > @@ -0,0 +1,178 @@
> > +/*
> > + * Copyright (C) 2016 Oracle. All Rights Reserved.
> > + *
> > + * Author: Darrick J.
Wong <darrick.wong@xxxxxxxxxx> > > + * > > + * This program is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU General Public License > > + * as published by the Free Software Foundation; either version 2 > > + * of the License, or (at your option) any later version. > > + * > > + * This program is distributed in the hope that it would be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > > + * GNU General Public License for more details. > > + * > > + * You should have received a copy of the GNU General Public License > > + * along with this program; if not, write the Free Software Foundation, > > + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. > > + */ > > +#include "xfs.h" > > +#include "xfs_fs.h" > > +#include "xfs_shared.h" > > +#include "xfs_format.h" > > +#include "xfs_log_format.h" > > +#include "xfs_trans_resv.h" > > +#include "xfs_sb.h" > > +#include "xfs_mount.h" > > +#include "xfs_btree.h" > > +#include "xfs_bmap.h" > > +#include "xfs_refcount_btree.h" > > +#include "xfs_alloc.h" > > +#include "xfs_error.h" > > +#include "xfs_trace.h" > > +#include "xfs_cksum.h" > > +#include "xfs_trans.h" > > +#include "xfs_bit.h" > > + > > +static struct xfs_btree_cur * > > +xfs_refcountbt_dup_cursor( > > + struct xfs_btree_cur *cur) > > +{ > > + return xfs_refcountbt_init_cursor(cur->bc_mp, cur->bc_tp, > > + cur->bc_private.a.agbp, cur->bc_private.a.agno, > > + cur->bc_private.a.dfops); > > +} > > + > > +STATIC bool > > +xfs_refcountbt_verify( > > + struct xfs_buf *bp) > > +{ > > + struct xfs_mount *mp = bp->b_target->bt_mount; > > + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); > > + struct xfs_perag *pag = bp->b_pag; > > + unsigned int level; > > + > > + if (block->bb_magic != cpu_to_be32(XFS_REFC_CRC_MAGIC)) > > + return false; > > + > > + if (!xfs_sb_version_hasreflink(&mp->m_sb)) > > + return 
false; > > + if (!xfs_btree_sblock_v5hdr_verify(bp)) > > + return false; > > + > > + level = be16_to_cpu(block->bb_level); > > + if (pag && pag->pagf_init) { > > + if (level >= pag->pagf_refcount_level) > > + return false; > > + } else if (level >= mp->m_refc_maxlevels) > > + return false; > > + > > + return xfs_btree_sblock_verify(bp, mp->m_refc_mxr[level != 0]); > > +} > > + > > +STATIC void > > +xfs_refcountbt_read_verify( > > + struct xfs_buf *bp) > > +{ > > + if (!xfs_btree_sblock_verify_crc(bp)) > > + xfs_buf_ioerror(bp, -EFSBADCRC); > > + else if (!xfs_refcountbt_verify(bp)) > > + xfs_buf_ioerror(bp, -EFSCORRUPTED); > > + > > + if (bp->b_error) { > > + trace_xfs_btree_corrupt(bp, _RET_IP_); > > + xfs_verifier_error(bp); > > + } > > +} > > + > > +STATIC void > > +xfs_refcountbt_write_verify( > > + struct xfs_buf *bp) > > +{ > > + if (!xfs_refcountbt_verify(bp)) { > > + trace_xfs_btree_corrupt(bp, _RET_IP_); > > + xfs_buf_ioerror(bp, -EFSCORRUPTED); > > + xfs_verifier_error(bp); > > + return; > > + } > > + xfs_btree_sblock_calc_crc(bp); > > + > > +} > > + > > +const struct xfs_buf_ops xfs_refcountbt_buf_ops = { > > + .name = "xfs_refcountbt", > > + .verify_read = xfs_refcountbt_read_verify, > > + .verify_write = xfs_refcountbt_write_verify, > > +}; > > + > > +static const struct xfs_btree_ops xfs_refcountbt_ops = { > > + .rec_len = sizeof(struct xfs_refcount_rec), > > + .key_len = sizeof(struct xfs_refcount_key), > > + > > + .dup_cursor = xfs_refcountbt_dup_cursor, > > + .buf_ops = &xfs_refcountbt_buf_ops, > > +}; > > + > > +/* > > + * Allocate a new refcount btree cursor. 
> > + */ > > +struct xfs_btree_cur * > > +xfs_refcountbt_init_cursor( > > + struct xfs_mount *mp, > > + struct xfs_trans *tp, > > + struct xfs_buf *agbp, > > + xfs_agnumber_t agno, > > + struct xfs_defer_ops *dfops) > > +{ > > + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); > > + struct xfs_btree_cur *cur; > > + > > + ASSERT(agno != NULLAGNUMBER); > > + ASSERT(agno < mp->m_sb.sb_agcount); > > + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); > > + > > + cur->bc_tp = tp; > > + cur->bc_mp = mp; > > + cur->bc_btnum = XFS_BTNUM_REFC; > > + cur->bc_blocklog = mp->m_sb.sb_blocklog; > > + cur->bc_ops = &xfs_refcountbt_ops; > > + > > + cur->bc_nlevels = be32_to_cpu(agf->agf_refcount_level); > > + > > + cur->bc_private.a.agbp = agbp; > > + cur->bc_private.a.agno = agno; > > + cur->bc_private.a.dfops = dfops; > > + cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; > > + > > + cur->bc_private.a.priv.refc.nr_ops = 0; > > + cur->bc_private.a.priv.refc.shape_changes = 0; > > + > > + return cur; > > +} > > + > > +/* > > + * Calculate the number of records in a refcount btree block. > > + */ > > +int > > +xfs_refcountbt_maxrecs( > > + struct xfs_mount *mp, > > + int blocklen, > > + bool leaf) > > +{ > > + blocklen -= XFS_REFCOUNT_BLOCK_LEN; > > + > > + if (leaf) > > + return blocklen / sizeof(struct xfs_refcount_rec); > > + return blocklen / (sizeof(struct xfs_refcount_key) + > > + sizeof(xfs_refcount_ptr_t)); > > +} > > + > > +/* Compute the maximum height of a refcount btree. */ > > +void > > +xfs_refcountbt_compute_maxlevels( > > + struct xfs_mount *mp) > > +{ > > + mp->m_refc_maxlevels = xfs_btree_compute_maxlevels(mp, > > + mp->m_refc_mnr, mp->m_sb.sb_agblocks); > > +} > > diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h > > new file mode 100644 > > index 0000000..9e9ad7c > > --- /dev/null > > +++ b/fs/xfs/libxfs/xfs_refcount_btree.h > > @@ -0,0 +1,67 @@ > > +/* > > + * Copyright (C) 2016 Oracle. All Rights Reserved. 
> > + * > > + * Author: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > > + * > > + * This program is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU General Public License > > + * as published by the Free Software Foundation; either version 2 > > + * of the License, or (at your option) any later version. > > + * > > + * This program is distributed in the hope that it would be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > > + * GNU General Public License for more details. > > + * > > + * You should have received a copy of the GNU General Public License > > + * along with this program; if not, write the Free Software Foundation, > > + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. > > + */ > > +#ifndef __XFS_REFCOUNT_BTREE_H__ > > +#define __XFS_REFCOUNT_BTREE_H__ > > + > > +/* > > + * Reference Count Btree on-disk structures > > + */ > > + > > +struct xfs_buf; > > +struct xfs_btree_cur; > > +struct xfs_mount; > > + > > +/* > > + * Btree block header size > > + */ > > +#define XFS_REFCOUNT_BLOCK_LEN XFS_BTREE_SBLOCK_CRC_LEN > > + > > +/* > > + * Record, key, and pointer address macros for btree blocks. 
> > + * > > + * (note that some of these may appear unused, but they are used in userspace) > > + */ > > +#define XFS_REFCOUNT_REC_ADDR(block, index) \ > > + ((struct xfs_refcount_rec *) \ > > + ((char *)(block) + \ > > + XFS_REFCOUNT_BLOCK_LEN + \ > > + (((index) - 1) * sizeof(struct xfs_refcount_rec)))) > > + > > +#define XFS_REFCOUNT_KEY_ADDR(block, index) \ > > + ((struct xfs_refcount_key *) \ > > + ((char *)(block) + \ > > + XFS_REFCOUNT_BLOCK_LEN + \ > > + ((index) - 1) * sizeof(struct xfs_refcount_key))) > > + > > +#define XFS_REFCOUNT_PTR_ADDR(block, index, maxrecs) \ > > + ((xfs_refcount_ptr_t *) \ > > + ((char *)(block) + \ > > + XFS_REFCOUNT_BLOCK_LEN + \ > > + (maxrecs) * sizeof(struct xfs_refcount_key) + \ > > + ((index) - 1) * sizeof(xfs_refcount_ptr_t))) > > + > > +extern struct xfs_btree_cur *xfs_refcountbt_init_cursor(struct xfs_mount *mp, > > + struct xfs_trans *tp, struct xfs_buf *agbp, xfs_agnumber_t agno, > > + struct xfs_defer_ops *dfops); > > +extern int xfs_refcountbt_maxrecs(struct xfs_mount *mp, int blocklen, > > + bool leaf); > > +extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp); > > + > > +#endif /* __XFS_REFCOUNT_BTREE_H__ */ > > diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c > > index 4aecc5f..a70aec9 100644 > > --- a/fs/xfs/libxfs/xfs_sb.c > > +++ b/fs/xfs/libxfs/xfs_sb.c > > @@ -38,6 +38,8 @@ > > #include "xfs_ialloc_btree.h" > > #include "xfs_log.h" > > #include "xfs_rmap_btree.h" > > +#include "xfs_bmap.h" > > +#include "xfs_refcount_btree.h" > > > > /* > > * Physical superblock buffer manipulations. Shared with libxfs in userspace. 
> > @@ -737,6 +739,13 @@ xfs_sb_mount_common( > > mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2; > > mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2; > > > > + mp->m_refc_mxr[0] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize, > > + true); > > + mp->m_refc_mxr[1] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize, > > + false); > > + mp->m_refc_mnr[0] = mp->m_refc_mxr[0] / 2; > > + mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2; > > + > > mp->m_bsize = XFS_FSB_TO_BB(mp, 1); > > mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK, > > sbp->sb_inopblock); > > diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h > > index 0c5b30b..c6f4eb4 100644 > > --- a/fs/xfs/libxfs/xfs_shared.h > > +++ b/fs/xfs/libxfs/xfs_shared.h > > @@ -39,6 +39,7 @@ extern const struct xfs_buf_ops xfs_agf_buf_ops; > > extern const struct xfs_buf_ops xfs_agfl_buf_ops; > > extern const struct xfs_buf_ops xfs_allocbt_buf_ops; > > extern const struct xfs_buf_ops xfs_rmapbt_buf_ops; > > +extern const struct xfs_buf_ops xfs_refcountbt_buf_ops; > > extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops; > > extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops; > > extern const struct xfs_buf_ops xfs_bmbt_buf_ops; > > @@ -122,6 +123,7 @@ int xfs_log_calc_minimum_size(struct xfs_mount *); > > #define XFS_INO_REF 2 > > #define XFS_ATTR_BTREE_REF 1 > > #define XFS_DQUOT_REF 1 > > +#define XFS_REFC_BTREE_REF 1 > > > > /* > > * Flags for xfs_trans_ichgtime(). > > diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c > > index 301ef2f..7c840e1 100644 > > --- a/fs/xfs/libxfs/xfs_trans_resv.c > > +++ b/fs/xfs/libxfs/xfs_trans_resv.c > > @@ -73,7 +73,7 @@ xfs_calc_buf_res( > > * > > * Keep in mind that max depth is calculated separately for each type of tree. 
> > */ > > -static uint > > +uint > > xfs_allocfree_log_count( > > struct xfs_mount *mp, > > uint num_ops) > > diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h > > index 0eb46ed..36a1511 100644 > > --- a/fs/xfs/libxfs/xfs_trans_resv.h > > +++ b/fs/xfs/libxfs/xfs_trans_resv.h > > @@ -102,5 +102,6 @@ struct xfs_trans_resv { > > #define XFS_ATTRRM_LOG_COUNT 3 > > > > void xfs_trans_resv_calc(struct xfs_mount *mp, struct xfs_trans_resv *resp); > > +uint xfs_allocfree_log_count(struct xfs_mount *mp, uint num_ops); > > > > #endif /* __XFS_TRANS_RESV_H__ */ > > diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c > > index 56e85a6..3f64615 100644 > > --- a/fs/xfs/xfs_mount.c > > +++ b/fs/xfs/xfs_mount.c > > @@ -43,6 +43,7 @@ > > #include "xfs_icache.h" > > #include "xfs_sysfs.h" > > #include "xfs_rmap_btree.h" > > +#include "xfs_refcount_btree.h" > > > > > > static DEFINE_MUTEX(xfs_uuid_table_mutex); > > @@ -684,6 +685,7 @@ xfs_mountfs( > > xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); > > xfs_ialloc_compute_maxlevels(mp); > > xfs_rmapbt_compute_maxlevels(mp); > > + xfs_refcountbt_compute_maxlevels(mp); > > > > xfs_set_maxicount(mp); > > > > diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h > > index 8fab496..0be14a7 100644 > > --- a/fs/xfs/xfs_mount.h > > +++ b/fs/xfs/xfs_mount.h > > @@ -124,10 +124,13 @@ typedef struct xfs_mount { > > uint m_inobt_mnr[2]; /* min inobt btree records */ > > uint m_rmap_mxr[2]; /* max rmap btree records */ > > uint m_rmap_mnr[2]; /* min rmap btree records */ > > + uint m_refc_mxr[2]; /* max refc btree records */ > > + uint m_refc_mnr[2]; /* min refc btree records */ > > uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ > > uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ > > uint m_in_maxlevels; /* max inobt btree levels. 
*/ > > uint m_rmap_maxlevels; /* max rmap btree levels */ > > + uint m_refc_maxlevels; /* max refcount btree level */ > > xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */ > > uint m_alloc_set_aside; /* space we can't use */ > > uint m_ag_max_usable; /* max space per AG */ > > diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h > > index 69e2986..0c381d7 100644 > > --- a/fs/xfs/xfs_ondisk.h > > +++ b/fs/xfs/xfs_ondisk.h > > @@ -49,6 +49,8 @@ xfs_check_ondisk_structs(void) > > XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr, 56); > > XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key, 4); > > XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec, 16); > > + XFS_CHECK_STRUCT_SIZE(struct xfs_refcount_key, 4); > > + XFS_CHECK_STRUCT_SIZE(struct xfs_refcount_rec, 12); > > XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_key, 20); > > XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_rec, 24); > > XFS_CHECK_STRUCT_SIZE(struct xfs_timestamp, 8); > > @@ -56,6 +58,7 @@ xfs_check_ondisk_structs(void) > > XFS_CHECK_STRUCT_SIZE(xfs_alloc_ptr_t, 4); > > XFS_CHECK_STRUCT_SIZE(xfs_alloc_rec_t, 8); > > XFS_CHECK_STRUCT_SIZE(xfs_inobt_ptr_t, 4); > > + XFS_CHECK_STRUCT_SIZE(xfs_refcount_ptr_t, 4); > > XFS_CHECK_STRUCT_SIZE(xfs_rmap_ptr_t, 4); > > > > /* dir/attr trees */ > > diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h > > index ee439e5..631c114 100644 > > --- a/fs/xfs/xfs_trace.h > > +++ b/fs/xfs/xfs_trace.h > > @@ -39,16 +39,7 @@ struct xfs_buf_log_format; > > struct xfs_inode_log_format; > > struct xfs_bmbt_irec; > > struct xfs_btree_cur; > > - > > -#ifndef XFS_REFCOUNT_IREC_PLACEHOLDER > > -#define XFS_REFCOUNT_IREC_PLACEHOLDER > > -/* Placeholder definition to avoid breaking bisectability. 
*/ > > -struct xfs_refcount_irec { > > - xfs_agblock_t rc_startblock; /* starting block number */ > > - xfs_extlen_t rc_blockcount; /* count of free blocks */ > > - xfs_nlink_t rc_refcount; /* number of inodes linked here */ > > -}; > > -#endif > > +struct xfs_refcount_irec; > > > > DECLARE_EVENT_CLASS(xfs_attr_list_class, > > TP_PROTO(struct xfs_attr_list_context *ctx), > > > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > > the body of a message to majordomo@xxxxxxxxxxxxxxx > > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html