[PATCH 01/14] xfs: create a per-AG btree to track reference counts

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Create a per-AG btree to track the reference counts of physical blocks
to support reflink.

Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
---
 fs/xfs/Makefile                   |    1 
 fs/xfs/libxfs/xfs_alloc.c         |   19 +
 fs/xfs/libxfs/xfs_btree.c         |    8 -
 fs/xfs/libxfs/xfs_btree.h         |    7 
 fs/xfs/libxfs/xfs_format.h        |   59 ++++
 fs/xfs/libxfs/xfs_reflink_btree.c |  531 +++++++++++++++++++++++++++++++++++++
 fs/xfs/libxfs/xfs_reflink_btree.h |   70 +++++
 fs/xfs/libxfs/xfs_sb.c            |    7 
 fs/xfs/libxfs/xfs_shared.h        |    1 
 fs/xfs/libxfs/xfs_trans_resv.c    |    2 
 fs/xfs/libxfs/xfs_types.h         |    2 
 fs/xfs/xfs_mount.h                |    5 
 fs/xfs/xfs_stats.c                |    1 
 fs/xfs/xfs_stats.h                |   18 +
 14 files changed, 722 insertions(+), 9 deletions(-)
 create mode 100644 fs/xfs/libxfs/xfs_reflink_btree.c
 create mode 100644 fs/xfs/libxfs/xfs_reflink_btree.h


diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index e338595..ba89aee 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -52,6 +52,7 @@ xfs-y				+= $(addprefix libxfs/, \
 				   xfs_log_rlimit.o \
 				   xfs_rmap.o \
 				   xfs_rmap_btree.o \
+				   xfs_reflink_btree.o \
 				   xfs_sb.o \
 				   xfs_symlink_remote.o \
 				   xfs_trans_resv.o \
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index c6a1372..fc8a499 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -54,6 +54,8 @@ xfs_extlen_t
 xfs_prealloc_blocks(
 	struct xfs_mount	*mp)
 {
+	if (xfs_sb_version_hasreflink(&mp->m_sb))
+		return XFS_RL_BLOCK(mp) + 1;
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
 		return XFS_RMAP_BLOCK(mp) + 1;
 	if (xfs_sb_version_hasfinobt(&mp->m_sb))
@@ -91,9 +93,11 @@ xfs_alloc_set_aside(
 	unsigned int	blocks;
 
 	blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE);
-	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
-		return blocks;
-	return blocks + (mp->m_sb.sb_agcount * (2 * mp->m_ag_maxlevels) - 1);
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+		blocks += (mp->m_sb.sb_agcount * (2 * mp->m_ag_maxlevels) - 1);
+	if (xfs_sb_version_hasreflink(&mp->m_sb))
+		blocks += (mp->m_sb.sb_agcount * (2 * mp->m_ag_maxlevels) - 1);
+	return blocks;
 }
 
 /*
@@ -123,6 +127,10 @@ xfs_alloc_ag_max_usable(struct xfs_mount *mp)
 		/* rmap root block + full tree split on full AG */
 		blocks += 1 + (2 * mp->m_ag_maxlevels) - 1;
 	}
+	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+		/* reflink root block + full tree split on full AG */
+		blocks += 1 + (2 * mp->m_ag_maxlevels) - 1;
+	}
 
 	return mp->m_sb.sb_agblocks - blocks;
 }
@@ -2378,6 +2386,10 @@ xfs_agf_verify(
 	    be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
 		return false;
 
+	if (xfs_sb_version_hasreflink(&mp->m_sb) &&
+	    be32_to_cpu(agf->agf_reflink_level) > XFS_BTREE_MAXLEVELS)
+		return false;
+
 	return true;;
 
 }
@@ -2497,6 +2509,7 @@ xfs_alloc_read_agf(
 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
 		pag->pagf_levels[XFS_BTNUM_RMAPi] =
 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
+		pag->pagf_reflink_level = be32_to_cpu(agf->agf_reflink_level);
 		spin_lock_init(&pag->pagb_lock);
 		pag->pagb_count = 0;
 		pag->pagb_tree = RB_ROOT;
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 4c9b9b3..8820aad 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -43,9 +43,10 @@ kmem_zone_t	*xfs_btree_cur_zone;
  */
 static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
 	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
-	  XFS_FIBT_MAGIC },
+	  XFS_FIBT_MAGIC, 0 },
 	{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
-	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
+	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC,
+	  XFS_RLBT_CRC_MAGIC }
 };
 #define xfs_btree_magic(cur) \
 	xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
@@ -1117,6 +1118,9 @@ xfs_btree_set_refs(
 	case XFS_BTNUM_RMAP:
 		xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF);
 		break;
+	case XFS_BTNUM_RL:
+		xfs_buf_set_ref(bp, XFS_REFLINK_BTREE_REF);
+		break;
 	default:
 		ASSERT(0);
 	}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 48ab2b1..a3f8661 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -43,6 +43,7 @@ union xfs_btree_key {
 	xfs_alloc_key_t			alloc;
 	struct xfs_inobt_key		inobt;
 	struct xfs_rmap_key		rmap;
+	xfs_reflink_key_t		reflink;
 };
 
 union xfs_btree_rec {
@@ -51,6 +52,7 @@ union xfs_btree_rec {
 	struct xfs_alloc_rec		alloc;
 	struct xfs_inobt_rec		inobt;
 	struct xfs_rmap_rec		rmap;
+	xfs_reflink_rec_t		reflink;
 };
 
 /*
@@ -67,6 +69,8 @@ union xfs_btree_rec {
 #define	XFS_BTNUM_FINO	((xfs_btnum_t)XFS_BTNUM_FINOi)
 #define	XFS_BTNUM_RMAP	((xfs_btnum_t)XFS_BTNUM_RMAPi)
 
+#define	XFS_BTNUM_RL	((xfs_btnum_t)XFS_BTNUM_RLi)
+
 /*
  * For logging record fields.
  */
@@ -98,6 +102,7 @@ do {    \
 	case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break;	\
 	case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break;	\
 	case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_INC(rmap, stat); break;	\
+	case XFS_BTNUM_RL: __XFS_BTREE_STATS_INC(rlbt, stat); break;	\
 	case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break;	\
 	}       \
 } while (0)
@@ -113,6 +118,7 @@ do {    \
 	case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \
 	case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \
 	case XFS_BTNUM_RMAP: __XFS_BTREE_STATS_ADD(rmap, stat, val); break; \
+	case XFS_BTNUM_RL: __XFS_BTREE_STATS_INC(rlbt, stat); break;	\
 	case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break;	\
 	}       \
 } while (0)
@@ -205,6 +211,7 @@ typedef struct xfs_btree_cur
 		xfs_bmbt_irec_t		b;
 		xfs_inobt_rec_incore_t	i;
 		struct xfs_rmap_irec	r;
+		xfs_reflink_rec_incore_t	rl;
 	}		bc_rec;		/* current insert/search record value */
 	struct xfs_buf	*bc_bufs[XFS_BTREE_MAXLEVELS];	/* buf ptr per level */
 	int		bc_ptrs[XFS_BTREE_MAXLEVELS];	/* key/record # */
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 9cff517..e4954ab 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -446,9 +446,11 @@ xfs_sb_has_compat_feature(
 
 #define XFS_SB_FEAT_RO_COMPAT_FINOBT   (1 << 0)		/* free inode btree */
 #define XFS_SB_FEAT_RO_COMPAT_RMAPBT   (1 << 1)		/* reverse map btree */
+#define XFS_SB_FEAT_RO_COMPAT_REFLINK  (1 << 2)		/* reflink btree */
 #define XFS_SB_FEAT_RO_COMPAT_ALL \
 		(XFS_SB_FEAT_RO_COMPAT_FINOBT | \
-		 XFS_SB_FEAT_RO_COMPAT_RMAPBT)
+		 XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
+		 XFS_SB_FEAT_RO_COMPAT_REFLINK)
 #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN	~XFS_SB_FEAT_RO_COMPAT_ALL
 static inline bool
 xfs_sb_has_ro_compat_feature(
@@ -522,6 +524,12 @@ static inline bool xfs_sb_version_hasrmapbt(struct xfs_sb *sbp)
 		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT);
 }
 
+static inline int xfs_sb_version_hasreflink(xfs_sb_t *sbp)
+{
+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
+		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK);
+}
+
 /*
  * end of superblock version macros
  */
@@ -616,12 +624,15 @@ typedef struct xfs_agf {
 	__be32		agf_btreeblks;	/* # of blocks held in AGF btrees */
 	uuid_t		agf_uuid;	/* uuid of filesystem */
 
+	__be32		agf_reflink_root;	/* reflink tree root block */
+	__be32		agf_reflink_level;	/* reflink btree levels */
+
 	/*
 	 * reserve some contiguous space for future logged fields before we add
 	 * the unlogged fields. This makes the range logging via flags and
 	 * structure offsets much simpler.
 	 */
-	__be64		agf_spare64[16];
+	__be64		agf_spare64[15];
 
 	/* unlogged fields, written during buffer writeback. */
 	__be64		agf_lsn;	/* last write sequence */
@@ -1338,6 +1349,50 @@ typedef __be32 xfs_rmap_ptr_t;
 	 XFS_IBT_BLOCK(mp) + 1)
 
 /*
+ * reflink Btree format definitions
+ *
+ */
+#define	XFS_RLBT_CRC_MAGIC	0x524C4233	/* 'RLB3' */
+
+/*
+ * Data record/key structure
+ */
+typedef struct xfs_reflink_rec {
+	__be32		rr_startblock;	/* starting block number */
+	__be32		rr_blockcount;	/* count of blocks */
+	__be32		rr_nlinks;	/* number of inodes linked here */
+} xfs_reflink_rec_t;
+
+typedef struct xfs_reflink_key {
+	__be32		rr_startblock;	/* starting block number */
+} xfs_reflink_key_t;
+
+typedef struct xfs_reflink_rec_incore {
+	xfs_agblock_t	rr_startblock;	/* starting block number */
+	xfs_extlen_t	rr_blockcount;	/* count of free blocks */
+	xfs_nlink_t	rr_nlinks;	/* number of inodes linked here */
+} xfs_reflink_rec_incore_t;
+
+/*
+ * When a block hits MAXRLCOUNT references, it becomes permanently
+ * stuck in CoW mode, because who knows how many times it's really
+ * referenced.
+ */
+#define MAXRLCOUNT	((xfs_nlink_t)~0U)
+#define MAXRLEXTLEN	((xfs_extlen_t)~0U)
+
+/* btree pointer type */
+typedef __be32 xfs_reflink_ptr_t;
+
+#define	XFS_RL_BLOCK(mp) \
+	(xfs_sb_version_hasrmapbt(&((mp)->m_sb)) ? \
+	 XFS_RMAP_BLOCK(mp) + 1 : \
+	 (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
+	  XFS_FIBT_BLOCK(mp) + 1 : \
+	  XFS_IBT_BLOCK(mp) + 1))
+
+
+/*
  * BMAP Btree format definitions
  *
  * This includes both the root block definition that sits inside an inode fork
diff --git a/fs/xfs/libxfs/xfs_reflink_btree.c b/fs/xfs/libxfs/xfs_reflink_btree.c
new file mode 100644
index 0000000..8a0fa5d
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_reflink_btree.c
@@ -0,0 +1,531 @@
+/*
+ * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_btree.h"
+#include "xfs_bmap.h"
+#include "xfs_reflink_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_extent_busy.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+#include "xfs_trans.h"
+#include "xfs_bit.h"
+
+#undef REFLINK_DEBUG
+
+#ifdef REFLINK_DEBUG
+# define dbg_printk(f, a...)  do {printk(KERN_ERR f, ## a); } while (0)
+#else
+# define dbg_printk(f, a...)
+#endif
+
+#define CHECK_AG_NUMBER(mp, agno) \
+	do { \
+		ASSERT((agno) != NULLAGNUMBER); \
+		ASSERT((agno) < (mp)->m_sb.sb_agcount); \
+	} while(0);
+
+#define CHECK_AG_EXTENT(mp, agbno, len) \
+	do { \
+		ASSERT((agbno) != NULLAGBLOCK); \
+		ASSERT((len) > 0); \
+		ASSERT((unsigned long long)(agbno) + (len) <= \
+				(mp)->m_sb.sb_agblocks); \
+	} while(0);
+
+#define XFS_WANT_CORRUPTED_RLEXT_GOTO(mp, have, agbno, len, nr, label) \
+	do { \
+		XFS_WANT_CORRUPTED_GOTO((mp), (have) == 1, label); \
+		XFS_WANT_CORRUPTED_GOTO((mp), (len) > 0, label); \
+		XFS_WANT_CORRUPTED_GOTO((mp), (nr) >= 2, label); \
+		XFS_WANT_CORRUPTED_GOTO((mp), (unsigned long long)(agbno) + \
+				(len) <= (mp)->m_sb.sb_agblocks, label); \
+	} while(0);
+
+STATIC struct xfs_btree_cur *
+xfs_reflinkbt_dup_cursor(
+	struct xfs_btree_cur	*cur)
+{
+	return xfs_reflinkbt_init_cursor(cur->bc_mp, cur->bc_tp,
+			cur->bc_private.a.agbp, cur->bc_private.a.agno);
+}
+
+STATIC void
+xfs_reflinkbt_set_root(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr,
+	int			inc)
+{
+	struct xfs_buf		*agbp = cur->bc_private.a.agbp;
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);
+	xfs_agnumber_t		seqno = be32_to_cpu(agf->agf_seqno);
+	struct xfs_perag	*pag = xfs_perag_get(cur->bc_mp, seqno);
+
+	ASSERT(ptr->s != 0);
+
+	agf->agf_reflink_root = ptr->s;
+	be32_add_cpu(&agf->agf_reflink_level, inc);
+	pag->pagf_reflink_level += inc;
+	xfs_perag_put(pag);
+
+	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
+}
+
+STATIC int
+xfs_reflinkbt_alloc_block(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*start,
+	union xfs_btree_ptr	*new,
+	int			*stat)
+{
+	int			error;
+	xfs_agblock_t		bno;
+
+	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+
+	/* Allocate the new block from the freelist. If we can't, give up.  */
+	error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
+				       &bno, 1);
+	if (error) {
+		XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+		return error;
+	}
+
+	if (bno == NULLAGBLOCK) {
+		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+		*stat = 0;
+		return 0;
+	}
+
+	xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
+
+	xfs_trans_agbtree_delta(cur->bc_tp, 1);
+	new->s = cpu_to_be32(bno);
+
+	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+	*stat = 1;
+	return 0;
+}
+
+STATIC int
+xfs_reflinkbt_free_block(
+	struct xfs_btree_cur	*cur,
+	struct xfs_buf		*bp)
+{
+	struct xfs_buf		*agbp = cur->bc_private.a.agbp;
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);
+	xfs_agblock_t		bno;
+	int			error;
+
+	bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
+	error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
+	if (error)
+		return error;
+
+	xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
+			      XFS_EXTENT_BUSY_SKIP_DISCARD);
+	xfs_trans_agbtree_delta(cur->bc_tp, -1);
+
+	xfs_trans_binval(cur->bc_tp, bp);
+	return 0;
+}
+
+STATIC int
+xfs_reflinkbt_get_minrecs(
+	struct xfs_btree_cur	*cur,
+	int			level)
+{
+	return cur->bc_mp->m_rlbt_mnr[level != 0];
+}
+
+STATIC int
+xfs_reflinkbt_get_maxrecs(
+	struct xfs_btree_cur	*cur,
+	int			level)
+{
+	return cur->bc_mp->m_rlbt_mxr[level != 0];
+}
+
+STATIC void
+xfs_reflinkbt_init_key_from_rec(
+	union xfs_btree_key	*key,
+	union xfs_btree_rec	*rec)
+{
+	ASSERT(rec->reflink.rr_startblock != 0);
+
+	key->reflink.rr_startblock = rec->reflink.rr_startblock;
+}
+
+STATIC void
+xfs_reflinkbt_init_rec_from_key(
+	union xfs_btree_key	*key,
+	union xfs_btree_rec	*rec)
+{
+	ASSERT(key->reflink.rr_startblock != 0);
+
+	rec->reflink.rr_startblock = key->reflink.rr_startblock;
+}
+
+STATIC void
+xfs_reflinkbt_init_rec_from_cur(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_rec	*rec)
+{
+	ASSERT(cur->bc_rec.rl.rr_startblock != 0);
+
+	rec->reflink.rr_startblock = cpu_to_be32(cur->bc_rec.rl.rr_startblock);
+	rec->reflink.rr_blockcount = cpu_to_be32(cur->bc_rec.rl.rr_blockcount);
+	rec->reflink.rr_nlinks = cpu_to_be32(cur->bc_rec.rl.rr_nlinks);
+}
+
+STATIC void
+xfs_reflinkbt_init_ptr_from_cur(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr)
+{
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
+
+	ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
+	ASSERT(agf->agf_reflink_root != 0);
+
+	ptr->s = agf->agf_reflink_root;
+}
+
+STATIC __int64_t
+xfs_reflinkbt_key_diff(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_key	*key)
+{
+	xfs_reflink_rec_incore_t	*rec = &cur->bc_rec.rl;
+	xfs_reflink_key_t		*kp = &key->reflink;
+
+	return (__int64_t)be32_to_cpu(kp->rr_startblock) - rec->rr_startblock;
+}
+
+static bool
+xfs_reflinkbt_verify(
+	struct xfs_buf		*bp)
+{
+	struct xfs_mount	*mp = bp->b_target->bt_mount;
+	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
+	struct xfs_perag	*pag = bp->b_pag;
+	unsigned int		level;
+
+	if (block->bb_magic != cpu_to_be32(XFS_RLBT_CRC_MAGIC))
+		return false;
+
+	if (!xfs_sb_version_hasreflink(&mp->m_sb))
+		return false;
+	if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
+		return false;
+	if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+		return false;
+	if (pag &&
+	    be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+		return false;
+
+	level = be16_to_cpu(block->bb_level);
+	if (pag && pag->pagf_init) {
+		if (level >= pag->pagf_reflink_level)
+			return false;
+	} else if (level >= mp->m_ag_maxlevels)
+		return false;
+
+	/* numrecs verification */
+	if (be16_to_cpu(block->bb_numrecs) > mp->m_rlbt_mxr[level != 0])
+		return false;
+
+	/* sibling pointer verification */
+	if (!block->bb_u.s.bb_leftsib ||
+	    (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
+	     block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
+		return false;
+	if (!block->bb_u.s.bb_rightsib ||
+	    (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
+	     block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
+		return false;
+
+	return true;
+}
+
+static void
+xfs_reflinkbt_read_verify(
+	struct xfs_buf	*bp)
+{
+	if (!xfs_btree_sblock_verify_crc(bp))
+		xfs_buf_ioerror(bp, -EFSBADCRC);
+	else if (!xfs_reflinkbt_verify(bp))
+		xfs_buf_ioerror(bp, -EFSCORRUPTED);
+
+	if (bp->b_error) {
+		trace_xfs_btree_corrupt(bp, _RET_IP_);
+		xfs_verifier_error(bp);
+	}
+}
+
+static void
+xfs_reflinkbt_write_verify(
+	struct xfs_buf	*bp)
+{
+	if (!xfs_reflinkbt_verify(bp)) {
+		trace_xfs_btree_corrupt(bp, _RET_IP_);
+		xfs_buf_ioerror(bp, -EFSCORRUPTED);
+		xfs_verifier_error(bp);
+		return;
+	}
+	xfs_btree_sblock_calc_crc(bp);
+
+}
+
+const struct xfs_buf_ops xfs_reflinkbt_buf_ops = {
+	.verify_read = xfs_reflinkbt_read_verify,
+	.verify_write = xfs_reflinkbt_write_verify,
+};
+
+
+#if defined(DEBUG) || defined(XFS_WARN)
+STATIC int
+xfs_reflinkbt_keys_inorder(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_key	*k1,
+	union xfs_btree_key	*k2)
+{
+	return be32_to_cpu(k1->reflink.rr_startblock) <
+	       be32_to_cpu(k2->reflink.rr_startblock);
+}
+
+STATIC int
+xfs_reflinkbt_recs_inorder(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_rec	*r1,
+	union xfs_btree_rec	*r2)
+{
+	return be32_to_cpu(r1->reflink.rr_startblock) +
+		be32_to_cpu(r1->reflink.rr_blockcount) <=
+		be32_to_cpu(r2->reflink.rr_startblock);
+}
+#endif	/* DEBUG */
+
+static const struct xfs_btree_ops xfs_reflinkbt_ops = {
+	.rec_len		= sizeof(xfs_reflink_rec_t),
+	.key_len		= sizeof(xfs_reflink_key_t),
+
+	.dup_cursor		= xfs_reflinkbt_dup_cursor,
+	.set_root		= xfs_reflinkbt_set_root,
+	.alloc_block		= xfs_reflinkbt_alloc_block,
+	.free_block		= xfs_reflinkbt_free_block,
+	.get_minrecs		= xfs_reflinkbt_get_minrecs,
+	.get_maxrecs		= xfs_reflinkbt_get_maxrecs,
+	.init_key_from_rec	= xfs_reflinkbt_init_key_from_rec,
+	.init_rec_from_key	= xfs_reflinkbt_init_rec_from_key,
+	.init_rec_from_cur	= xfs_reflinkbt_init_rec_from_cur,
+	.init_ptr_from_cur	= xfs_reflinkbt_init_ptr_from_cur,
+	.key_diff		= xfs_reflinkbt_key_diff,
+	.buf_ops		= &xfs_reflinkbt_buf_ops,
+#if defined(DEBUG) || defined(XFS_WARN)
+	.keys_inorder		= xfs_reflinkbt_keys_inorder,
+	.recs_inorder		= xfs_reflinkbt_recs_inorder,
+#endif
+};
+
+/*
+ * Allocate a new reflink btree cursor.
+ */
+struct xfs_btree_cur *			/* new reflink btree cursor */
+xfs_reflinkbt_init_cursor(
+	struct xfs_mount	*mp,		/* file system mount point */
+	struct xfs_trans	*tp,		/* transaction pointer */
+	struct xfs_buf		*agbp,		/* buffer for agf structure */
+	xfs_agnumber_t		agno)		/* allocation group number */
+{
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);
+	struct xfs_btree_cur	*cur;
+
+	CHECK_AG_NUMBER(mp, agno);
+	cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
+
+	cur->bc_tp = tp;
+	cur->bc_mp = mp;
+	cur->bc_btnum = XFS_BTNUM_RL;
+	cur->bc_blocklog = mp->m_sb.sb_blocklog;
+	cur->bc_ops = &xfs_reflinkbt_ops;
+
+	cur->bc_nlevels = be32_to_cpu(agf->agf_reflink_level);
+
+	cur->bc_private.a.agbp = agbp;
+	cur->bc_private.a.agno = agno;
+
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
+
+	return cur;
+}
+
+/*
+ * Calculate number of records in an reflink btree block.
+ */
+int
+xfs_reflinkbt_maxrecs(
+	struct xfs_mount	*mp,
+	int			blocklen,
+	int			leaf)
+{
+	blocklen -= XFS_REFLINK_BLOCK_LEN;
+
+	if (leaf)
+		return blocklen / sizeof(xfs_reflink_rec_t);
+	return blocklen / (sizeof(xfs_reflink_key_t) +
+			   sizeof(xfs_reflink_ptr_t));
+}
+
+/*
+ * Lookup the first record less than or equal to [bno, len]
+ * in the btree given by cur.
+ */
+int					/* error */
+xfs_reflink_lookup_le(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		bno,	/* starting block of extent */
+	int			*stat)	/* success/failure */
+{
+	cur->bc_rec.rl.rr_startblock = bno;
+	cur->bc_rec.rl.rr_blockcount = 0;
+	return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+}
+
+/*
+ * Lookup the first record greater than or equal to [bno, len]
+ * in the btree given by cur.
+ */
+int					/* error */
+xfs_reflink_lookup_ge(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		bno,	/* starting block of extent */
+	int			*stat)	/* success/failure */
+{
+	cur->bc_rec.rl.rr_startblock = bno;
+	cur->bc_rec.rl.rr_blockcount = 0;
+	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
+}
+
+/*
+ * Get the data from the pointed-to record.
+ */
+int					/* error */
+xfs_reflink_get_rec(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		*bno,	/* output: starting block of extent */
+	xfs_extlen_t		*len,	/* output: length of extent */
+	xfs_nlink_t		*nlink,	/* output: number of links */
+	int			*stat)	/* output: success/failure */
+{
+	union xfs_btree_rec	*rec;
+	int			error;
+
+	error = xfs_btree_get_rec(cur, &rec, stat);
+	if (!error && *stat == 1) {
+		CHECK_AG_EXTENT(cur->bc_mp,
+			be32_to_cpu(rec->reflink.rr_startblock),
+			be32_to_cpu(rec->reflink.rr_blockcount));
+		*bno = be32_to_cpu(rec->reflink.rr_startblock);
+		*len = be32_to_cpu(rec->reflink.rr_blockcount);
+		*nlink = be32_to_cpu(rec->reflink.rr_nlinks);
+	}
+	return error;
+}
+
+/*
+ * Update the record referred to by cur to the value given
+ * by [bno, len, nr].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int				/* error */
+xfs_reflinkbt_update(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		bno,	/* starting block of extent */
+	xfs_extlen_t		len,	/* length of extent */
+	xfs_nlink_t		nr)	/* reference count */
+{
+	union xfs_btree_rec	rec;
+
+	CHECK_AG_EXTENT(cur->bc_mp, bno, len);
+	ASSERT(nr > 1);
+
+	rec.reflink.rr_startblock = cpu_to_be32(bno);
+	rec.reflink.rr_blockcount = cpu_to_be32(len);
+	rec.reflink.rr_nlinks = cpu_to_be32(nr);
+	return xfs_btree_update(cur, &rec);
+}
+
+/*
+ * Insert the record referred to by cur to the value given
+ * by [bno, len, nr].
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int				/* error */
+xfs_reflinkbt_insert(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	xfs_agblock_t		bno,	/* starting block of extent */
+	xfs_extlen_t		len,	/* length of extent */
+	xfs_nlink_t		nr,	/* reference count */
+	int			*i)	/* success? */
+{
+	CHECK_AG_EXTENT(cur->bc_mp, bno, len);
+	ASSERT(nr > 1);
+
+	cur->bc_rec.rl.rr_startblock = bno;
+	cur->bc_rec.rl.rr_blockcount = len;
+	cur->bc_rec.rl.rr_nlinks = nr;
+	return xfs_btree_insert(cur, i);
+}
+
+/*
+ * Remove the record referred to by cur.
+ * This either works (return 0) or gets an EFSCORRUPTED error.
+ */
+STATIC int				/* error */
+xfs_reflinkbt_delete(
+	struct xfs_btree_cur	*cur,	/* btree cursor */
+	int			*i)	/* success? */
+{
+	xfs_agblock_t		bno;
+	xfs_extlen_t		len;
+	xfs_nlink_t		nr;
+	int			x;
+	int			error;
+
+	error = xfs_reflink_get_rec(cur, &bno, &len, &nr, &x);
+	if (error)
+		return error;
+	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, x == 1, error0);
+	error = xfs_btree_delete(cur, i);
+	if (error)
+		return error;
+	error = xfs_reflink_lookup_ge(cur, bno, &x);
+error0:
+	return error;
+}
diff --git a/fs/xfs/libxfs/xfs_reflink_btree.h b/fs/xfs/libxfs/xfs_reflink_btree.h
new file mode 100644
index 0000000..a27588a
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_reflink_btree.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_REFLINK_BTREE_H__
+#define	__XFS_REFLINK_BTREE_H__
+
+/*
+ * Freespace on-disk structures
+ */
+
+struct xfs_buf;
+struct xfs_btree_cur;
+struct xfs_mount;
+
+/*
+ * Btree block header size depends on a superblock flag.
+ */
+#define XFS_REFLINK_BLOCK_LEN	XFS_BTREE_SBLOCK_CRC_LEN
+
+/*
+ * Record, key, and pointer address macros for btree blocks.
+ *
+ * (note that some of these may appear unused, but they are used in userspace)
+ */
+#define XFS_REFLINK_REC_ADDR(block, index) \
+	((xfs_reflink_rec_t *) \
+		((char *)(block) + \
+		 XFS_REFLINK_BLOCK_LEN + \
+		 (((index) - 1) * sizeof(xfs_reflink_rec_t))))
+
+#define XFS_REFLINK_KEY_ADDR(block, index) \
+	((xfs_reflink_key_t *) \
+		((char *)(block) + \
+		 XFS_REFLINK_BLOCK_LEN + \
+		 ((index) - 1) * sizeof(xfs_reflink_key_t)))
+
+#define XFS_REFLINK_PTR_ADDR(block, index, maxrecs) \
+	((xfs_reflink_ptr_t *) \
+		((char *)(block) + \
+		 XFS_REFLINK_BLOCK_LEN + \
+		 (maxrecs) * sizeof(xfs_reflink_key_t) + \
+		 ((index) - 1) * sizeof(xfs_reflink_ptr_t)))
+
+extern struct xfs_btree_cur *xfs_reflinkbt_init_cursor(struct xfs_mount *,
+		struct xfs_trans *, struct xfs_buf *,
+		xfs_agnumber_t);
+extern int xfs_reflinkbt_maxrecs(struct xfs_mount *, int, int);
+extern int xfs_reflink_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+		int *stat);
+extern int xfs_reflink_lookup_ge(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+		int *stat);
+extern int xfs_reflink_get_rec(struct xfs_btree_cur *cur, xfs_agblock_t *bno,
+		xfs_extlen_t *len, xfs_nlink_t *nlink, int *stat);
+
+#endif	/* __XFS_REFLINK_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index db5a19d3..5f8f7fd 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -36,6 +36,8 @@
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
 #include "xfs_rmap_btree.h"
+#include "xfs_bmap.h"
+#include "xfs_reflink_btree.h"
 
 /*
  * Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -717,6 +719,11 @@ xfs_sb_mount_common(
 	mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2;
 	mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2;
 
+	mp->m_rlbt_mxr[0] = xfs_reflinkbt_maxrecs(mp, sbp->sb_blocksize, 1);
+	mp->m_rlbt_mxr[1] = xfs_reflinkbt_maxrecs(mp, sbp->sb_blocksize, 0);
+	mp->m_rlbt_mnr[0] = mp->m_rlbt_mxr[0] / 2;
+	mp->m_rlbt_mnr[1] = mp->m_rlbt_mxr[1] / 2;
+
 	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
 	mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
 					sbp->sb_inopblock);
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 88efbb4..d1de74e 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -216,6 +216,7 @@ int	xfs_log_calc_minimum_size(struct xfs_mount *);
 #define	XFS_INO_REF		2
 #define	XFS_ATTR_BTREE_REF	1
 #define	XFS_DQUOT_REF		1
+#define XFS_REFLINK_BTREE_REF	1
 
 /*
  * Flags for xfs_trans_ichgtime().
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index d495f82..a6d1d3b 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -81,6 +81,8 @@ xfs_allocfree_log_count(
 
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
 		num_trees++;
+	if (xfs_sb_version_hasreflink(&mp->m_sb))
+		num_trees++;
 
 	return num_ops * num_trees * (2 * mp->m_ag_maxlevels - 1);
 }
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 3d50364..1a93ac9 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -109,7 +109,7 @@ typedef enum {
 
 typedef enum {
 	XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_RMAPi, XFS_BTNUM_BMAPi,
-	XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_MAX
+	XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_RLi, XFS_BTNUM_MAX
 } xfs_btnum_t;
 
 struct xfs_name {
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index cdced0b..69af7f7 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -92,6 +92,8 @@ typedef struct xfs_mount {
 	uint			m_inobt_mnr[2];	/* min inobt btree records */
 	uint			m_rmap_mxr[2];	/* max rmap btree records */
 	uint			m_rmap_mnr[2];	/* min rmap btree records */
+	uint			m_rlbt_mxr[2];	/* max rlbt btree records */
+	uint			m_rlbt_mnr[2];	/* min rlbt btree records */
 	uint			m_ag_maxlevels;	/* XFS_AG_MAXLEVELS */
 	uint			m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
 	uint			m_in_maxlevels;	/* max inobt btree levels. */
@@ -315,6 +317,9 @@ typedef struct xfs_perag {
 	/* for rcu-safe freeing */
 	struct rcu_head	rcu_head;
 	int		pagb_count;	/* pagb slots in use */
+
+	/* reflink */
+	__uint8_t	pagf_reflink_level;
 } xfs_perag_t;
 
 extern int	xfs_log_sbcount(xfs_mount_t *);
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index 67bbfa2..57449b8 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -61,6 +61,7 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v)
 		{ "ibt2",		XFSSTAT_END_IBT_V2		},
 		{ "fibt2",		XFSSTAT_END_FIBT_V2		},
 		{ "rmapbt",		XFSSTAT_END_RMAP_V2		},
+		{ "rlbt2",		XFSSTAT_END_RLBT_V2		},
 		/* we print both series of quota information together */
 		{ "qm",			XFSSTAT_END_QM			},
 	};
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index 8414db2..d943c04 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -215,7 +215,23 @@ struct xfsstats {
 	__uint32_t		xs_rmap_2_alloc;
 	__uint32_t		xs_rmap_2_free;
 	__uint32_t		xs_rmap_2_moves;
-#define XFSSTAT_END_XQMSTAT		(XFSSTAT_END_RMAP_V2+6)
+#define XFSSTAT_END_RLBT_V2		(XFSSTAT_END_RMAP_V2+15)
+	__uint32_t		xs_rlbt_2_lookup;
+	__uint32_t		xs_rlbt_2_compare;
+	__uint32_t		xs_rlbt_2_insrec;
+	__uint32_t		xs_rlbt_2_delrec;
+	__uint32_t		xs_rlbt_2_newroot;
+	__uint32_t		xs_rlbt_2_killroot;
+	__uint32_t		xs_rlbt_2_increment;
+	__uint32_t		xs_rlbt_2_decrement;
+	__uint32_t		xs_rlbt_2_lshift;
+	__uint32_t		xs_rlbt_2_rshift;
+	__uint32_t		xs_rlbt_2_split;
+	__uint32_t		xs_rlbt_2_join;
+	__uint32_t		xs_rlbt_2_alloc;
+	__uint32_t		xs_rlbt_2_free;
+	__uint32_t		xs_rlbt_2_moves;
+#define XFSSTAT_END_XQMSTAT		(XFSSTAT_END_RLBT_V2+6)
 	__uint32_t		xs_qm_dqreclaims;
 	__uint32_t		xs_qm_dqreclaim_misses;
 	__uint32_t		xs_qm_dquot_dups;

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs



[Index of Archives]     [Linux XFS Devel]     [Linux Filesystem Development]     [Filesystem Testing]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux