From: Darrick J. Wong <djwong@xxxxxxxxxx> Create a new in-memory btree type so that we can store refcount bag info in a much more memory-efficient format. Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx> --- fs/xfs/Makefile | 1 fs/xfs/libxfs/xfs_btree.h | 1 fs/xfs/libxfs/xfs_types.h | 6 + fs/xfs/scrub/rcbag_btree.c | 314 ++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/rcbag_btree.h | 76 +++++++++++ fs/xfs/scrub/trace.h | 1 fs/xfs/xfs_trace.h | 1 7 files changed, 398 insertions(+), 2 deletions(-) create mode 100644 fs/xfs/scrub/rcbag_btree.c create mode 100644 fs/xfs/scrub/rcbag_btree.h diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index dfa142eb16f46..f927a43cc16a5 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -198,6 +198,7 @@ xfs-y += $(addprefix scrub/, \ inode_repair.o \ newbt.o \ nlinks_repair.o \ + rcbag_btree.o \ reap.o \ refcount_repair.o \ repair.o \ diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index edbcd4f0e9888..339b5561e5b04 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -62,6 +62,7 @@ union xfs_btree_rec { #define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi) #define XFS_BTNUM_RMAP ((xfs_btnum_t)XFS_BTNUM_RMAPi) #define XFS_BTNUM_REFC ((xfs_btnum_t)XFS_BTNUM_REFCi) +#define XFS_BTNUM_RCBAG ((xfs_btnum_t)XFS_BTNUM_RCBAGi) struct xfs_btree_ops; uint32_t xfs_btree_magic(struct xfs_mount *mp, const struct xfs_btree_ops *ops); diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 035bf703d719a..5556615a2ff9c 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -121,7 +121,8 @@ typedef enum { */ typedef enum { XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_RMAPi, XFS_BTNUM_BMAPi, - XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_REFCi, XFS_BTNUM_MAX + XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_REFCi, XFS_BTNUM_RCBAGi, + XFS_BTNUM_MAX } xfs_btnum_t; #define XFS_BTNUM_STRINGS \ @@ -131,7 +132,8 @@ typedef enum { { XFS_BTNUM_BMAPi, "bmbt" }, \ { XFS_BTNUM_INOi, "inobt" }, \ { XFS_BTNUM_FINOi, "finobt" }, \ - { XFS_BTNUM_REFCi, "refcbt" } + { XFS_BTNUM_REFCi, "refcbt" }, \ + { XFS_BTNUM_RCBAGi, "rcbagbt" } struct xfs_name { const unsigned char *name; diff --git a/fs/xfs/scrub/rcbag_btree.c b/fs/xfs/scrub/rcbag_btree.c new file mode 100644 index 0000000000000..4b0c849321b25 --- /dev/null +++ b/fs/xfs/scrub/rcbag_btree.c @@ -0,0 +1,314 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@xxxxxxxxxx> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_btree_mem.h" +#include "xfs_error.h" +#include "scrub/xfile.h" +#include "scrub/xfbtree.h" +#include "scrub/rcbag_btree.h" +#include "scrub/trace.h" + +static struct kmem_cache *rcbagbt_cur_cache; + +STATIC void +rcbagbt_init_key_from_rec( + union xfs_btree_key *key, + const union xfs_btree_rec *rec) +{ + struct rcbag_key *bag_key = (struct rcbag_key *)key; + const struct rcbag_rec *bag_rec = (const struct rcbag_rec *)rec; + + BUILD_BUG_ON(sizeof(struct rcbag_key) > sizeof(union xfs_btree_key)); + BUILD_BUG_ON(sizeof(struct rcbag_rec) > sizeof(union xfs_btree_rec)); + + bag_key->rbg_startblock = bag_rec->rbg_startblock; + bag_key->rbg_blockcount = bag_rec->rbg_blockcount; +} + +STATIC void +rcbagbt_init_rec_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec) +{ + struct rcbag_rec *bag_rec = (struct rcbag_rec *)rec; + struct rcbag_rec *bag_irec = (struct rcbag_rec *)&cur->bc_rec; + + bag_rec->rbg_startblock = bag_irec->rbg_startblock; + bag_rec->rbg_blockcount = bag_irec->rbg_blockcount; + bag_rec->rbg_refcount = bag_irec->rbg_refcount; +} + +STATIC int64_t +rcbagbt_key_diff( + struct xfs_btree_cur *cur, + const union xfs_btree_key *key) +{ + struct rcbag_rec *rec = (struct rcbag_rec *)&cur->bc_rec; + const struct rcbag_key *kp = (const struct rcbag_key *)key; + + if (kp->rbg_startblock > rec->rbg_startblock) + return 1; + if (kp->rbg_startblock < rec->rbg_startblock) + return -1; + + if (kp->rbg_blockcount > rec->rbg_blockcount) + return 1; + if (kp->rbg_blockcount < rec->rbg_blockcount) + return -1; + + return 0; +} + +STATIC int64_t +rcbagbt_diff_two_keys( + struct xfs_btree_cur *cur, + const union xfs_btree_key *k1, + const union xfs_btree_key *k2, + const union xfs_btree_key *mask) +{ + const struct rcbag_key *kp1 = (const struct rcbag_key *)k1; + const struct rcbag_key *kp2 = (const struct rcbag_key *)k2; + + ASSERT(mask == NULL); + + if (kp1->rbg_startblock > kp2->rbg_startblock) + return 1; + if (kp1->rbg_startblock < kp2->rbg_startblock) + return -1; + + if (kp1->rbg_blockcount > kp2->rbg_blockcount) + return 1; + if (kp1->rbg_blockcount < kp2->rbg_blockcount) + return -1; + + return 0; +} + +STATIC int +rcbagbt_keys_inorder( + struct xfs_btree_cur *cur, + const union xfs_btree_key *k1, + const union xfs_btree_key *k2) +{ + const struct rcbag_key *kp1 = (const struct rcbag_key *)k1; + const struct rcbag_key *kp2 = (const struct rcbag_key *)k2; + + if (kp1->rbg_startblock > kp2->rbg_startblock) + return 0; + if (kp1->rbg_startblock < kp2->rbg_startblock) + return 1; + + if (kp1->rbg_blockcount > kp2->rbg_blockcount) + return 0; + if (kp1->rbg_blockcount < kp2->rbg_blockcount) + return 1; + + return 0; +} + +STATIC int +rcbagbt_recs_inorder( + struct xfs_btree_cur *cur, + const union xfs_btree_rec *r1, + const union xfs_btree_rec *r2) +{ + const struct rcbag_rec *rp1 = (const struct rcbag_rec *)r1; + const struct rcbag_rec *rp2 = (const struct rcbag_rec *)r2; + + if (rp1->rbg_startblock > rp2->rbg_startblock) + return 0; + if (rp1->rbg_startblock < rp2->rbg_startblock) + return 1; + + if (rp1->rbg_blockcount > rp2->rbg_blockcount) + return 0; + if (rp1->rbg_blockcount < rp2->rbg_blockcount) + return 1; + + return 0; +} + +static xfs_failaddr_t +rcbagbt_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_mount; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + xfs_failaddr_t fa; + unsigned int level; + + if (!xfs_verify_magic(bp, block->bb_magic)) + return __this_address; + + fa = xfs_btree_lblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN); + if (fa) + return fa; + + level = be16_to_cpu(block->bb_level); + if (level >= rcbagbt_maxlevels_possible()) + return __this_address; + + return xfbtree_lblock_verify(bp, + rcbagbt_maxrecs(mp, xfo_to_b(1), level == 0)); +} + +static void +rcbagbt_rw_verify( + struct xfs_buf *bp) +{ + xfs_failaddr_t fa = rcbagbt_verify(bp); + + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); +} + +/* skip crc checks on in-memory btrees to save time */ +static const struct xfs_buf_ops rcbagbt_mem_buf_ops = { + .name = "rcbagbt_mem", + .magic = { 0, cpu_to_be32(RCBAG_MAGIC) }, + .verify_read = rcbagbt_rw_verify, + .verify_write = rcbagbt_rw_verify, + .verify_struct = rcbagbt_verify, +}; + +static const struct xfs_btree_ops rcbagbt_mem_ops = { + .rec_len = sizeof(struct rcbag_rec), + .key_len = sizeof(struct rcbag_key), + .lru_refs = 1, + .geom_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_LONG_PTRS | + XFS_BTREE_IN_XFILE, + + .dup_cursor = xfbtree_dup_cursor, + .set_root = xfbtree_set_root, + .alloc_block = xfbtree_alloc_block, + .free_block = xfbtree_free_block, + .get_minrecs = xfbtree_get_minrecs, + .get_maxrecs = xfbtree_get_maxrecs, + .init_key_from_rec = rcbagbt_init_key_from_rec, + .init_rec_from_cur = rcbagbt_init_rec_from_cur, + .init_ptr_from_cur = xfbtree_init_ptr_from_cur, + .key_diff = rcbagbt_key_diff, + .buf_ops = &rcbagbt_mem_buf_ops, + .diff_two_keys = rcbagbt_diff_two_keys, + .keys_inorder = rcbagbt_keys_inorder, + .recs_inorder = rcbagbt_recs_inorder, +}; + +/* Create a cursor for an in-memory btree. */ +struct xfs_btree_cur * +rcbagbt_mem_cursor( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_buf *head_bp, + struct xfbtree *xfbtree) +{ + struct xfs_btree_cur *cur; + + cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_RCBAG, &rcbagbt_mem_ops, + rcbagbt_maxlevels_possible(), rcbagbt_cur_cache); + + cur->bc_mem.xfbtree = xfbtree; + cur->bc_mem.head_bp = head_bp; + cur->bc_nlevels = xfs_btree_mem_head_nlevels(head_bp); + return cur; +} + +/* Create an in-memory refcount bag btree. */ +int +rcbagbt_mem_create( + struct xfs_mount *mp, + struct xfs_buftarg *target, + struct xfbtree **xfbtreep) +{ + struct xfbtree_config cfg = { + .btree_ops = &rcbagbt_mem_ops, + .target = target, + }; + + return xfbtree_create(mp, &cfg, xfbtreep); +} + +/* Calculate number of records in a refcount bag btree block. */ +static inline unsigned int +rcbagbt_block_maxrecs( + unsigned int blocklen, + bool leaf) +{ + if (leaf) + return blocklen / sizeof(struct rcbag_rec); + return blocklen / + (sizeof(struct rcbag_key) + sizeof(rcbag_ptr_t)); +} + +/* + * Calculate number of records in an refcount bag btree block. + */ +unsigned int +rcbagbt_maxrecs( + struct xfs_mount *mp, + unsigned int blocklen, + bool leaf) +{ + blocklen -= RCBAG_BLOCK_LEN; + return rcbagbt_block_maxrecs(blocklen, leaf); +} + +#define RCBAGBT_INIT_MINRECS(minrecs) \ + do { \ + unsigned int blocklen; \ +\ + blocklen = PAGE_SIZE - XFS_BTREE_LBLOCK_CRC_LEN; \ +\ + minrecs[0] = rcbagbt_block_maxrecs(blocklen, true) / 2; \ + minrecs[1] = rcbagbt_block_maxrecs(blocklen, false) / 2; \ + } while (0) + +/* Compute the max possible height for refcount bag btrees. */ +unsigned int +rcbagbt_maxlevels_possible(void) +{ + unsigned int minrecs[2]; + + RCBAGBT_INIT_MINRECS(minrecs); + return xfs_btree_space_to_height(minrecs, ULLONG_MAX); +} + +/* Calculate the refcount bag btree size for some records. */ +unsigned long long +rcbagbt_calc_size( + unsigned long long nr_records) +{ + unsigned int minrecs[2]; + + RCBAGBT_INIT_MINRECS(minrecs); + return xfs_btree_calc_size(minrecs, nr_records); +} + +int __init +rcbagbt_init_cur_cache(void) +{ + rcbagbt_cur_cache = kmem_cache_create("xfs_rcbagbt_cur", + xfs_btree_cur_sizeof(rcbagbt_maxlevels_possible()), + 0, 0, NULL); + + if (!rcbagbt_cur_cache) + return -ENOMEM; + return 0; +} + +void +rcbagbt_destroy_cur_cache(void) +{ + kmem_cache_destroy(rcbagbt_cur_cache); + rcbagbt_cur_cache = NULL; +} diff --git a/fs/xfs/scrub/rcbag_btree.h b/fs/xfs/scrub/rcbag_btree.h new file mode 100644 index 0000000000000..dfe276cfd96c1 --- /dev/null +++ b/fs/xfs/scrub/rcbag_btree.h @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2022-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@xxxxxxxxxx> + */ +#ifndef __XFS_SCRUB_RCBAG_BTREE_H__ +#define __XFS_SCRUB_RCBAG_BTREE_H__ + +#ifdef CONFIG_XFS_BTREE_IN_XFILE + +struct xfs_buf; +struct xfs_btree_cur; +struct xfs_mount; + +#define RCBAG_MAGIC 0x74826671 /* 'JRBG' */ + +struct rcbag_key { + uint32_t rbg_startblock; + uint32_t rbg_blockcount; +}; + +struct rcbag_rec { + uint32_t rbg_startblock; + uint32_t rbg_blockcount; + uint64_t rbg_refcount; +}; + +typedef __be64 rcbag_ptr_t; + +/* reflinks only exist on crc enabled filesystems */ +#define RCBAG_BLOCK_LEN XFS_BTREE_LBLOCK_CRC_LEN + +/* + * Record, key, and pointer address macros for btree blocks. + * + * (note that some of these may appear unused, but they are used in userspace) + */ +#define RCBAG_REC_ADDR(block, index) \ + ((struct rcbag_rec *) \ + ((char *)(block) + RCBAG_BLOCK_LEN + \ + (((index) - 1) * sizeof(struct rcbag_rec)))) + +#define RCBAG_KEY_ADDR(block, index) \ + ((struct rcbag_key *) \ + ((char *)(block) + RCBAG_BLOCK_LEN + \ + ((index) - 1) * sizeof(struct rcbag_key))) + +#define RCBAG_PTR_ADDR(block, index, maxrecs) \ + ((rcbag_ptr_t *) \ + ((char *)(block) + RCBAG_BLOCK_LEN + \ + (maxrecs) * sizeof(struct rcbag_key) + \ + ((index) - 1) * sizeof(rcbag_ptr_t))) + +unsigned int rcbagbt_maxrecs(struct xfs_mount *mp, unsigned int blocklen, + bool leaf); + +unsigned long long rcbagbt_calc_size(unsigned long long nr_records); + +unsigned int rcbagbt_maxlevels_possible(void); + +int __init rcbagbt_init_cur_cache(void); +void rcbagbt_destroy_cur_cache(void); + +struct xfbtree; +struct xfs_btree_cur *rcbagbt_mem_cursor(struct xfs_mount *mp, + struct xfs_trans *tp, struct xfs_buf *head_bp, + struct xfbtree *xfbtree); +int rcbagbt_mem_create(struct xfs_mount *mp, struct xfs_buftarg *target, + struct xfbtree **xfbtreep); + +#else +# define rcbagbt_init_cur_cache() 0 +# define rcbagbt_destroy_cur_cache() ((void)0) +#endif /* CONFIG_XFS_BTREE_IN_XFILE */ + +#endif /* __XFS_SCRUB_RCBAG_BTREE_H__ */ diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 14bbefdd7ab81..e8f71179e1eab 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -42,6 +42,7 @@ TRACE_DEFINE_ENUM(XFS_BTNUM_INOi); TRACE_DEFINE_ENUM(XFS_BTNUM_FINOi); TRACE_DEFINE_ENUM(XFS_BTNUM_RMAPi); TRACE_DEFINE_ENUM(XFS_BTNUM_REFCi); +TRACE_DEFINE_ENUM(XFS_BTNUM_RCBAGi); TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_SHARED); TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_COW); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index ba3eed23533f0..1690f518ae74b 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2458,6 +2458,7 @@ TRACE_DEFINE_ENUM(XFS_BTNUM_INOi); TRACE_DEFINE_ENUM(XFS_BTNUM_FINOi); TRACE_DEFINE_ENUM(XFS_BTNUM_RMAPi); TRACE_DEFINE_ENUM(XFS_BTNUM_REFCi); +TRACE_DEFINE_ENUM(XFS_BTNUM_RCBAGi); DECLARE_EVENT_CLASS(xfs_btree_cur_class, TP_PROTO(struct xfs_btree_cur *cur, int level, struct xfs_buf *bp),