On Tue, Oct 31, 2017 at 04:22:28PM +0200, Christoph Hellwig wrote: > Replace the current linear list and the indirection array for the in-core > extent list with a b+tree to avoid the need for larger memory allocations > for the indirection array when lots of extents are present. The current > extent list implementations leads to heavy pressure on the memory > allocator when modifying files with a high extent count, and can lead > to high latencies because of that. > > The replacement is a b+tree with a few quirks. The leaf nodes directly > store the extent record in two u64 values. The encoding is a little bit > different from the existing in-core extent records so that the start > offset and length which are required for lookups can be retreived with > simple mask operations. The inner nodes store a 64-bit key containing > the start offset in the first half of the node, and the pointers to the > next lower level in the second half. In either case we walk the node > from the beginninig to the end and do a linear search, as that is more > efficient for the low number of cache lines touched during a search > (2 for the inner nodes, 4 for the leaf nodes) than a binary search. > We store termination markers (zero length for the leaf nodes, an > otherwise impossible high bit for the inner nodes) to terminate the key > list / records instead of storing a count to use the available cache > lines as efficiently as possible. > > One quirk of the algorithm is that while we normally split a node half and > half like usual btree implementations we just spill over entries added at > the very end of the list to a new node on its own. This means we get a > 100% fill grade for the common cases of bulk inseration at reading an > inode into memory, and when only sequentially appending to a file. The > downside is a slightly higher chance of splits on the first random > inserations. > > Both insert and removal manually recurse into the lower levels, but > the bulk deletion of the whole tree is still implemented as a recursive > function call, although one limited by the overall depth and with very > little stack usage in every iteration. > > For the first few extents we dynamically grow the list from a single > extent to the next powers of two until we have a first full leaf block > and that building the actual tree. > > The code started out based on the generic lib/btree.c code from Joern > Engel based on earlier work from Peter Zijlstra, but has since been > rewritten beyond recognition. Ok, time for a real review. > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > --- > fs/xfs/Makefile | 1 + > fs/xfs/libxfs/xfs_bmap.c | 19 +- > fs/xfs/libxfs/xfs_bmap_btree.c | 103 +--- > fs/xfs/libxfs/xfs_bmap_btree.h | 7 +- > fs/xfs/libxfs/xfs_format.h | 4 - > fs/xfs/libxfs/xfs_iext_tree.c | 1043 ++++++++++++++++++++++++++++++++++++++++ > fs/xfs/libxfs/xfs_inode_fork.c | 1035 +-------------------------------------- > fs/xfs/libxfs/xfs_inode_fork.h | 84 +--- > fs/xfs/libxfs/xfs_types.h | 3 +- > fs/xfs/scrub/bmap.c | 5 +- > fs/xfs/xfs_inode.c | 2 +- > fs/xfs/xfs_inode_item.c | 2 - > fs/xfs/xfs_trace.h | 51 +- > 13 files changed, 1103 insertions(+), 1256 deletions(-) > create mode 100644 fs/xfs/libxfs/xfs_iext_tree.c > > diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile > index a2a5d046793d..7ceb41a9786a 100644 > --- a/fs/xfs/Makefile > +++ b/fs/xfs/Makefile > @@ -49,6 +49,7 @@ xfs-y += $(addprefix libxfs/, \ > xfs_dquot_buf.o \ > xfs_ialloc.o \ > xfs_ialloc_btree.o \ > + xfs_iext_tree.o \ > xfs_inode_fork.o \ > xfs_inode_buf.o \ > xfs_log_rlimit.o \ > diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c > index e020bd3f8717..46bda00dca79 100644 > --- a/fs/xfs/libxfs/xfs_bmap.c > +++ b/fs/xfs/libxfs/xfs_bmap.c > @@ -805,6 +805,8 @@ xfs_bmap_local_to_extents_empty( > xfs_bmap_forkoff_reset(ip, whichfork); > ifp->if_flags &= ~XFS_IFINLINE; > ifp->if_flags |= XFS_IFEXTENTS; > + ifp->if_u1.if_root = NULL; > + ifp->if_height = 0; > XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); > } > > @@ -846,8 +848,7 @@ xfs_bmap_local_to_extents( > > flags = 0; > error = 0; > - ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == > - XFS_IFINLINE); > + ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS)) == XFS_IFINLINE); > memset(&args, 0, sizeof(args)); > args.tp = tp; > args.mp = ip->i_mount; > @@ -891,6 +892,9 @@ xfs_bmap_local_to_extents( > xfs_bmap_local_to_extents_empty(ip, whichfork); > flags |= XFS_ILOG_CORE; > > + ifp->if_u1.if_root = NULL; > + ifp->if_height = 0; > + > rec.br_startoff = 0; > rec.br_startblock = args.fsbno; > rec.br_blockcount = 1; > @@ -1177,6 +1181,7 @@ xfs_iread_extents( > xfs_extnum_t nextents = XFS_IFORK_NEXTENTS(ip, whichfork); > struct xfs_btree_block *block = ifp->if_broot; > struct xfs_iext_cursor ext; > + struct xfs_bmbt_irec new; > xfs_fsblock_t bno; > struct xfs_buf *bp; > xfs_extnum_t i, j; > @@ -1193,7 +1198,8 @@ xfs_iread_extents( > > ifp->if_bytes = 0; > ifp->if_real_bytes = 0; > - xfs_iext_add(ifp, 0, nextents); > + ifp->if_u1.if_root = NULL; > + ifp->if_height = 0; > > /* > * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. > @@ -1258,16 +1264,15 @@ xfs_iread_extents( > * Copy records into the extent records. > */ > frp = XFS_BMBT_REC_ADDR(mp, block, 1); > - for (j = 0; j < num_recs; j++, i++, frp++) { > - xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i); > + for (j = 0; j < num_recs; j++, frp++, i++) { > if (!xfs_bmbt_validate_extent(mp, whichfork, frp)) { > XFS_ERROR_REPORT("xfs_bmap_read_extents(2)", > XFS_ERRLEVEL_LOW, mp); > error = -EFSCORRUPTED; > goto out_brelse; > } > - trp->l0 = be64_to_cpu(frp->l0); > - trp->l1 = be64_to_cpu(frp->l1); > + xfs_bmbt_disk_get_all(frp, &new); > + xfs_iext_insert(ip, &ext, 1, &new, state); > trace_xfs_read_extent(ip, &ext, state, _THIS_IP_); > xfs_iext_next(ifp, &ext); > } > diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c > index 89260972a0f6..c10aecaaae44 100644 > --- a/fs/xfs/libxfs/xfs_bmap_btree.c > +++ b/fs/xfs/libxfs/xfs_bmap_btree.c > @@ -71,73 +71,21 @@ xfs_bmdr_to_bmbt( > memcpy(tpp, fpp, sizeof(*fpp) * dmxr); > } > > -/* > - * Convert a compressed bmap extent record to an uncompressed form. > - * This code must be in sync with the routines xfs_bmbt_get_startoff, > - * xfs_bmbt_get_startblock and xfs_bmbt_get_blockcount. > - */ > -STATIC void > -__xfs_bmbt_get_all( > - uint64_t l0, > - uint64_t l1, > - xfs_bmbt_irec_t *s) > -{ > - int ext_flag; > - xfs_exntst_t st; > - > - ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN)); > - s->br_startoff = ((xfs_fileoff_t)l0 & > - xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; > - s->br_startblock = (((xfs_fsblock_t)l0 & xfs_mask64lo(9)) << 43) | > - (((xfs_fsblock_t)l1) >> 21); > - s->br_blockcount = (xfs_filblks_t)(l1 & xfs_mask64lo(21)); > - /* This is xfs_extent_state() in-line */ > - if (ext_flag) { > - ASSERT(s->br_blockcount != 0); /* saved for DMIG */ > - st = XFS_EXT_UNWRITTEN; > - } else > - st = XFS_EXT_NORM; > - s->br_state = st; > -} > - > void > -xfs_bmbt_get_all( > - xfs_bmbt_rec_host_t *r, > - xfs_bmbt_irec_t *s) > -{ > - __xfs_bmbt_get_all(r->l0, r->l1, s); > -} > - > -/* > - * Extract the blockcount field from an in memory bmap extent record. > - */ > -xfs_filblks_t > -xfs_bmbt_get_blockcount( > - xfs_bmbt_rec_host_t *r) > -{ > - return (xfs_filblks_t)(r->l1 & xfs_mask64lo(21)); > -} > - > -/* > - * Extract the startblock field from an in memory bmap extent record. > - */ > -xfs_fsblock_t > -xfs_bmbt_get_startblock( > - xfs_bmbt_rec_host_t *r) > -{ > - return (((xfs_fsblock_t)r->l0 & xfs_mask64lo(9)) << 43) | > - (((xfs_fsblock_t)r->l1) >> 21); > -} > - > -/* > - * Extract the startoff field from an in memory bmap extent record. > - */ > -xfs_fileoff_t > -xfs_bmbt_get_startoff( > - xfs_bmbt_rec_host_t *r) > -{ > - return ((xfs_fileoff_t)r->l0 & > - xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; > +xfs_bmbt_disk_get_all( > + struct xfs_bmbt_rec *rec, > + struct xfs_bmbt_irec *irec) > +{ > + uint64_t l0 = get_unaligned_be64(&rec->l0); > + uint64_t l1 = get_unaligned_be64(&rec->l1); > + > + irec->br_startoff = (l0 & xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; > + irec->br_startblock = ((l0 & xfs_mask64lo(9)) << 43) | (l1 >> 21); > + irec->br_blockcount = l1 & xfs_mask64lo(21); > + if (l0 >> (64 - BMBT_EXNTFLAG_BITLEN)) > + irec->br_state = XFS_EXT_UNWRITTEN; > + else > + irec->br_state = XFS_EXT_NORM; > } > > /* > @@ -161,29 +109,6 @@ xfs_bmbt_disk_get_startoff( > xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; > } > > -/* > - * Set all the fields in a bmap extent record from the uncompressed form. > - */ > -void > -xfs_bmbt_set_all( > - struct xfs_bmbt_rec_host *r, > - struct xfs_bmbt_irec *s) > -{ > - int extent_flag = (s->br_state != XFS_EXT_NORM); > - > - ASSERT(s->br_state == XFS_EXT_NORM || s->br_state == XFS_EXT_UNWRITTEN); > - ASSERT(!(s->br_startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN))); > - ASSERT(!(s->br_blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN))); > - ASSERT(!(s->br_startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN))); > - > - r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | > - ((xfs_bmbt_rec_base_t)s->br_startoff << 9) | > - ((xfs_bmbt_rec_base_t)s->br_startblock >> 43); > - r->l1 = ((xfs_bmbt_rec_base_t)s->br_startblock << 21) | > - ((xfs_bmbt_rec_base_t)s->br_blockcount & > - (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); > -} > - > /* > * Set all the fields in a bmap extent record from the uncompressed form. > */ > diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h > index 2fbfe2a24b15..714bfbaf9b2d 100644 > --- a/fs/xfs/libxfs/xfs_bmap_btree.h > +++ b/fs/xfs/libxfs/xfs_bmap_btree.h > @@ -98,16 +98,11 @@ struct xfs_trans; > */ > extern void xfs_bmdr_to_bmbt(struct xfs_inode *, xfs_bmdr_block_t *, int, > struct xfs_btree_block *, int); > -extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); > -extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r); > -extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r); > -extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r); > > void xfs_bmbt_disk_set_all(struct xfs_bmbt_rec *r, struct xfs_bmbt_irec *s); > extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); > extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); > - > -extern void xfs_bmbt_set_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); > +extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); > > extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int, > xfs_bmdr_block_t *, int); > diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h > index 6470dfa768ee..28d5391d2272 100644 > --- a/fs/xfs/libxfs/xfs_format.h > +++ b/fs/xfs/libxfs/xfs_format.h > @@ -1553,10 +1553,6 @@ typedef struct xfs_bmbt_rec { > typedef uint64_t xfs_bmbt_rec_base_t; /* use this for casts */ > typedef xfs_bmbt_rec_t xfs_bmdr_rec_t; > > -typedef struct xfs_bmbt_rec_host { > - uint64_t l0, l1; > -} xfs_bmbt_rec_host_t; > - > /* > * Values and macros for delayed-allocation startblock fields. > */ > diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c > new file mode 100644 > index 000000000000..acb66c0677e7 > --- /dev/null > +++ b/fs/xfs/libxfs/xfs_iext_tree.c > @@ -0,0 +1,1043 @@ > +/* > + * Copyright (c) 2017 Christoph Hellwig. > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms and conditions of the GNU General Public License, > + * version 2, as published by the Free Software Foundation. > + * > + * This program is distributed in the hope it will be useful, but WITHOUT > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > + * more details. > + */ > + > +#include <linux/cache.h> > +#include <linux/kernel.h> > +#include <linux/slab.h> > +#include "xfs.h" > +#include "xfs_format.h" > +#include "xfs_bit.h" > +#include "xfs_log_format.h" > +#include "xfs_inode.h" > +#include "xfs_inode_fork.h" > +#include "xfs_trans_resv.h" > +#include "xfs_mount.h" > +#include "xfs_trace.h" > + > +/* > + * In-core extent record layout: > + * > + * +-------+----------------------------+ > + * | 00:51 | all 52 bits of startoff | > + * | 52:63 | low 12 bits of startblock | > + * +-------+----------------------------+ > + * | 00:20 | all 21 bits of length | > + * | 21 | unwritten extent bit | > + * | 22:63 | high 42 bits of startblock | > + * +-------+----------------------------+ > + */ > +#define XFS_IEXT_STARTOFF_MASK xfs_mask64lo(52) > +#define XFS_IEXT_LENGTH_MASK xfs_mask64lo(21) > +#define XFS_IEXT_STARTBLOCK_MASK xfs_mask64lo(54) > + > +struct xfs_iext_rec { > + uint64_t lo; > + uint64_t hi; > +}; > + > +/* > + * Given that the length can't be a zero, only an empty hi value indicates an > + * unused record. > + */ > +static bool xfs_iext_rec_is_empty(struct xfs_iext_rec *rec) > +{ > + return rec->hi == 0; > +} > + > +static inline void xfs_iext_rec_clear(struct xfs_iext_rec *rec) > +{ > + rec->lo = 0; > + rec->hi = 0; > +} > + > +static void > +xfs_iext_set( > + struct xfs_iext_rec *rec, > + struct xfs_bmbt_irec *irec) > +{ > + ASSERT((irec->br_startoff & ~XFS_IEXT_STARTOFF_MASK) == 0); > + ASSERT((irec->br_blockcount & ~XFS_IEXT_LENGTH_MASK) == 0); > + ASSERT((irec->br_startblock & ~XFS_IEXT_STARTBLOCK_MASK) == 0); > + > + rec->lo = irec->br_startoff & XFS_IEXT_STARTOFF_MASK; > + rec->hi = irec->br_blockcount & XFS_IEXT_LENGTH_MASK; > + > + rec->lo |= (irec->br_startblock << 52); > + rec->hi |= ((irec->br_startblock & ~xfs_mask64lo(12)) << (22 - 12)); > + > + if (irec->br_state == XFS_EXT_UNWRITTEN) > + rec->hi |= (1 << 21); > +} > + > +static void > +xfs_iext_get( > + struct xfs_bmbt_irec *irec, > + struct xfs_iext_rec *rec) > +{ > + irec->br_startoff = rec->lo & XFS_IEXT_STARTOFF_MASK; > + irec->br_blockcount = rec->hi & XFS_IEXT_LENGTH_MASK; > + > + irec->br_startblock = rec->lo >> 52; > + irec->br_startblock |= (rec->hi & xfs_mask64hi(42)) >> (22 - 12); > + > + if (rec->hi & (1 << 21)) > + irec->br_state = XFS_EXT_UNWRITTEN; > + else > + irec->br_state = XFS_EXT_NORM; > +} > + > +enum { > + NODE_SIZE = L1_CACHE_BYTES * 4, > + KEYS_PER_NODE = NODE_SIZE / (sizeof(uint64_t) + sizeof(void *)), > + RECS_PER_LEAF = (NODE_SIZE - sizeof(uint64_t) - sizeof(uint64_t)) / > + sizeof(struct xfs_iext_rec), > +}; > + > +/* > + * In-core extent btree block layout: > + * > + * There are two types of blocks in the btree: leaf and inner (non-leaf) blocks. > + * > + * The leaf blocks are made up by %KEYS_PER_NODE extent records, which each > + * contain the startoffset, blockcount, startblock and unwritten extent flag. > + * See above for the exact format, followed by pointers to the previous and next > + * leaf blocks (if there are any). > + * > + * The inner (non-leaf) blocks first contain KEYS_PER_NODE lookup keys, followed > + * by an equal number of pointers to the btree blocks at the next lower level. > + * > + * Note that we currently always allocate 64-bits worth for pointers in the > + * inner nodes or the link pointers in the leaf nodes even on 32-bit We do? sizeof(void *) == 4 on 32-bit; on a i686 machine this evaluates to.... NODE_SIZE = 64 * 4, KEYS_PER_NODE = 256 / (8 + 4) = 21 While on x64 this becomes: KEYS_PER_NODE = 256 / (8 + 8) = 16 ...right? RECS_PER_LEAF = (256 - 8 - 8) / 16 = 15 Does RECS_PER_LEAF reserve two uint64_t for the prev and next pointers in struct xfs_iext_leaf? It'd be a little more clear if the definition was: RECS_PER_LEAF = (NODE_SIZE - (2 * sizeof(void *))) / sizeof(xfs_iext_rec) Frankly I wonder why not just have: (NODE_SIZE - sizeof(xfs_iext_leaf_tail)) / sizeof(iext_rec) Though I suppose practically speaking none of this changes RECS_PER_LEAF. > + * architectures, so that we can use consistent addressing using and array of > + * 64-bit unsigned integers. If someone still cares for 32-bit architectures > + * this could be optimized a bit better for them. > + * > + * +-------+-------+-------+-------+-------+----------+----------+ > + * Leaf: | rec 1 | rec 2 | rec 3 | rec 4 | rec N | prev-ptr | next-ptr | > + * +-------+-------+-------+-------+-------+----------+----------+ > + * > + * +-------+-------+-------+-------+-------+-------+------+-------+ > + * Inner: | key 1 | key 2 | key 3 | key N | ptr 1 | ptr 2 | ptr3 | ptr N | > + * +-------+-------+-------+-------+-------+-------+------+-------+ > + */ > +struct xfs_iext_node { > + uint64_t keys[KEYS_PER_NODE]; > +#define XFS_IEXT_KEY_INVALID (1ULL << 63) > + void *ptrs[KEYS_PER_NODE]; > +}; Hm, ok, let's do the math here. The data forks can have at most 2^32 extents, and the attr fork can have at most 2^16 extents. The L1 cache sizes range from 4 bytes (h8300) to 256 bytes (s390), with x64 at 64 bytes and arm64/ppc64 at 128. This means that this is broken on h8300 because it's not possible to create a leaf containing even one record. Not that even a 32-byte leaf is acceptable in terms of overhead. Perhaps NODE_SIZE should have a minimum? For the data fork on x64, the largest our tree can get is: 2^32 max extent records RECS_PER_LEAF = 15 KEYS_PER_NODE = 16 level 0 = 286,331,154 leaf blocks level 1 = 17,895,698 nodes level 2 = 1,118,482 nodes level 3 = 69,906 nodes level 4 = 4,370 nodes level 5 = 274 nodes level 6 = 18 nodes level 7 = 2 nodes level 8 = 1 node == 305,419,904 blocks, or 78GB of RAM? The leaves eat 68G, the nodes eat 10G. Now let's try s390: RECS_PER_LEAF = 63 KEYS_PER_NODE = 64 level 0 = 68,174,085 leaf blocks level 1 = 1,065,221 nodes level 2 = 16,645 nodes level 3 = 261 nodes level 4 = 5 nodes level 5 = 1 node == 69,256,218 blocks, or 71GB, or 3G tree overhead. Hm, I suppose ~80G of RAM to map ~16T of space isn't bad, assuming each extent maps a single 4k block. I guess it's pretty horrible if each block is instead 512b. However, prior to this code we'd just fall over dead. :) > + > +struct xfs_iext_leaf { > + struct xfs_iext_rec recs[RECS_PER_LEAF]; > + struct xfs_iext_leaf *prev; > + struct xfs_iext_leaf *next; > +}; > + > +inline xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp) static inline? > +{ > + return ifp->if_bytes / sizeof(struct xfs_iext_rec); > +} > + > +static inline int xfs_iext_max_recs(struct xfs_ifork *ifp) > +{ > + if (ifp->if_height == 1) > + return xfs_iext_count(ifp); > + return RECS_PER_LEAF; > +} > + > +static inline struct xfs_iext_rec *cur_rec(struct xfs_iext_cursor *cur) > +{ > + return &cur->leaf->recs[cur->pos]; > +} > + > +static noinline bool xfs_iext_valid(struct xfs_ifork *ifp, > + struct xfs_iext_cursor *cur) > +{ > + if (cur->pos < 0) > + return false; > + if (cur->pos >= xfs_iext_max_recs(ifp)) > + return false; > + if (!cur->leaf) > + return false; > + if (xfs_iext_rec_is_empty(cur_rec(cur))) > + return false; > + return true; > +} > + > +static void * > +xfs_iext_find_first_leaf( > + struct xfs_ifork *ifp) > +{ > + struct xfs_iext_node *node = ifp->if_u1.if_root; > + int height; > + > + if (!ifp->if_height) > + return NULL; > + > + for (height = ifp->if_height; height > 1; height--) { > + node = node->ptrs[0]; > + ASSERT(node); > + } > + > + return node; > +} > + > +static void * > +xfs_iext_find_last_leaf( > + struct xfs_ifork *ifp) > +{ > + struct xfs_iext_node *node = ifp->if_u1.if_root; > + int height, i; > + > + if (!ifp->if_height) > + return NULL; > + > + for (height = ifp->if_height; height > 1; height--) { > + for (i = 1; i < KEYS_PER_NODE; i++) > + if (!node->ptrs[i]) > + break; > + node = node->ptrs[i - 1]; > + ASSERT(node); > + } > + > + return node; > +} > + > +void > +xfs_iext_first( > + struct xfs_ifork *ifp, > + struct xfs_iext_cursor *cur) > +{ > + cur->pos = 0; > + cur->leaf = xfs_iext_find_first_leaf(ifp); > +} > + > +void > +xfs_iext_last( > + struct xfs_ifork *ifp, > + struct xfs_iext_cursor *cur) > +{ > + int i; > + > + cur->leaf = xfs_iext_find_last_leaf(ifp); > + if (!cur->leaf) { > + cur->pos = 0; > + return; > + } > + > + for (i = 1; i < xfs_iext_max_recs(ifp); i++) { > + if (xfs_iext_rec_is_empty(&cur->leaf->recs[i])) > + break; > + } > + cur->pos = i - 1; > +} > + > +void > +xfs_iext_next( > + struct xfs_ifork *ifp, > + struct xfs_iext_cursor *cur) > +{ > + if (!cur->leaf) { > + ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF); > + xfs_iext_first(ifp, cur); > + return; > + } > + > + ASSERT(cur->pos >= 0); > + ASSERT(cur->pos < xfs_iext_max_recs(ifp)); > + > + cur->pos++; > + if (!xfs_iext_valid(ifp, cur) && ifp->if_height > 1 && > + cur->leaf && cur->leaf->next) { > + cur->leaf = cur->leaf->next; > + cur->pos = 0; > + } > +} > + > +void > +xfs_iext_prev( > + struct xfs_ifork *ifp, > + struct xfs_iext_cursor *cur) > +{ > + if (!cur->leaf) { > + ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF); > + xfs_iext_last(ifp, cur); > + return; > + } > + > + ASSERT(cur->pos >= 0); > + ASSERT(cur->pos <= RECS_PER_LEAF); > + > +recurse: > + do { > + cur->pos--; > + if (xfs_iext_valid(ifp, cur)) > + return; > + } while (cur->pos > 0); > + > + if (ifp->if_height > 1 && cur->leaf->prev) { > + cur->leaf = cur->leaf->prev; > + cur->pos = RECS_PER_LEAF; > + goto recurse; > + } > +} > + > +static inline int > +xfs_iext_key_cmp( > + struct xfs_iext_node *node, > + int n, > + xfs_fileoff_t offset) > +{ > + if (node->keys[n] > offset) > + return 1; > + if (node->keys[n] < offset) > + return -1; > + return 0; > +} > + > +static inline int > +xfs_iext_rec_cmp( > + struct xfs_iext_rec *rec, > + xfs_fileoff_t offset) > +{ > + uint64_t rec_offset = rec->lo & XFS_IEXT_STARTOFF_MASK; > + u32 rec_len = rec->hi & XFS_IEXT_LENGTH_MASK; > + > + if (rec_offset > offset) > + return 1; > + if (rec_offset + rec_len <= offset) > + return -1; > + return 0; > +} > + > +static void * > +xfs_iext_find_level( > + struct xfs_ifork *ifp, > + xfs_fileoff_t offset, > + int level) > +{ > + struct xfs_iext_node *node = ifp->if_u1.if_root; > + int height, i; > + > + if (!ifp->if_height) > + return NULL; > + > + for (height = ifp->if_height; height > level; height--) { > + for (i = 1; i < KEYS_PER_NODE; i++) > + if (xfs_iext_key_cmp(node, i, offset) > 0) > + break; > + > + node = node->ptrs[i - 1]; > + if (!node) > + break; > + } > + > + return node; > +} > + > +static int > +xfs_iext_node_pos( > + struct xfs_iext_node *node, > + xfs_fileoff_t offset) > +{ > + int i; > + > + for (i = 1; i < KEYS_PER_NODE; i++) { > + if (xfs_iext_key_cmp(node, i, offset) > 0) > + break; > + } > + > + return i - 1; > +} > + > +static int > +xfs_iext_node_insert_pos( > + struct xfs_iext_node *node, > + xfs_fileoff_t offset) > +{ > + int i; > + > + for (i = 0; i < KEYS_PER_NODE; i++) { > + if (xfs_iext_key_cmp(node, i, offset) > 0) > + return i; > + } > + > + return KEYS_PER_NODE; > +} > + > +static int > +xfs_iext_node_nr_entries( > + struct xfs_iext_node *node, > + int start) > +{ > + int i; > + > + for (i = start; i < KEYS_PER_NODE; i++) { > + if (node->keys[i] == XFS_IEXT_KEY_INVALID) > + break; > + } > + > + return i; > +} > + > +static int > +xfs_iext_leaf_nr_entries( > + struct xfs_ifork *ifp, > + struct xfs_iext_leaf *leaf, > + int start) > +{ > + int i; > + > + for (i = start; i < xfs_iext_max_recs(ifp); i++) { > + if (xfs_iext_rec_is_empty(&leaf->recs[i])) > + break; > + } > + > + return i; > +} > + > +static inline uint64_t > +xfs_iext_leaf_key( > + struct xfs_iext_leaf *leaf, > + int n) > +{ > + return leaf->recs[n].lo & XFS_IEXT_STARTOFF_MASK; > +} > + > +static void > +xfs_iext_grow( > + struct xfs_ifork *ifp) > +{ > + struct xfs_iext_node *node = kmem_zalloc(NODE_SIZE, KM_NOFS); > + int i; > + > + if (ifp->if_height == 1) { > + struct xfs_iext_leaf *prev = ifp->if_u1.if_root; > + > + node->keys[0] = xfs_iext_leaf_key(prev, 0); > + node->ptrs[0] = prev; > + } else { > + struct xfs_iext_node *prev = ifp->if_u1.if_root; > + > + ASSERT(ifp->if_height > 1); > + > + node->keys[0] = prev->keys[0]; > + node->ptrs[0] = prev; > + } > + > + for (i = 1; i < KEYS_PER_NODE; i++) > + node->keys[i] = XFS_IEXT_KEY_INVALID; > + > + ifp->if_u1.if_root = node; > + ifp->if_height++; > +} > + > +static void > +xfs_iext_update_node( > + struct xfs_ifork *ifp, > + xfs_fileoff_t old_offset, > + xfs_fileoff_t new_offset, > + int level, > + void *ptr) > +{ > + struct xfs_iext_node *node = ifp->if_u1.if_root; > + int height, i; > + > + for (height = ifp->if_height; height > level; height--) { > + for (i = 0; i < KEYS_PER_NODE; i++) { > + if (i > 0 && xfs_iext_key_cmp(node, i, old_offset) > 0) > + break; > + if (node->keys[i] == old_offset) > + node->keys[i] = new_offset; > + } > + node = node->ptrs[i - 1]; > + ASSERT(node); > + } > + > + ASSERT(node == ptr); > +} > + > +static struct xfs_iext_node * > +xfs_iext_split_node( > + struct xfs_iext_node **nodep, > + int *pos, > + int *nr_entries) > +{ > + struct xfs_iext_node *node = *nodep; > + struct xfs_iext_node *new = kmem_zalloc(NODE_SIZE, KM_NOFS); > + const int nr_move = KEYS_PER_NODE / 2; > + int nr_keep = nr_move + (KEYS_PER_NODE & 1); > + int i = 0; > + > + /* for sequential append operations just spill over into the new node */ > + if (*pos == KEYS_PER_NODE) { > + *nodep = new; > + *pos = 0; > + *nr_entries = 0; > + goto done; > + } > + > + > + for (i = 0; i < nr_move; i++) { > + new->keys[i] = node->keys[nr_keep + i]; > + new->ptrs[i] = node->ptrs[nr_keep + i]; > + > + node->keys[nr_keep + i] = XFS_IEXT_KEY_INVALID; > + node->ptrs[nr_keep + i] = NULL; > + } > + > + if (*pos >= nr_keep) { > + *nodep = new; > + *pos -= nr_keep; > + *nr_entries = nr_move; > + } else { > + *nr_entries = nr_keep; > + } > +done: > + for (; i < KEYS_PER_NODE; i++) > + new->keys[i] = XFS_IEXT_KEY_INVALID; > + return new; > +} > + > +static void > +xfs_iext_insert_node( > + struct xfs_ifork *ifp, > + uint64_t offset, > + void *ptr, > + int level) > +{ > + struct xfs_iext_node *node, *new; > + int i, pos, nr_entries; > + > +again: > + if (ifp->if_height < level) > + xfs_iext_grow(ifp); > + > + new = NULL; > + node = xfs_iext_find_level(ifp, offset, level); > + pos = xfs_iext_node_insert_pos(node, offset); > + nr_entries = xfs_iext_node_nr_entries(node, pos); > + > + ASSERT(pos >= nr_entries || xfs_iext_key_cmp(node, pos, offset) != 0); > + ASSERT(nr_entries <= KEYS_PER_NODE); > + > + if (nr_entries == KEYS_PER_NODE) > + new = xfs_iext_split_node(&node, &pos, &nr_entries); > + > + if (node != new && pos == 0 && nr_entries > 0) > + xfs_iext_update_node(ifp, node->keys[0], offset, level, node); > + > + for (i = nr_entries; i > pos; i--) { > + node->keys[i] = node->keys[i - 1]; > + node->ptrs[i] = node->ptrs[i - 1]; > + } /me wonders if memmove is more appropriate here, but I'm guessing the loop came out of lib/btree.c? --D > + node->keys[pos] = offset; > + node->ptrs[pos] = ptr; > + > + if (new) { > + offset = new->keys[0]; > + ptr = new; > + level++; > + goto again; > + } > +} > + > +static struct xfs_iext_leaf * > +xfs_iext_split_leaf( > + struct xfs_iext_cursor *cur, > + int *nr_entries) > +{ > + struct xfs_iext_leaf *leaf = cur->leaf; > + struct xfs_iext_leaf *new = kmem_zalloc(NODE_SIZE, KM_NOFS); > + const int nr_move = RECS_PER_LEAF / 2; > + int nr_keep = nr_move + (RECS_PER_LEAF & 1); > + int i; > + > + /* for sequential append operations just spill over into the new node */ > + if (cur->pos == KEYS_PER_NODE) { > + cur->leaf = new; > + cur->pos = 0; > + *nr_entries = 0; > + goto done; > + } > + > + if (nr_keep & 1) > + nr_keep++; > + > + for (i = 0; i < nr_move; i++) { > + new->recs[i] = leaf->recs[nr_keep + i]; > + xfs_iext_rec_clear(&leaf->recs[nr_keep + i]); > + } > + > + if (cur->pos >= nr_keep) { > + cur->leaf = new; > + cur->pos -= nr_keep; > + *nr_entries = nr_move; > + } else { > + *nr_entries = nr_keep; > + } > +done: > + if (leaf->next) > + leaf->next->prev = new; > + new->next = leaf->next; > + new->prev = leaf; > + leaf->next = new; > + return new; > +} > + > +static void > +xfs_iext_alloc_root( > + struct xfs_ifork *ifp, > + struct xfs_iext_cursor *cur) > +{ > + ASSERT(ifp->if_bytes == 0); > + > + ifp->if_u1.if_root = kmem_zalloc(sizeof(struct xfs_iext_rec), KM_NOFS); > + ifp->if_height = 1; > + > + /* now that we have a node step into it */ > + cur->leaf = ifp->if_u1.if_root; > + cur->pos = 0; > +} > + > +static void > +xfs_iext_realloc_root( > + struct xfs_ifork *ifp, > + struct xfs_iext_cursor *cur) > +{ > + size_t new_size = ifp->if_bytes + sizeof(struct xfs_iext_rec); > + void *new; > + > + /* account for the prev/next pointers */ > + if (new_size / sizeof(struct xfs_iext_rec) == RECS_PER_LEAF) > + new_size = NODE_SIZE; > + > + new = kmem_realloc(ifp->if_u1.if_root, new_size, KM_NOFS); > + memset(new + ifp->if_bytes, 0, new_size - ifp->if_bytes); > + ifp->if_u1.if_root = new; > + cur->leaf = new; > +} > + > +static void > +__xfs_iext_insert( > + struct xfs_ifork *ifp, > + struct xfs_iext_cursor *cur, > + struct xfs_bmbt_irec *irec) > +{ > + xfs_fileoff_t offset = irec->br_startoff; > + struct xfs_iext_leaf *new = NULL; > + int nr_entries, i; > + > + if (ifp->if_height == 0) > + xfs_iext_alloc_root(ifp, cur); > + else if (ifp->if_height == 1) > + xfs_iext_realloc_root(ifp, cur); > + > + nr_entries = xfs_iext_leaf_nr_entries(ifp, cur->leaf, cur->pos); > + ASSERT(nr_entries <= RECS_PER_LEAF); > + ASSERT(cur->pos >= nr_entries || > + xfs_iext_rec_cmp(cur_rec(cur), irec->br_startoff) != 0); > + > + if (nr_entries == RECS_PER_LEAF) > + new = xfs_iext_split_leaf(cur, &nr_entries); > + > + if (cur->leaf != new && cur->pos == 0 && nr_entries > 0) { > + xfs_iext_update_node(ifp, xfs_iext_leaf_key(cur->leaf, 0), offset, 1, > + cur->leaf); > + } > + > + for (i = nr_entries; i > cur->pos; i--) > + cur->leaf->recs[i] = cur->leaf->recs[i - 1]; > + xfs_iext_set(cur_rec(cur), irec); > + ifp->if_bytes += sizeof(struct xfs_iext_rec); > + > + if (new) > + xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2); > +} > + > +void > +xfs_iext_insert( > + struct xfs_inode *ip, > + struct xfs_iext_cursor *cur, > + xfs_extnum_t nr_extents, > + struct xfs_bmbt_irec *new, > + int state) > +{ > + struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state); > + int i; > + > + ASSERT(nr_extents > 0); > + > + for (i = nr_extents - 1; i >= 0; i--) { > + __xfs_iext_insert(ifp, cur, new + i); > + trace_xfs_iext_insert(ip, cur, state, _RET_IP_); > + } > +} > + > +static struct xfs_iext_node * > +xfs_iext_rebalance_node( > + struct xfs_iext_node *parent, > + int *pos, > + struct xfs_iext_node *node, > + int nr_entries) > +{ > + if (nr_entries == 0) > + return node; > + > + if (*pos > 0) { > + struct xfs_iext_node *prev = parent->ptrs[*pos - 1]; > + int nr_prev = xfs_iext_node_nr_entries(prev, 0), i; > + > + if (nr_prev + nr_entries <= KEYS_PER_NODE) { > + for (i = 0; i < nr_entries; i++) { > + prev->keys[nr_prev + i] = node->keys[i]; > + prev->ptrs[nr_prev + i] = node->ptrs[i]; > + } > + return node; > + } > + } > + > + if (*pos + 1 < xfs_iext_node_nr_entries(parent, *pos)) { > + struct xfs_iext_node *next = parent->ptrs[*pos + 1]; > + int nr_next = xfs_iext_node_nr_entries(next, 0), i; > + > + if (nr_entries + nr_next <= KEYS_PER_NODE) { > + for (i = 0; i < nr_next; i++) { > + node->keys[nr_entries + i] = next->keys[i]; > + node->ptrs[nr_entries + i] = next->ptrs[i]; > + } > + > + ++*pos; > + return next; > + } > + } > + > + return NULL; > +} > + > +static void > +xfs_iext_remove_node( > + struct xfs_ifork *ifp, > + xfs_fileoff_t offset, > + void *victim) > +{ > + struct xfs_iext_node *node, *parent; > + int level = 2, pos, nr_entries, i; > + > + ASSERT(level <= ifp->if_height); > + node = xfs_iext_find_level(ifp, offset, level); > + pos = xfs_iext_node_pos(node, offset); > +again: > + ASSERT(node->ptrs[pos]); > + ASSERT(node->ptrs[pos] == victim); > + kmem_free(victim); > + > + nr_entries = xfs_iext_node_nr_entries(node, pos) - 1; > + offset = node->keys[0]; > + for (i = pos; i < nr_entries; i++) { > + node->keys[i] = node->keys[i + 1]; > + node->ptrs[i] = node->ptrs[i + 1]; > + } > + node->keys[nr_entries] = XFS_IEXT_KEY_INVALID; > + node->ptrs[nr_entries] = NULL; > + > + if (pos == 0 && nr_entries > 0) { > + xfs_iext_update_node(ifp, offset, node->keys[0], level, > + node); > + offset = node->keys[0]; > + } > + > + if (nr_entries >= KEYS_PER_NODE / 2) > + return; > + > + if (level < ifp->if_height) { > + level++; > + parent = xfs_iext_find_level(ifp, offset, level); > + pos = xfs_iext_node_pos(parent, offset); > + > + ASSERT(pos != KEYS_PER_NODE); > + ASSERT(parent->ptrs[pos] == node); > + > + node = xfs_iext_rebalance_node(parent, &pos, node, nr_entries); > + if (node) { > + offset = node->keys[0]; > + victim = node; > + node = parent; > + goto again; > + } > + } else if (nr_entries == 1) { > + ASSERT(node == ifp->if_u1.if_root); > + ifp->if_u1.if_root = node->ptrs[0]; > + ifp->if_height--; > + kmem_free(node); > + } > +} > + > +static void > +xfs_iext_rebalance_leaf( > + struct xfs_ifork *ifp, > + struct xfs_iext_cursor *cur, > + struct xfs_iext_leaf *leaf, > + xfs_fileoff_t offset, > + int fill) > +{ > + if (leaf->prev) { > + int nr_prev = xfs_iext_leaf_nr_entries(ifp, leaf->prev, 0), i; > + > + if (nr_prev + fill <= RECS_PER_LEAF) { > + for (i = 0; i < fill; i++) > + leaf->prev->recs[nr_prev + i] = leaf->recs[i]; > + > + if (cur->leaf == leaf) { > + cur->leaf = leaf->prev; > + cur->pos += nr_prev; > + } > + goto remove_node; > + } > + } > + > + if (leaf->next) { > + int nr_next = xfs_iext_leaf_nr_entries(ifp, leaf->next, 0), i; > + > + if (fill + nr_next <= RECS_PER_LEAF) { > + for (i = 0; i < nr_next; i++) > + leaf->recs[fill + i] = leaf->next->recs[i]; > + > + if (cur->leaf == leaf->next) { > + cur->leaf = leaf; > + cur->pos += fill; > + } > + > + offset = xfs_iext_leaf_key(leaf->next, 0); > + leaf = leaf->next; > + goto remove_node; > + } > + } > + > + return; > +remove_node: > + if (leaf->prev) > + leaf->prev->next = leaf->next; > + if (leaf->next) > + leaf->next->prev = leaf->prev; > + xfs_iext_remove_node(ifp, offset, leaf); > +} > + > +static void > +xfs_iext_free_last_leaf( > + struct xfs_ifork *ifp) > +{ > + ifp->if_u1.if_root = NULL; > + ifp->if_height--; > + kmem_free(ifp->if_u1.if_root); > +} > + > +static void > +__xfs_iext_remove( > + struct xfs_ifork *ifp, > + struct xfs_iext_cursor *cur) > +{ > + struct xfs_iext_leaf *leaf = cur->leaf; > + xfs_fileoff_t offset = xfs_iext_leaf_key(leaf, 0); > + int i, nr_entries; > + > + ASSERT(ifp->if_height > 0); > + ASSERT(ifp->if_u1.if_root != NULL); > + ASSERT(xfs_iext_valid(ifp, cur)); > + > + nr_entries = xfs_iext_leaf_nr_entries(ifp, leaf, cur->pos) - 1; > + for (i = cur->pos; i < nr_entries; i++) > + leaf->recs[i] = leaf->recs[i + 1]; > + xfs_iext_rec_clear(&leaf->recs[nr_entries]); > + ifp->if_bytes -= sizeof(struct xfs_iext_rec); > + > + if (cur->pos == 0 && nr_entries > 0) { > + xfs_iext_update_node(ifp, offset, xfs_iext_leaf_key(leaf, 0), 1, > + leaf); > + offset = xfs_iext_leaf_key(leaf, 0); > + } else if (cur->pos == nr_entries) { > + if (ifp->if_height > 1 && leaf->next) > + cur->leaf = leaf->next; > + else > + cur->leaf = NULL; > + cur->pos = 0; > + } > + > + if (nr_entries >= RECS_PER_LEAF / 2) > + return; > + > + if (ifp->if_height > 1) > + xfs_iext_rebalance_leaf(ifp, cur, leaf, offset, nr_entries); > + else if (nr_entries == 0) > + xfs_iext_free_last_leaf(ifp); > +} > + > +void > +xfs_iext_remove( > + struct xfs_inode *ip, > + struct xfs_iext_cursor *cur, > + int nr_extents, > + int state) > +{ > + struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state); > + int i; > + > + ASSERT(nr_extents > 0); > + > + for (i = 0; i < nr_extents; i++) { > + trace_xfs_iext_remove(ip, cur, state, _RET_IP_); > + __xfs_iext_remove(ifp, cur); > + } > +} > + > +/* > + * Lookup the extent covering bno. > + * > + * If there is an extent covering bno return the extent index, and store the > + * expanded extent structure in *gotp, and the extent cursor in *cur. > + * If there is no extent covering bno, but there is an extent after it (e.g. > + * it lies in a hole) return that extent in *gotp and its cursor in *cur > + * instead. > + * If bno is beyond the last extent return false, and return an invalid > + * cursor value. > + */ > +bool > +xfs_iext_lookup_extent( > + struct xfs_inode *ip, > + struct xfs_ifork *ifp, > + xfs_fileoff_t offset, > + struct xfs_iext_cursor *cur, > + struct xfs_bmbt_irec *gotp) > +{ > + XFS_STATS_INC(ip->i_mount, xs_look_exlist); > + > + cur->leaf = xfs_iext_find_level(ifp, offset, 1); > + if (!cur->leaf) { > + cur->pos = 0; > + return false; > + } > + > + for (cur->pos = 0; cur->pos < xfs_iext_max_recs(ifp); cur->pos++) { > + struct xfs_iext_rec *rec = cur_rec(cur); > + > + if (xfs_iext_rec_is_empty(rec)) > + break; > + if (xfs_iext_rec_cmp(rec, offset) >= 0) > + goto found; > + } > + > + /* Try looking in the next node for an entry > offset */ > + if (ifp->if_height == 1 || !cur->leaf->next) > + return false; > + cur->leaf = cur->leaf->next; > + cur->pos = 0; > + if (!xfs_iext_valid(ifp, cur)) > + return false; > +found: > + xfs_iext_get(gotp, cur_rec(cur)); > + return true; > +} > + > +/* > + * Returns the last extent before end, and if this extent doesn't cover > + * end, update end to the end of the extent. > + */ > +bool > +xfs_iext_lookup_extent_before( > + struct xfs_inode *ip, > + struct xfs_ifork *ifp, > + xfs_fileoff_t *end, > + struct xfs_iext_cursor *cur, > + struct xfs_bmbt_irec *gotp) > +{ > + /* could be optimized to not even look up the next on a match.. */ > + if (xfs_iext_lookup_extent(ip, ifp, *end - 1, cur, gotp) && > + gotp->br_startoff <= *end - 1) > + return true; > + if (!xfs_iext_prev_extent(ifp, cur, gotp)) > + return false; > + *end = gotp->br_startoff + gotp->br_blockcount; > + return true; > +} > + > +void > +xfs_iext_update_extent( > + struct xfs_inode *ip, > + int state, > + struct xfs_iext_cursor *cur, > + struct xfs_bmbt_irec *new) > +{ > + struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state); > + > + if (cur->pos == 0) { > + struct xfs_bmbt_irec old; > + > + xfs_iext_get(&old, cur_rec(cur)); > + if (new->br_startoff != old.br_startoff) { > + xfs_iext_update_node(ifp, old.br_startoff, > + new->br_startoff, 1, cur->leaf); > + } > + } > + > + trace_xfs_bmap_pre_update(ip, cur, state, _RET_IP_); > + xfs_iext_set(cur_rec(cur), new); > + trace_xfs_bmap_post_update(ip, cur, state, _RET_IP_); > +} > + > +/* > + * Return true if there is an extent at cursor cur and return the expanded > + * extent structure at cur in gotp in that case. Else return false. > + */ > +bool > +xfs_iext_get_extent( > + struct xfs_ifork *ifp, > + struct xfs_iext_cursor *cur, > + struct xfs_bmbt_irec *gotp) > +{ > + if (!xfs_iext_valid(ifp, cur)) > + return false; > + xfs_iext_get(gotp, cur_rec(cur)); > + return true; > +} > + > +/* > + * This is a recursive function, because of that we need to be extremely > + * careful with stack usage. > + */ > +static void > +xfs_iext_destroy_node( > + struct xfs_iext_node *node, > + int level) > +{ > + int i; > + > + if (level > 1) { > + for (i = 0; i < KEYS_PER_NODE; i++) { > + if (node->keys[i] == XFS_IEXT_KEY_INVALID) > + break; > + xfs_iext_destroy_node(node->ptrs[i], level - 1); > + } > + } > + > + kmem_free(node); > +} > + > +void > +xfs_iext_destroy( > + struct xfs_ifork *ifp) > +{ > + xfs_iext_destroy_node(ifp->if_u1.if_root, ifp->if_height); > + > + ifp->if_bytes = 0; > + ifp->if_height = 0; > + ifp->if_u1.if_root = NULL; > +} > diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c > index 5ac341d2b093..2711ff6ab2b3 100644 > --- a/fs/xfs/libxfs/xfs_inode_fork.c > +++ b/fs/xfs/libxfs/xfs_inode_fork.c > @@ -331,6 +331,7 @@ xfs_iformat_extents( > int size = nex * sizeof(xfs_bmbt_rec_t); > struct xfs_iext_cursor ext; > struct xfs_bmbt_rec *dp; > + struct xfs_bmbt_irec new; > int i; > > /* > @@ -346,27 +347,22 @@ xfs_iformat_extents( > } > > ifp->if_real_bytes = 0; > - if (nex == 0) > - ifp->if_u1.if_extents = NULL; > - else > - xfs_iext_add(ifp, 0, nex); > - > - ifp->if_bytes = size; > + ifp->if_bytes = 0; > + ifp->if_u1.if_root = NULL; > + ifp->if_height = 0; > if (size) { > dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); > > xfs_iext_first(ifp, &ext); > for (i = 0; i < nex; i++, dp++) { > - xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); > - > if (!xfs_bmbt_validate_extent(mp, whichfork, dp)) { > XFS_ERROR_REPORT("xfs_iformat_extents(2)", > XFS_ERRLEVEL_LOW, mp); > return -EFSCORRUPTED; > } > > - ep->l0 = get_unaligned_be64(&dp->l0); > - ep->l1 = get_unaligned_be64(&dp->l1); > + xfs_bmbt_disk_get_all(dp, &new); > + xfs_iext_insert(ip, &ext, 1, &new, state); > trace_xfs_read_extent(ip, &ext, state, _THIS_IP_); > xfs_iext_next(ifp, &ext); > } > @@ -435,6 +431,10 @@ xfs_iformat_btree( > ifp->if_flags &= ~XFS_IFEXTENTS; > ifp->if_flags |= XFS_IFBROOT; > > + ifp->if_real_bytes = 0; > + ifp->if_bytes = 0; > + ifp->if_u1.if_root = NULL; > + ifp->if_height = 0; > return 0; > } > > @@ -662,14 +662,12 @@ xfs_idestroy_fork( > ifp->if_u1.if_data = NULL; > ifp->if_real_bytes = 0; > } > - } else if ((ifp->if_flags & XFS_IFEXTENTS) && > - ((ifp->if_flags & XFS_IFEXTIREC) || > - (ifp->if_u1.if_extents != NULL))) { > - ASSERT(ifp->if_real_bytes != 0); > + } else if ((ifp->if_flags & XFS_IFEXTENTS) && ifp->if_height) { > xfs_iext_destroy(ifp); > } > - ASSERT(ifp->if_u1.if_extents == NULL); > + > ASSERT(ifp->if_real_bytes == 0); > + > if (whichfork == XFS_ATTR_FORK) { > kmem_zone_free(xfs_ifork_zone, ip->i_afp); > ip->i_afp = NULL; > @@ -679,13 +677,6 @@ xfs_idestroy_fork( > } > } > > -/* Count number of incore extents based on if_bytes */ > -xfs_extnum_t > -xfs_iext_count(struct xfs_ifork *ifp) > -{ > - return ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); > -} > - > /* > * Convert in-core extents to on-disk form > * > @@ -780,7 +771,6 @@ xfs_iflush_fork( > !(iip->ili_fields & extflag[whichfork])); > if ((iip->ili_fields & extflag[whichfork]) && > (ifp->if_bytes > 0)) { > - ASSERT(xfs_iext_get_ext(ifp, 0)); > ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0); > (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp, > whichfork); > @@ -812,33 +802,6 @@ xfs_iflush_fork( > } > } > > -/* > - * Return a pointer to the extent record at file index idx. > - */ > -xfs_bmbt_rec_host_t * > -xfs_iext_get_ext( > - xfs_ifork_t *ifp, /* inode fork pointer */ > - xfs_extnum_t idx) /* index of target extent */ > -{ > - ASSERT(idx >= 0); > - ASSERT(idx < xfs_iext_count(ifp)); > - > - if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) { > - return ifp->if_u1.if_ext_irec->er_extbuf; > - } else if (ifp->if_flags & XFS_IFEXTIREC) { > - xfs_ext_irec_t *erp; /* irec pointer */ > - int erp_idx = 0; /* irec index */ > - xfs_extnum_t page_idx = idx; /* ext index in target list */ > - > - erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); > - return &erp->er_extbuf[page_idx]; > - } else if (ifp->if_bytes) { > - return &ifp->if_u1.if_extents[idx]; > - } else { > - return NULL; > - } > -} > - > /* Convert bmap state flags to an inode fork. */ > struct xfs_ifork * > xfs_iext_state_to_fork( > @@ -852,894 +815,6 @@ xfs_iext_state_to_fork( > return &ip->i_df; > } > > -/* > - * Insert new item(s) into the extent records for incore inode > - * fork 'ifp'. 'count' new items are inserted at index 'idx'. > - */ > -void > -xfs_iext_insert( > - xfs_inode_t *ip, /* incore inode pointer */ > - struct xfs_iext_cursor *cur, > - xfs_extnum_t count, /* number of inserted items */ > - xfs_bmbt_irec_t *new, /* items to insert */ > - int state) /* type of extent conversion */ > -{ > - xfs_ifork_t *ifp = xfs_iext_state_to_fork(ip, state); > - xfs_extnum_t i; /* extent record index */ > - > - trace_xfs_iext_insert(ip, cur->idx, new, state, _RET_IP_); > - > - ASSERT(ifp->if_flags & XFS_IFEXTENTS); > - xfs_iext_add(ifp, cur->idx, count); > - for (i = 0; i < count; i++, new++) > - xfs_bmbt_set_all(xfs_iext_get_ext(ifp, cur->idx + i), new); > -} > - > -/* > - * This is called when the amount of space required for incore file > - * extents needs to be increased. The ext_diff parameter stores the > - * number of new extents being added and the idx parameter contains > - * the extent index where the new extents will be added. If the new > - * extents are being appended, then we just need to (re)allocate and > - * initialize the space. Otherwise, if the new extents are being > - * inserted into the middle of the existing entries, a bit more work > - * is required to make room for the new extents to be inserted. The > - * caller is responsible for filling in the new extent entries upon > - * return. > - */ > -void > -xfs_iext_add( > - xfs_ifork_t *ifp, /* inode fork pointer */ > - xfs_extnum_t idx, /* index to begin adding exts */ > - int ext_diff) /* number of extents to add */ > -{ > - int byte_diff; /* new bytes being added */ > - int new_size; /* size of extents after adding */ > - xfs_extnum_t nextents; /* number of extents in file */ > - > - nextents = xfs_iext_count(ifp); > - ASSERT((idx >= 0) && (idx <= nextents)); > - byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t); > - new_size = ifp->if_bytes + byte_diff; > - > - /* > - * Use a linear (direct) extent list. > - * If the extents are currently inside the inode, > - * xfs_iext_realloc_direct will switch us from > - * inline to direct extent allocation mode. > - */ > - if (nextents + ext_diff <= XFS_LINEAR_EXTS) { > - xfs_iext_realloc_direct(ifp, new_size); > - if (idx < nextents) { > - memmove(&ifp->if_u1.if_extents[idx + ext_diff], > - &ifp->if_u1.if_extents[idx], > - (nextents - idx) * sizeof(xfs_bmbt_rec_t)); > - memset(&ifp->if_u1.if_extents[idx], 0, byte_diff); > - } > - } > - /* Indirection array */ > - else { > - xfs_ext_irec_t *erp; > - int erp_idx = 0; > - int page_idx = idx; > - > - ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS); > - if (ifp->if_flags & XFS_IFEXTIREC) { > - erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1); > - } else { > - xfs_iext_irec_init(ifp); > - ASSERT(ifp->if_flags & XFS_IFEXTIREC); > - erp = ifp->if_u1.if_ext_irec; > - } > - /* Extents fit in target extent page */ > - if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) { > - if (page_idx < erp->er_extcount) { > - memmove(&erp->er_extbuf[page_idx + ext_diff], > - &erp->er_extbuf[page_idx], > - (erp->er_extcount - page_idx) * > - sizeof(xfs_bmbt_rec_t)); > - memset(&erp->er_extbuf[page_idx], 0, byte_diff); > - } > - erp->er_extcount += ext_diff; > - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); > - } > - /* Insert a new extent page */ > - else if (erp) { > - xfs_iext_add_indirect_multi(ifp, > - erp_idx, page_idx, ext_diff); > - } > - /* > - * If extent(s) are being appended to the last page in > - * the indirection array and the new extent(s) don't fit > - * in the page, then erp is NULL and erp_idx is set to > - * the next index needed in the indirection array. > - */ > - else { > - uint count = ext_diff; > - > - while (count) { > - erp = xfs_iext_irec_new(ifp, erp_idx); > - erp->er_extcount = min(count, XFS_LINEAR_EXTS); > - count -= erp->er_extcount; > - if (count) > - erp_idx++; > - } > - } > - } > - ifp->if_bytes = new_size; > -} > - > -/* > - * This is called when incore extents are being added to the indirection > - * array and the new extents do not fit in the target extent list. The > - * erp_idx parameter contains the irec index for the target extent list > - * in the indirection array, and the idx parameter contains the extent > - * index within the list. The number of extents being added is stored > - * in the count parameter. > - * > - * |-------| |-------| > - * | | | | idx - number of extents before idx > - * | idx | | count | > - * | | | | count - number of extents being inserted at idx > - * |-------| |-------| > - * | count | | nex2 | nex2 - number of extents after idx + count > - * |-------| |-------| > - */ > -void > -xfs_iext_add_indirect_multi( > - xfs_ifork_t *ifp, /* inode fork pointer */ > - int erp_idx, /* target extent irec index */ > - xfs_extnum_t idx, /* index within target list */ > - int count) /* new extents being added */ > -{ > - int byte_diff; /* new bytes being added */ > - xfs_ext_irec_t *erp; /* pointer to irec entry */ > - xfs_extnum_t ext_diff; /* number of extents to add */ > - xfs_extnum_t ext_cnt; /* new extents still needed */ > - xfs_extnum_t nex2; /* extents after idx + count */ > - xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */ > - int nlists; /* number of irec's (lists) */ > - > - ASSERT(ifp->if_flags & XFS_IFEXTIREC); > - erp = &ifp->if_u1.if_ext_irec[erp_idx]; > - nex2 = erp->er_extcount - idx; > - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; > - > - /* > - * Save second part of target extent list > - * (all extents past */ > - if (nex2) { > - byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); > - nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS); > - memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); > - erp->er_extcount -= nex2; > - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); > - memset(&erp->er_extbuf[idx], 0, byte_diff); > - } > - > - /* > - * Add the new extents to the end of the target > - * list, then allocate new irec record(s) and > - * extent buffer(s) as needed to store the rest > - * of the new extents. > - */ > - ext_cnt = count; > - ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount); > - if (ext_diff) { > - erp->er_extcount += ext_diff; > - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); > - ext_cnt -= ext_diff; > - } > - while (ext_cnt) { > - erp_idx++; > - erp = xfs_iext_irec_new(ifp, erp_idx); > - ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS); > - erp->er_extcount = ext_diff; > - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff); > - ext_cnt -= ext_diff; > - } > - > - /* Add nex2 extents back to indirection array */ > - if (nex2) { > - xfs_extnum_t ext_avail; > - int i; > - > - byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); > - ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; > - i = 0; > - /* > - * If nex2 extents fit in the current page, append > - * nex2_ep after the new extents. > - */ > - if (nex2 <= ext_avail) { > - i = erp->er_extcount; > - } > - /* > - * Otherwise, check if space is available in the > - * next page. > - */ > - else if ((erp_idx < nlists - 1) && > - (nex2 <= (ext_avail = XFS_LINEAR_EXTS - > - ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) { > - erp_idx++; > - erp++; > - /* Create a hole for nex2 extents */ > - memmove(&erp->er_extbuf[nex2], erp->er_extbuf, > - erp->er_extcount * sizeof(xfs_bmbt_rec_t)); > - } > - /* > - * Final choice, create a new extent page for > - * nex2 extents. > - */ > - else { > - erp_idx++; > - erp = xfs_iext_irec_new(ifp, erp_idx); > - } > - memmove(&erp->er_extbuf[i], nex2_ep, byte_diff); > - kmem_free(nex2_ep); > - erp->er_extcount += nex2; > - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2); > - } > -} > - > -/* > - * This is called when the amount of space required for incore file > - * extents needs to be decreased. The ext_diff parameter stores the > - * number of extents to be removed and the idx parameter contains > - * the extent index where the extents will be removed from. > - * > - * If the amount of space needed has decreased below the linear > - * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous > - * extent array. Otherwise, use kmem_realloc() to adjust the > - * size to what is needed. > - */ > -void > -xfs_iext_remove( > - xfs_inode_t *ip, /* incore inode pointer */ > - struct xfs_iext_cursor *cur, > - int ext_diff, /* number of extents to remove */ > - int state) /* type of extent conversion */ > -{ > - xfs_ifork_t *ifp = xfs_iext_state_to_fork(ip, state); > - xfs_extnum_t nextents; /* number of extents in file */ > - int new_size; /* size of extents after removal */ > - > - trace_xfs_iext_remove(ip, cur, state, _RET_IP_); > - > - ASSERT(ext_diff > 0); > - nextents = xfs_iext_count(ifp); > - new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t); > - > - if (new_size == 0) { > - xfs_iext_destroy(ifp); > - } else if (ifp->if_flags & XFS_IFEXTIREC) { > - xfs_iext_remove_indirect(ifp, cur->idx, ext_diff); > - } else if (ifp->if_real_bytes) { > - xfs_iext_remove_direct(ifp, cur->idx, ext_diff); > - } > - ifp->if_bytes = new_size; > -} > - > -/* > - * This removes ext_diff extents from a linear (direct) extent list, > - * beginning at extent index idx. If the extents are being removed > - * from the end of the list (ie. truncate) then we just need to re- > - * allocate the list to remove the extra space. Otherwise, if the > - * extents are being removed from the middle of the existing extent > - * entries, then we first need to move the extent records beginning > - * at idx + ext_diff up in the list to overwrite the records being > - * removed, then remove the extra space via kmem_realloc. > - */ > -void > -xfs_iext_remove_direct( > - xfs_ifork_t *ifp, /* inode fork pointer */ > - xfs_extnum_t idx, /* index to begin removing exts */ > - int ext_diff) /* number of extents to remove */ > -{ > - xfs_extnum_t nextents; /* number of extents in file */ > - int new_size; /* size of extents after removal */ > - > - ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); > - new_size = ifp->if_bytes - > - (ext_diff * sizeof(xfs_bmbt_rec_t)); > - nextents = xfs_iext_count(ifp); > - > - if (new_size == 0) { > - xfs_iext_destroy(ifp); > - return; > - } > - /* Move extents up in the list (if needed) */ > - if (idx + ext_diff < nextents) { > - memmove(&ifp->if_u1.if_extents[idx], > - &ifp->if_u1.if_extents[idx + ext_diff], > - (nextents - (idx + ext_diff)) * > - sizeof(xfs_bmbt_rec_t)); > - } > - memset(&ifp->if_u1.if_extents[nextents - ext_diff], > - 0, ext_diff * sizeof(xfs_bmbt_rec_t)); > - /* > - * Reallocate the direct extent list. If the extents > - * will fit inside the inode then xfs_iext_realloc_direct > - * will switch from direct to inline extent allocation > - * mode for us. > - */ > - xfs_iext_realloc_direct(ifp, new_size); > - ifp->if_bytes = new_size; > -} > - > -/* > - * This is called when incore extents are being removed from the > - * indirection array and the extents being removed span multiple extent > - * buffers. The idx parameter contains the file extent index where we > - * want to begin removing extents, and the count parameter contains > - * how many extents need to be removed. > - * > - * |-------| |-------| > - * | nex1 | | | nex1 - number of extents before idx > - * |-------| | count | > - * | | | | count - number of extents being removed at idx > - * | count | |-------| > - * | | | nex2 | nex2 - number of extents after idx + count > - * |-------| |-------| > - */ > -void > -xfs_iext_remove_indirect( > - xfs_ifork_t *ifp, /* inode fork pointer */ > - xfs_extnum_t idx, /* index to begin removing extents */ > - int count) /* number of extents to remove */ > -{ > - xfs_ext_irec_t *erp; /* indirection array pointer */ > - int erp_idx = 0; /* indirection array index */ > - xfs_extnum_t ext_cnt; /* extents left to remove */ > - xfs_extnum_t ext_diff; /* extents to remove in current list */ > - xfs_extnum_t nex1; /* number of extents before idx */ > - xfs_extnum_t nex2; /* extents after idx + count */ > - int page_idx = idx; /* index in target extent list */ > - > - ASSERT(ifp->if_flags & XFS_IFEXTIREC); > - erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0); > - ASSERT(erp != NULL); > - nex1 = page_idx; > - ext_cnt = count; > - while (ext_cnt) { > - nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0); > - ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1)); > - /* > - * Check for deletion of entire list; > - * xfs_iext_irec_remove() updates extent offsets. > - */ > - if (ext_diff == erp->er_extcount) { > - xfs_iext_irec_remove(ifp, erp_idx); > - ext_cnt -= ext_diff; > - nex1 = 0; > - if (ext_cnt) { > - ASSERT(erp_idx < ifp->if_real_bytes / > - XFS_IEXT_BUFSZ); > - erp = &ifp->if_u1.if_ext_irec[erp_idx]; > - nex1 = 0; > - continue; > - } else { > - break; > - } > - } > - /* Move extents up (if needed) */ > - if (nex2) { > - memmove(&erp->er_extbuf[nex1], > - &erp->er_extbuf[nex1 + ext_diff], > - nex2 * sizeof(xfs_bmbt_rec_t)); > - } > - /* Zero out rest of page */ > - memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ - > - ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t)))); > - /* Update remaining counters */ > - erp->er_extcount -= ext_diff; > - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff); > - ext_cnt -= ext_diff; > - nex1 = 0; > - erp_idx++; > - erp++; > - } > - ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t); > - xfs_iext_irec_compact(ifp); > -} > - > -/* > - * Create, destroy, or resize a linear (direct) block of extents. > - */ > -void > -xfs_iext_realloc_direct( > - xfs_ifork_t *ifp, /* inode fork pointer */ > - int new_size) /* new size of extents after adding */ > -{ > - int rnew_size; /* real new size of extents */ > - > - rnew_size = new_size; > - > - ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) || > - ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) && > - (new_size != ifp->if_real_bytes))); > - > - /* Free extent records */ > - if (new_size == 0) { > - xfs_iext_destroy(ifp); > - } else { > - if (!is_power_of_2(new_size)){ > - rnew_size = roundup_pow_of_two(new_size); > - } > - if (rnew_size != ifp->if_real_bytes) { > - ifp->if_u1.if_extents = > - kmem_realloc(ifp->if_u1.if_extents, > - rnew_size, KM_NOFS); > - } > - if (rnew_size > ifp->if_real_bytes) { > - memset(&ifp->if_u1.if_extents[ifp->if_bytes / > - (uint)sizeof(xfs_bmbt_rec_t)], 0, > - rnew_size - ifp->if_real_bytes); > - } > - } > - ifp->if_real_bytes = rnew_size; > - ifp->if_bytes = new_size; > -} > - > -/* > - * Resize an extent indirection array to new_size bytes. > - */ > -STATIC void > -xfs_iext_realloc_indirect( > - xfs_ifork_t *ifp, /* inode fork pointer */ > - int new_size) /* new indirection array size */ > -{ > - ASSERT(ifp->if_flags & XFS_IFEXTIREC); > - ASSERT(ifp->if_real_bytes); > - ASSERT((new_size >= 0) && > - (new_size != ((ifp->if_real_bytes / XFS_IEXT_BUFSZ) * > - sizeof(xfs_ext_irec_t)))); > - if (new_size == 0) { > - xfs_iext_destroy(ifp); > - } else { > - ifp->if_u1.if_ext_irec = > - kmem_realloc(ifp->if_u1.if_ext_irec, new_size, KM_NOFS); > - } > -} > - > -/* > - * Switch from indirection array to linear (direct) extent allocations. > - */ > -STATIC void > -xfs_iext_indirect_to_direct( > - xfs_ifork_t *ifp) /* inode fork pointer */ > -{ > - xfs_bmbt_rec_host_t *ep; /* extent record pointer */ > - xfs_extnum_t nextents; /* number of extents in file */ > - int size; /* size of file extents */ > - > - ASSERT(ifp->if_flags & XFS_IFEXTIREC); > - nextents = xfs_iext_count(ifp); > - ASSERT(nextents <= XFS_LINEAR_EXTS); > - size = nextents * sizeof(xfs_bmbt_rec_t); > - > - xfs_iext_irec_compact_pages(ifp); > - ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); > - > - ep = ifp->if_u1.if_ext_irec->er_extbuf; > - kmem_free(ifp->if_u1.if_ext_irec); > - ifp->if_flags &= ~XFS_IFEXTIREC; > - ifp->if_u1.if_extents = ep; > - ifp->if_bytes = size; > - if (nextents < XFS_LINEAR_EXTS) { > - xfs_iext_realloc_direct(ifp, size); > - } > -} > - > -/* > - * Remove all records from the indirection array. > - */ > -STATIC void > -xfs_iext_irec_remove_all( > - struct xfs_ifork *ifp) > -{ > - int nlists; > - int i; > - > - ASSERT(ifp->if_flags & XFS_IFEXTIREC); > - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; > - for (i = 0; i < nlists; i++) > - kmem_free(ifp->if_u1.if_ext_irec[i].er_extbuf); > - kmem_free(ifp->if_u1.if_ext_irec); > - ifp->if_flags &= ~XFS_IFEXTIREC; > -} > - > -/* > - * Free incore file extents. > - */ > -void > -xfs_iext_destroy( > - xfs_ifork_t *ifp) /* inode fork pointer */ > -{ > - if (ifp->if_flags & XFS_IFEXTIREC) { > - xfs_iext_irec_remove_all(ifp); > - } else if (ifp->if_real_bytes) { > - kmem_free(ifp->if_u1.if_extents); > - } > - ifp->if_u1.if_extents = NULL; > - ifp->if_real_bytes = 0; > - ifp->if_bytes = 0; > -} > - > -/* > - * Return a pointer to the extent record for file system block bno. > - */ > -xfs_bmbt_rec_host_t * /* pointer to found extent record */ > -xfs_iext_bno_to_ext( > - xfs_ifork_t *ifp, /* inode fork pointer */ > - xfs_fileoff_t bno, /* block number to search for */ > - xfs_extnum_t *idxp) /* index of target extent */ > -{ > - xfs_bmbt_rec_host_t *base; /* pointer to first extent */ > - xfs_filblks_t blockcount = 0; /* number of blocks in extent */ > - xfs_bmbt_rec_host_t *ep = NULL; /* pointer to target extent */ > - xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ > - int high; /* upper boundary in search */ > - xfs_extnum_t idx = 0; /* index of target extent */ > - int low; /* lower boundary in search */ > - xfs_extnum_t nextents; /* number of file extents */ > - xfs_fileoff_t startoff = 0; /* start offset of extent */ > - > - nextents = xfs_iext_count(ifp); > - if (nextents == 0) { > - *idxp = 0; > - return NULL; > - } > - low = 0; > - if (ifp->if_flags & XFS_IFEXTIREC) { > - /* Find target extent list */ > - int erp_idx = 0; > - erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx); > - base = erp->er_extbuf; > - high = erp->er_extcount - 1; > - } else { > - base = ifp->if_u1.if_extents; > - high = nextents - 1; > - } > - /* Binary search extent records */ > - while (low <= high) { > - idx = (low + high) >> 1; > - ep = base + idx; > - startoff = xfs_bmbt_get_startoff(ep); > - blockcount = xfs_bmbt_get_blockcount(ep); > - if (bno < startoff) { > - high = idx - 1; > - } else if (bno >= startoff + blockcount) { > - low = idx + 1; > - } else { > - /* Convert back to file-based extent index */ > - if (ifp->if_flags & XFS_IFEXTIREC) { > - idx += erp->er_extoff; > - } > - *idxp = idx; > - return ep; > - } > - } > - /* Convert back to file-based extent index */ > - if (ifp->if_flags & XFS_IFEXTIREC) { > - idx += erp->er_extoff; > - } > - if (bno >= startoff + blockcount) { > - if (++idx == nextents) { > - ep = NULL; > - } else { > - ep = xfs_iext_get_ext(ifp, idx); > - } > - } > - *idxp = idx; > - return ep; > -} > - > -/* > - * Return a pointer to the indirection array entry containing the > - * extent record for filesystem block bno. Store the index of the > - * target irec in *erp_idxp. > - */ > -xfs_ext_irec_t * /* pointer to found extent record */ > -xfs_iext_bno_to_irec( > - xfs_ifork_t *ifp, /* inode fork pointer */ > - xfs_fileoff_t bno, /* block number to search for */ > - int *erp_idxp) /* irec index of target ext list */ > -{ > - xfs_ext_irec_t *erp = NULL; /* indirection array pointer */ > - xfs_ext_irec_t *erp_next; /* next indirection array entry */ > - int erp_idx; /* indirection array index */ > - int nlists; /* number of extent irec's (lists) */ > - int high; /* binary search upper limit */ > - int low; /* binary search lower limit */ > - > - ASSERT(ifp->if_flags & XFS_IFEXTIREC); > - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; > - erp_idx = 0; > - low = 0; > - high = nlists - 1; > - while (low <= high) { > - erp_idx = (low + high) >> 1; > - erp = &ifp->if_u1.if_ext_irec[erp_idx]; > - erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL; > - if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) { > - high = erp_idx - 1; > - } else if (erp_next && bno >= > - xfs_bmbt_get_startoff(erp_next->er_extbuf)) { > - low = erp_idx + 1; > - } else { > - break; > - } > - } > - *erp_idxp = erp_idx; > - return erp; > -} > - > -/* > - * Return a pointer to the indirection array entry containing the > - * extent record at file extent index *idxp. Store the index of the > - * target irec in *erp_idxp and store the page index of the target > - * extent record in *idxp. > - */ > -xfs_ext_irec_t * > -xfs_iext_idx_to_irec( > - xfs_ifork_t *ifp, /* inode fork pointer */ > - xfs_extnum_t *idxp, /* extent index (file -> page) */ > - int *erp_idxp, /* pointer to target irec */ > - int realloc) /* new bytes were just added */ > -{ > - xfs_ext_irec_t *prev; /* pointer to previous irec */ > - xfs_ext_irec_t *erp = NULL; /* pointer to current irec */ > - int erp_idx; /* indirection array index */ > - int nlists; /* number of irec's (ex lists) */ > - int high; /* binary search upper limit */ > - int low; /* binary search lower limit */ > - xfs_extnum_t page_idx = *idxp; /* extent index in target list */ > - > - ASSERT(ifp->if_flags & XFS_IFEXTIREC); > - ASSERT(page_idx >= 0); > - ASSERT(page_idx <= xfs_iext_count(ifp)); > - ASSERT(page_idx < xfs_iext_count(ifp) || realloc); > - > - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; > - erp_idx = 0; > - low = 0; > - high = nlists - 1; > - > - /* Binary search extent irec's */ > - while (low <= high) { > - erp_idx = (low + high) >> 1; > - erp = &ifp->if_u1.if_ext_irec[erp_idx]; > - prev = erp_idx > 0 ? erp - 1 : NULL; > - if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff && > - realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) { > - high = erp_idx - 1; > - } else if (page_idx > erp->er_extoff + erp->er_extcount || > - (page_idx == erp->er_extoff + erp->er_extcount && > - !realloc)) { > - low = erp_idx + 1; > - } else if (page_idx == erp->er_extoff + erp->er_extcount && > - erp->er_extcount == XFS_LINEAR_EXTS) { > - ASSERT(realloc); > - page_idx = 0; > - erp_idx++; > - erp = erp_idx < nlists ? erp + 1 : NULL; > - break; > - } else { > - page_idx -= erp->er_extoff; > - break; > - } > - } > - *idxp = page_idx; > - *erp_idxp = erp_idx; > - return erp; > -} > - > -/* > - * Allocate and initialize an indirection array once the space needed > - * for incore extents increases above XFS_IEXT_BUFSZ. > - */ > -void > -xfs_iext_irec_init( > - xfs_ifork_t *ifp) /* inode fork pointer */ > -{ > - xfs_ext_irec_t *erp; /* indirection array pointer */ > - xfs_extnum_t nextents; /* number of extents in file */ > - > - ASSERT(!(ifp->if_flags & XFS_IFEXTIREC)); > - nextents = xfs_iext_count(ifp); > - ASSERT(nextents <= XFS_LINEAR_EXTS); > - > - erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); > - > - if (nextents == 0) { > - ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); > - } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { > - xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ); > - } > - erp->er_extbuf = ifp->if_u1.if_extents; > - erp->er_extcount = nextents; > - erp->er_extoff = 0; > - > - ifp->if_flags |= XFS_IFEXTIREC; > - ifp->if_real_bytes = XFS_IEXT_BUFSZ; > - ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t); > - ifp->if_u1.if_ext_irec = erp; > - > - return; > -} > - > -/* > - * Allocate and initialize a new entry in the indirection array. > - */ > -xfs_ext_irec_t * > -xfs_iext_irec_new( > - xfs_ifork_t *ifp, /* inode fork pointer */ > - int erp_idx) /* index for new irec */ > -{ > - xfs_ext_irec_t *erp; /* indirection array pointer */ > - int i; /* loop counter */ > - int nlists; /* number of irec's (ex lists) */ > - > - ASSERT(ifp->if_flags & XFS_IFEXTIREC); > - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; > - > - /* Resize indirection array */ > - xfs_iext_realloc_indirect(ifp, ++nlists * > - sizeof(xfs_ext_irec_t)); > - /* > - * Move records down in the array so the > - * new page can use erp_idx. > - */ > - erp = ifp->if_u1.if_ext_irec; > - for (i = nlists - 1; i > erp_idx; i--) { > - memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t)); > - } > - ASSERT(i == erp_idx); > - > - /* Initialize new extent record */ > - erp = ifp->if_u1.if_ext_irec; > - erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); > - ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; > - memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); > - erp[erp_idx].er_extcount = 0; > - erp[erp_idx].er_extoff = erp_idx > 0 ? > - erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0; > - return (&erp[erp_idx]); > -} > - > -/* > - * Remove a record from the indirection array. > - */ > -void > -xfs_iext_irec_remove( > - xfs_ifork_t *ifp, /* inode fork pointer */ > - int erp_idx) /* irec index to remove */ > -{ > - xfs_ext_irec_t *erp; /* indirection array pointer */ > - int i; /* loop counter */ > - int nlists; /* number of irec's (ex lists) */ > - > - ASSERT(ifp->if_flags & XFS_IFEXTIREC); > - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; > - erp = &ifp->if_u1.if_ext_irec[erp_idx]; > - if (erp->er_extbuf) { > - xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, > - -erp->er_extcount); > - kmem_free(erp->er_extbuf); > - } > - /* Compact extent records */ > - erp = ifp->if_u1.if_ext_irec; > - for (i = erp_idx; i < nlists - 1; i++) { > - memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t)); > - } > - /* > - * Manually free the last extent record from the indirection > - * array. A call to xfs_iext_realloc_indirect() with a size > - * of zero would result in a call to xfs_iext_destroy() which > - * would in turn call this function again, creating a nasty > - * infinite loop. > - */ > - if (--nlists) { > - xfs_iext_realloc_indirect(ifp, > - nlists * sizeof(xfs_ext_irec_t)); > - } else { > - kmem_free(ifp->if_u1.if_ext_irec); > - } > - ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; > -} > - > -/* > - * This is called to clean up large amounts of unused memory allocated > - * by the indirection array. Before compacting anything though, verify > - * that the indirection array is still needed and switch back to the > - * linear extent list (or even the inline buffer) if possible. The > - * compaction policy is as follows: > - * > - * Full Compaction: Extents fit into a single page (or inline buffer) > - * Partial Compaction: Extents occupy less than 50% of allocated space > - * No Compaction: Extents occupy at least 50% of allocated space > - */ > -void > -xfs_iext_irec_compact( > - xfs_ifork_t *ifp) /* inode fork pointer */ > -{ > - xfs_extnum_t nextents; /* number of extents in file */ > - int nlists; /* number of irec's (ex lists) */ > - > - ASSERT(ifp->if_flags & XFS_IFEXTIREC); > - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; > - nextents = xfs_iext_count(ifp); > - > - if (nextents == 0) { > - xfs_iext_destroy(ifp); > - } else if (nextents <= XFS_LINEAR_EXTS) { > - xfs_iext_indirect_to_direct(ifp); > - } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { > - xfs_iext_irec_compact_pages(ifp); > - } > -} > - > -/* > - * Combine extents from neighboring extent pages. > - */ > -void > -xfs_iext_irec_compact_pages( > - xfs_ifork_t *ifp) /* inode fork pointer */ > -{ > - xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */ > - int erp_idx = 0; /* indirection array index */ > - int nlists; /* number of irec's (ex lists) */ > - > - ASSERT(ifp->if_flags & XFS_IFEXTIREC); > - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; > - while (erp_idx < nlists - 1) { > - erp = &ifp->if_u1.if_ext_irec[erp_idx]; > - erp_next = erp + 1; > - if (erp_next->er_extcount <= > - (XFS_LINEAR_EXTS - erp->er_extcount)) { > - memcpy(&erp->er_extbuf[erp->er_extcount], > - erp_next->er_extbuf, erp_next->er_extcount * > - sizeof(xfs_bmbt_rec_t)); > - erp->er_extcount += erp_next->er_extcount; > - /* > - * Free page before removing extent record > - * so er_extoffs don't get modified in > - * xfs_iext_irec_remove. > - */ > - kmem_free(erp_next->er_extbuf); > - erp_next->er_extbuf = NULL; > - xfs_iext_irec_remove(ifp, erp_idx + 1); > - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; > - } else { > - erp_idx++; > - } > - } > -} > - > -/* > - * This is called to update the er_extoff field in the indirection > - * array when extents have been added or removed from one of the > - * extent lists. erp_idx contains the irec index to begin updating > - * at and ext_diff contains the number of extents that were added > - * or removed. > - */ > -void > -xfs_iext_irec_update_extoffs( > - xfs_ifork_t *ifp, /* inode fork pointer */ > - int erp_idx, /* irec index to update */ > - int ext_diff) /* number of new extents */ > -{ > - int i; /* loop counter */ > - int nlists; /* number of irec's (ex lists */ > - > - ASSERT(ifp->if_flags & XFS_IFEXTIREC); > - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; > - for (i = erp_idx; i < nlists; i++) { > - ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff; > - } > -} > - > /* > * Initialize an inode's copy-on-write fork. > */ > @@ -1756,87 +831,3 @@ xfs_ifork_init_cow( > ip->i_cformat = XFS_DINODE_FMT_EXTENTS; > ip->i_cnextents = 0; > } > - > -/* > - * Lookup the extent covering bno. > - * > - * If there is an extent covering bno return the extent index, and store the > - * expanded extent structure in *gotp, and the extent cursor in *cur. > - * If there is no extent covering bno, but there is an extent after it (e.g. > - * it lies in a hole) return that extent in *gotp and its cursor in *cur > - * instead. > - * If bno is beyond the last extent return false, and return an invalid > - * cursor value. > - */ > -bool > -xfs_iext_lookup_extent( > - struct xfs_inode *ip, > - struct xfs_ifork *ifp, > - xfs_fileoff_t bno, > - struct xfs_iext_cursor *cur, > - struct xfs_bmbt_irec *gotp) > -{ > - struct xfs_bmbt_rec_host *ep; > - > - XFS_STATS_INC(ip->i_mount, xs_look_exlist); > - > - ep = xfs_iext_bno_to_ext(ifp, bno, &cur->idx); > - if (!ep) > - return false; > - xfs_bmbt_get_all(ep, gotp); > - return true; > -} > - > -/* > - * Returns the last extent before end, and if this extent doesn't cover > - * end, update end to the end of the extent. > - */ > -bool > -xfs_iext_lookup_extent_before( > - struct xfs_inode *ip, > - struct xfs_ifork *ifp, > - xfs_fileoff_t *end, > - struct xfs_iext_cursor *cur, > - struct xfs_bmbt_irec *gotp) > -{ > - if (xfs_iext_lookup_extent(ip, ifp, *end - 1, cur, gotp) && > - gotp->br_startoff <= *end - 1) > - return true; > - if (!xfs_iext_prev_extent(ifp, cur, gotp)) > - return false; > - *end = gotp->br_startoff + gotp->br_blockcount; > - return true; > -} > - > -/* > - * Return true if there is an extent at cursor cur and return the expanded > - * extent structure at cur in gotp in that case. Else return false. > - */ > -bool > -xfs_iext_get_extent( > - struct xfs_ifork *ifp, > - struct xfs_iext_cursor *cur, > - struct xfs_bmbt_irec *gotp) > -{ > - if (cur->idx < 0 || cur->idx >= xfs_iext_count(ifp)) > - return false; > - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, cur->idx), gotp); > - return true; > -} > - > -void > -xfs_iext_update_extent( > - struct xfs_inode *ip, > - int state, > - struct xfs_iext_cursor *cur, > - struct xfs_bmbt_irec *gotp) > -{ > - struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state); > - > - ASSERT(cur->idx >= 0); > - ASSERT(cur->idx < xfs_iext_count(ifp)); > - > - trace_xfs_bmap_pre_update(ip, cur, state, _RET_IP_); > - xfs_bmbt_set_all(xfs_iext_get_ext(ifp, cur->idx), gotp); > - trace_xfs_bmap_post_update(ip, cur, state, _RET_IP_); > -} > diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h > index 508f13784334..015872caaab4 100644 > --- a/fs/xfs/libxfs/xfs_inode_fork.h > +++ b/fs/xfs/libxfs/xfs_inode_fork.h > @@ -21,45 +21,18 @@ > struct xfs_inode_log_item; > struct xfs_dinode; > > -/* > - * The following xfs_ext_irec_t struct introduces a second (top) level > - * to the in-core extent allocation scheme. These structs are allocated > - * in a contiguous block, creating an indirection array where each entry > - * (irec) contains a pointer to a buffer of in-core extent records which > - * it manages. Each extent buffer is 4k in size, since 4k is the system > - * page size on Linux i386 and systems with larger page sizes don't seem > - * to gain much, if anything, by using their native page size as the > - * extent buffer size. Also, using 4k extent buffers everywhere provides > - * a consistent interface for CXFS across different platforms. > - * > - * There is currently no limit on the number of irec's (extent lists) > - * allowed, so heavily fragmented files may require an indirection array > - * which spans multiple system pages of memory. The number of extents > - * which would require this amount of contiguous memory is very large > - * and should not cause problems in the foreseeable future. However, > - * if the memory needed for the contiguous array ever becomes a problem, > - * it is possible that a third level of indirection may be required. > - */ > -typedef struct xfs_ext_irec { > - xfs_bmbt_rec_host_t *er_extbuf; /* block of extent records */ > - xfs_extnum_t er_extoff; /* extent offset in file */ > - xfs_extnum_t er_extcount; /* number of extents in page/block */ > -} xfs_ext_irec_t; > - > /* > * File incore extent information, present for each of data & attr forks. > */ > -#define XFS_IEXT_BUFSZ 4096 > -#define XFS_LINEAR_EXTS (XFS_IEXT_BUFSZ / (uint)sizeof(xfs_bmbt_rec_t)) > typedef struct xfs_ifork { > int if_bytes; /* bytes in if_u1 */ > int if_real_bytes; /* bytes allocated in if_u1 */ > struct xfs_btree_block *if_broot; /* file's incore btree root */ > short if_broot_bytes; /* bytes allocated for root */ > unsigned char if_flags; /* per-fork flags */ > + int if_height; /* height of the extent tree */ > union { > - xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */ > - xfs_ext_irec_t *if_ext_irec; /* irec map file exts */ > + void *if_root; /* extent tree root */ > char *if_data; /* inline file data */ > } if_u1; > } xfs_ifork_t; > @@ -70,7 +43,6 @@ typedef struct xfs_ifork { > #define XFS_IFINLINE 0x01 /* Inline data is read in */ > #define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */ > #define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */ > -#define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */ > > /* > * Fork handling. > @@ -140,35 +112,12 @@ int xfs_iextents_copy(struct xfs_inode *, struct xfs_bmbt_rec *, > int); > void xfs_init_local_fork(struct xfs_inode *, int, const void *, int); > > -struct xfs_bmbt_rec_host * > - xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t); > -xfs_extnum_t xfs_iext_count(struct xfs_ifork *); > +xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp); > void xfs_iext_insert(struct xfs_inode *, struct xfs_iext_cursor *cur, > xfs_extnum_t, struct xfs_bmbt_irec *, int); > -void xfs_iext_add(struct xfs_ifork *, xfs_extnum_t, int); > -void xfs_iext_add_indirect_multi(struct xfs_ifork *, int, > - xfs_extnum_t, int); > void xfs_iext_remove(struct xfs_inode *, struct xfs_iext_cursor *, > int, int); > -void xfs_iext_remove_direct(struct xfs_ifork *, xfs_extnum_t, int); > -void xfs_iext_remove_indirect(struct xfs_ifork *, xfs_extnum_t, int); > -void xfs_iext_realloc_direct(struct xfs_ifork *, int); > void xfs_iext_destroy(struct xfs_ifork *); > -struct xfs_bmbt_rec_host * > - xfs_iext_bno_to_ext(struct xfs_ifork *, xfs_fileoff_t, int *); > -struct xfs_ext_irec * > - xfs_iext_bno_to_irec(struct xfs_ifork *, xfs_fileoff_t, int *); > -struct xfs_ext_irec * > - xfs_iext_idx_to_irec(struct xfs_ifork *, xfs_extnum_t *, int *, > - int); > -void xfs_iext_irec_init(struct xfs_ifork *); > -struct xfs_ext_irec * > - xfs_iext_irec_new(struct xfs_ifork *, int); > -void xfs_iext_irec_remove(struct xfs_ifork *, int); > -void xfs_iext_irec_compact(struct xfs_ifork *); > -void xfs_iext_irec_compact_pages(struct xfs_ifork *); > -void xfs_iext_irec_compact_full(struct xfs_ifork *); > -void xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int); > > bool xfs_iext_lookup_extent(struct xfs_inode *ip, > struct xfs_ifork *ifp, xfs_fileoff_t bno, > @@ -185,29 +134,10 @@ void xfs_iext_update_extent(struct xfs_inode *ip, int state, > struct xfs_iext_cursor *cur, > struct xfs_bmbt_irec *gotp); > > -static inline void xfs_iext_first(struct xfs_ifork *ifp, > - struct xfs_iext_cursor *cur) > -{ > - cur->idx = 0; > -} > - > -static inline void xfs_iext_last(struct xfs_ifork *ifp, > - struct xfs_iext_cursor *cur) > -{ > - cur->idx = xfs_iext_count(ifp) - 1; > -} > - > -static inline void xfs_iext_next(struct xfs_ifork *ifp, > - struct xfs_iext_cursor *cur) > -{ > - cur->idx++; > -} > - > -static inline void xfs_iext_prev(struct xfs_ifork *ifp, > - struct xfs_iext_cursor *cur) > -{ > - cur->idx--; > -} > +void xfs_iext_first(struct xfs_ifork *, struct xfs_iext_cursor *); > +void xfs_iext_last(struct xfs_ifork *, struct xfs_iext_cursor *); > +void xfs_iext_next(struct xfs_ifork *, struct xfs_iext_cursor *); > +void xfs_iext_prev(struct xfs_ifork *, struct xfs_iext_cursor *); > > static inline bool xfs_iext_next_extent(struct xfs_ifork *ifp, > struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *gotp) > diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h > index 5da6382bdaf1..983878019097 100644 > --- a/fs/xfs/libxfs/xfs_types.h > +++ b/fs/xfs/libxfs/xfs_types.h > @@ -143,7 +143,8 @@ typedef uint32_t xfs_dqid_t; > #define XFS_WORDMASK ((1 << XFS_WORDLOG) - 1) > > struct xfs_iext_cursor { > - xfs_extnum_t idx; > + struct xfs_iext_leaf *leaf; > + int pos; > }; > > #endif /* __XFS_TYPES_H__ */ > diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c > index 778b709dbd0c..72d753d898d3 100644 > --- a/fs/xfs/scrub/bmap.c > +++ b/fs/xfs/scrub/bmap.c > @@ -168,7 +168,6 @@ xfs_scrub_bmapbt_rec( > struct xfs_scrub_btree *bs, > union xfs_btree_rec *rec) > { > - struct xfs_bmbt_rec_host ihost; > struct xfs_bmbt_irec irec; > struct xfs_scrub_bmap_info *info = bs->private; > struct xfs_inode *ip = bs->cur->bc_private.b.ip; > @@ -193,9 +192,7 @@ xfs_scrub_bmapbt_rec( > } > > /* Set up the in-core record and scrub it. */ > - ihost.l0 = be64_to_cpu(rec->bmbt.l0); > - ihost.l1 = be64_to_cpu(rec->bmbt.l1); > - xfs_bmbt_get_all(&ihost, &irec); > + xfs_bmbt_disk_get_all(&rec->bmbt, &irec); > return xfs_scrub_bmap_extent(ip, bs->cur, info, &irec); > } > > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c > index a929ca72fa8e..5b5128ed0f18 100644 > --- a/fs/xfs/xfs_inode.c > +++ b/fs/xfs/xfs_inode.c > @@ -933,7 +933,7 @@ xfs_ialloc( > ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; > ip->i_df.if_flags = XFS_IFEXTENTS; > ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0; > - ip->i_df.if_u1.if_extents = NULL; > + ip->i_df.if_u1.if_root = NULL; > break; > default: > ASSERT(0); > diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c > index eb6f4f7c9520..6ee5c3bf19ad 100644 > --- a/fs/xfs/xfs_inode_item.c > +++ b/fs/xfs/xfs_inode_item.c > @@ -162,7 +162,6 @@ xfs_inode_item_format_data_fork( > ip->i_df.if_bytes > 0) { > struct xfs_bmbt_rec *p; > > - ASSERT(ip->i_df.if_u1.if_extents != NULL); > ASSERT(xfs_iext_count(&ip->i_df) > 0); > > p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT); > @@ -252,7 +251,6 @@ xfs_inode_item_format_attr_fork( > > ASSERT(xfs_iext_count(ip->i_afp) == > ip->i_d.di_anextents); > - ASSERT(ip->i_afp->if_u1.if_extents != NULL); > > p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT); > data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK); > diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h > index 667bfce802cd..515ba042d75c 100644 > --- a/fs/xfs/xfs_trace.h > +++ b/fs/xfs/xfs_trace.h > @@ -218,45 +218,6 @@ TRACE_EVENT(xfs_attr_list_node_descend, > __entry->bt_before) > ); > > -TRACE_EVENT(xfs_iext_insert, > - TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, > - struct xfs_bmbt_irec *r, int state, unsigned long caller_ip), > - TP_ARGS(ip, idx, r, state, caller_ip), > - TP_STRUCT__entry( > - __field(dev_t, dev) > - __field(xfs_ino_t, ino) > - __field(xfs_extnum_t, idx) > - __field(xfs_fileoff_t, startoff) > - __field(xfs_fsblock_t, startblock) > - __field(xfs_filblks_t, blockcount) > - __field(xfs_exntst_t, state) > - __field(int, bmap_state) > - __field(unsigned long, caller_ip) > - ), > - TP_fast_assign( > - __entry->dev = VFS_I(ip)->i_sb->s_dev; > - __entry->ino = ip->i_ino; > - __entry->idx = idx; > - __entry->startoff = r->br_startoff; > - __entry->startblock = r->br_startblock; > - __entry->blockcount = r->br_blockcount; > - __entry->state = r->br_state; > - __entry->bmap_state = state; > - __entry->caller_ip = caller_ip; > - ), > - TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " > - "offset %lld block %lld count %lld flag %d caller %ps", > - MAJOR(__entry->dev), MINOR(__entry->dev), > - __entry->ino, > - __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), > - (long)__entry->idx, > - __entry->startoff, > - (int64_t)__entry->startblock, > - __entry->blockcount, > - __entry->state, > - (char *)__entry->caller_ip) > -); > - > DECLARE_EVENT_CLASS(xfs_bmap_class, > TP_PROTO(struct xfs_inode *ip, struct xfs_iext_cursor *cur, int state, > unsigned long caller_ip), > @@ -264,7 +225,8 @@ DECLARE_EVENT_CLASS(xfs_bmap_class, > TP_STRUCT__entry( > __field(dev_t, dev) > __field(xfs_ino_t, ino) > - __field(xfs_extnum_t, idx) > + __field(void *, leaf); > + __field(int, pos); > __field(xfs_fileoff_t, startoff) > __field(xfs_fsblock_t, startblock) > __field(xfs_filblks_t, blockcount) > @@ -280,7 +242,8 @@ DECLARE_EVENT_CLASS(xfs_bmap_class, > xfs_iext_get_extent(ifp, cur, &r); > __entry->dev = VFS_I(ip)->i_sb->s_dev; > __entry->ino = ip->i_ino; > - __entry->idx = cur->idx; > + __entry->leaf = cur->leaf; > + __entry->pos = cur->pos; > __entry->startoff = r.br_startoff; > __entry->startblock = r.br_startblock; > __entry->blockcount = r.br_blockcount; > @@ -288,12 +251,13 @@ DECLARE_EVENT_CLASS(xfs_bmap_class, > __entry->bmap_state = state; > __entry->caller_ip = caller_ip; > ), > - TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " > + TP_printk("dev %d:%d ino 0x%llx state %s cur 0x%p/%d " > "offset %lld block %lld count %lld flag %d caller %ps", > MAJOR(__entry->dev), MINOR(__entry->dev), > __entry->ino, > __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), > - (long)__entry->idx, > + __entry->leaf, > + __entry->pos, > __entry->startoff, > (int64_t)__entry->startblock, > __entry->blockcount, > @@ -306,6 +270,7 @@ DEFINE_EVENT(xfs_bmap_class, name, \ > TP_PROTO(struct xfs_inode *ip, struct xfs_iext_cursor *cur, int state, \ > unsigned long caller_ip), \ > TP_ARGS(ip, cur, state, caller_ip)) > +DEFINE_BMAP_EVENT(xfs_iext_insert); > DEFINE_BMAP_EVENT(xfs_iext_remove); > DEFINE_BMAP_EVENT(xfs_bmap_pre_update); > DEFINE_BMAP_EVENT(xfs_bmap_post_update); > -- > 2.14.2 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html