On Sat 17-02-24 15:24:16, Chuck Lever wrote: > From: Chuck Lever <chuck.lever@xxxxxxxxxx> > > Test robot reports: > > kernel test robot noticed a -19.0% regression of aim9.disk_src.ops_per_sec on: > > > > commit: a2e459555c5f9da3e619b7e47a63f98574dc75f1 ("shmem: stable directory offsets") > > https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git master > > Feng Tang further clarifies that: > > ... the new simple_offset_add() > > called by shmem_mknod() brings extra cost related with slab, > > specifically the 'radix_tree_node', which cause the regression. > > Willy's analysis is that, over time, the test workload causes > xa_alloc_cyclic() to fragment the underlying SLAB cache. > > This patch replaces the offset_ctx's xarray with a Maple Tree in the > hope that Maple Tree's dense node mode will handle this scenario > more scalably. > > In addition, we can widen the simple directory offset maximum to > signed long (as loff_t is also signed). > > Suggested-by: Matthew Wilcox <willy@xxxxxxxxxxxxx> > Reported-by: kernel test robot <oliver.sang@xxxxxxxxx> > Closes: https://lore.kernel.org/oe-lkp/202309081306.3ecb3734-oliver.sang@xxxxxxxxx > Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx> Looks good. Feel free to add: Reviewed-by: Jan Kara <jack@xxxxxxx> Honza > --- > fs/libfs.c | 47 +++++++++++++++++++++++------------------------ > include/linux/fs.h | 5 +++-- > 2 files changed, 26 insertions(+), 26 deletions(-) > > diff --git a/fs/libfs.c b/fs/libfs.c > index f7f92a49a418..d3d31197c8e4 100644 > --- a/fs/libfs.c > +++ b/fs/libfs.c > @@ -245,17 +245,17 @@ enum { > DIR_OFFSET_MIN = 2, > }; > > -static void offset_set(struct dentry *dentry, u32 offset) > +static void offset_set(struct dentry *dentry, long offset) > { > - dentry->d_fsdata = (void *)((uintptr_t)(offset)); > + dentry->d_fsdata = (void *)offset; > } > > -static u32 dentry2offset(struct dentry *dentry) > +static long dentry2offset(struct dentry *dentry) > { > - return (u32)((uintptr_t)(dentry->d_fsdata)); > + return (long)dentry->d_fsdata; > } > > -static struct lock_class_key simple_offset_xa_lock; > +static struct lock_class_key simple_offset_lock_class; > > /** > * simple_offset_init - initialize an offset_ctx > @@ -264,8 +264,8 @@ static struct lock_class_key simple_offset_xa_lock; > */ > void simple_offset_init(struct offset_ctx *octx) > { > - xa_init_flags(&octx->xa, XA_FLAGS_ALLOC1); > - lockdep_set_class(&octx->xa.xa_lock, &simple_offset_xa_lock); > + mt_init_flags(&octx->mt, MT_FLAGS_ALLOC_RANGE); > + lockdep_set_class(&octx->mt.ma_lock, &simple_offset_lock_class); > octx->next_offset = DIR_OFFSET_MIN; > } > > @@ -274,20 +274,19 @@ void simple_offset_init(struct offset_ctx *octx) > * @octx: directory offset ctx to be updated > * @dentry: new dentry being added > * > - * Returns zero on success. @so_ctx and the dentry offset are updated. > + * Returns zero on success. @octx and the dentry's offset are updated. > * Otherwise, a negative errno value is returned. > */ > int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry) > { > - static const struct xa_limit limit = XA_LIMIT(DIR_OFFSET_MIN, U32_MAX); > - u32 offset; > + unsigned long offset; > int ret; > > if (dentry2offset(dentry) != 0) > return -EBUSY; > > - ret = xa_alloc_cyclic(&octx->xa, &offset, dentry, limit, > - &octx->next_offset, GFP_KERNEL); > + ret = mtree_alloc_cyclic(&octx->mt, &offset, dentry, DIR_OFFSET_MIN, > + LONG_MAX, &octx->next_offset, GFP_KERNEL); > if (ret < 0) > return ret; > > @@ -303,13 +302,13 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry) > */ > void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry) > { > - u32 offset; > + long offset; > > offset = dentry2offset(dentry); > if (offset == 0) > return; > > - xa_erase(&octx->xa, offset); > + mtree_erase(&octx->mt, offset); > offset_set(dentry, 0); > } > > @@ -332,7 +331,7 @@ int simple_offset_empty(struct dentry *dentry) > > index = DIR_OFFSET_MIN; > octx = inode->i_op->get_offset_ctx(inode); > - xa_for_each(&octx->xa, index, child) { > + mt_for_each(&octx->mt, child, index, LONG_MAX) { > spin_lock(&child->d_lock); > if (simple_positive(child)) { > spin_unlock(&child->d_lock); > @@ -362,8 +361,8 @@ int simple_offset_rename_exchange(struct inode *old_dir, > { > struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir); > struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir); > - u32 old_index = dentry2offset(old_dentry); > - u32 new_index = dentry2offset(new_dentry); > + long old_index = dentry2offset(old_dentry); > + long new_index = dentry2offset(new_dentry); > int ret; > > simple_offset_remove(old_ctx, old_dentry); > @@ -389,9 +388,9 @@ int simple_offset_rename_exchange(struct inode *old_dir, > > out_restore: > offset_set(old_dentry, old_index); > - xa_store(&old_ctx->xa, old_index, old_dentry, GFP_KERNEL); > + mtree_store(&old_ctx->mt, old_index, old_dentry, GFP_KERNEL); > offset_set(new_dentry, new_index); > - xa_store(&new_ctx->xa, new_index, new_dentry, GFP_KERNEL); > + mtree_store(&new_ctx->mt, new_index, new_dentry, GFP_KERNEL); > return ret; > } > > @@ -404,7 +403,7 @@ int simple_offset_rename_exchange(struct inode *old_dir, > */ > void simple_offset_destroy(struct offset_ctx *octx) > { > - xa_destroy(&octx->xa); > + mtree_destroy(&octx->mt); > } > > /** > @@ -434,16 +433,16 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) > > /* In this case, ->private_data is protected by f_pos_lock */ > file->private_data = NULL; > - return vfs_setpos(file, offset, U32_MAX); > + return vfs_setpos(file, offset, LONG_MAX); > } > > static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset) > { > + MA_STATE(mas, &octx->mt, offset, offset); > struct dentry *child, *found = NULL; > - XA_STATE(xas, &octx->xa, offset); > > rcu_read_lock(); > - child = xas_next_entry(&xas, U32_MAX); > + child = mas_find(&mas, LONG_MAX); > if (!child) > goto out; > spin_lock(&child->d_lock); > @@ -457,8 +456,8 @@ static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset) > > static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry) > { > - u32 offset = dentry2offset(dentry); > struct inode *inode = d_inode(dentry); > + long offset = dentry2offset(dentry); > > return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset, > inode->i_ino, fs_umode_to_dtype(inode->i_mode)); > diff --git a/include/linux/fs.h b/include/linux/fs.h > index 03d141809a2c..55144c12ee0f 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -43,6 +43,7 @@ > #include <linux/cred.h> > #include <linux/mnt_idmapping.h> > #include <linux/slab.h> > +#include <linux/maple_tree.h> > > #include <asm/byteorder.h> > #include <uapi/linux/fs.h> > @@ -3260,8 +3261,8 @@ extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, > const void __user *from, size_t count); > > struct offset_ctx { > - struct xarray xa; > - u32 next_offset; > + struct maple_tree mt; > + unsigned long next_offset; > }; > > void simple_offset_init(struct offset_ctx *octx); > > -- Jan Kara <jack@xxxxxxxx> SUSE Labs, CR