Set up the maple_node_cache with percpu sheaves of size 32 to hopefully
improve its performance. Change the single node rcu freeing in
ma_free_rcu() to use kfree_rcu() instead of the custom callback, which
allows the rcu_free sheaf batching to be used. Note there are other
users of mt_free_rcu() where larger parts of the maple tree are
submitted to call_rcu() as a whole, and that cannot use the rcu_free
sheaf, but it's still possible for maple nodes freed this way to be
reused via the barn, even if only some cpus are allowed to process rcu
callbacks.

Signed-off-by: Vlastimil Babka <vbabka@xxxxxxx>
---
 lib/maple_tree.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 3619301dda2ebeaaba8a73837389b6ee3c7e1a3f..c69365e17fcbfe963dcedd0de07335fc6bbdfb27 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -194,7 +194,7 @@ static void mt_free_rcu(struct rcu_head *head)
 static void ma_free_rcu(struct maple_node *node)
 {
 	WARN_ON(node->parent != ma_parent_ptr(node));
-	call_rcu(&node->rcu, mt_free_rcu);
+	kfree_rcu(node, rcu);
 }
 
 static void mas_set_height(struct ma_state *mas)
@@ -6299,9 +6299,14 @@ bool mas_nomem(struct ma_state *mas, gfp_t gfp)
 
 void __init maple_tree_init(void)
 {
+	struct kmem_cache_args args = {
+		.align = sizeof(struct maple_node),
+		.sheaf_capacity = 32,
+	};
+
 	maple_node_cache = kmem_cache_create("maple_node",
-			sizeof(struct maple_node), sizeof(struct maple_node),
-			SLAB_PANIC, NULL);
+			sizeof(struct maple_node), &args,
+			SLAB_PANIC);
 }
 
 /**
-- 
2.47.0