Re: [PATCH 04/11] maple_tree: Introduce interfaces __mt_dup() and mt_dup()

"Liam R. Howlett" <Liam.Howlett@xxxxxxxxxx> · Wed, 26 Jul 2023 12:03:54 -0400

* Peng Zhang <zhangpeng.00@xxxxxxxxxxxxx> [230726 04:10]:
> Introduce interfaces __mt_dup() and mt_dup(), which are used to
> duplicate a maple tree. Compared with traversing the source tree and
> reinserting entry by entry in the new tree, it has better performance.
> The difference between __mt_dup() and mt_dup() is that mt_dup() holds
> an internal lock.
> 
> Signed-off-by: Peng Zhang <zhangpeng.00@xxxxxxxxxxxxx>
> ---
>  include/linux/maple_tree.h |   3 +
>  lib/maple_tree.c           | 211 +++++++++++++++++++++++++++++++++++++
>  2 files changed, 214 insertions(+)
> 
> diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
> index c962af188681..229fe78e4c89 100644
> --- a/include/linux/maple_tree.h
> +++ b/include/linux/maple_tree.h
> @@ -327,6 +327,9 @@ int mtree_store(struct maple_tree *mt, unsigned long index,
>  		void *entry, gfp_t gfp);
>  void *mtree_erase(struct maple_tree *mt, unsigned long index);
>  
> +int mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp);
> +int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp);
> +
>  void mtree_destroy(struct maple_tree *mt);
>  void __mt_destroy(struct maple_tree *mt);
>  
> diff --git a/lib/maple_tree.c b/lib/maple_tree.c
> index da3a2fb405c0..efac6761ae37 100644
> --- a/lib/maple_tree.c
> +++ b/lib/maple_tree.c
> @@ -6595,6 +6595,217 @@ void *mtree_erase(struct maple_tree *mt, unsigned long index)
>  }
>  EXPORT_SYMBOL(mtree_erase);
>  
> +/*
> + * mt_dup_free() - Free the nodes of a incomplete maple tree.
> + * @mt: The incomplete maple tree
> + * @node: Free nodes from @node
> + *
> + * This function frees all nodes starting from @node in the reverse order of
> + * mt_dup_build(). At this point we don't need to hold the source tree lock.
> + */
> +static void mt_dup_free(struct maple_tree *mt, struct maple_node *node)
> +{
> +	void **slots;
> +	unsigned char offset;
> +	struct maple_enode *enode;
> +	enum maple_type type;
> +	unsigned char count = 0, i;
> +

Can we make these labels inline functions and try to make this a loop?

> +try_ascend:
> +	if (ma_is_root(node)) {
> +		mt_free_one(node);
> +		return;
> +	}
> +
> +	offset = ma_parent_slot(node);
> +	type = ma_parent_type(mt, node);
> +	node = ma_parent(node);
> +	if (!offset)
> +		goto free;
> +
> +	offset--;
> +
> +descend:
> +	slots = (void **)ma_slots(node, type);
> +	enode = slots[offset];
> +	if (mte_is_leaf(enode))
> +		goto free;
> +
> +	type = mte_node_type(enode);
> +	node = mte_to_node(enode);
> +	offset = ma_nonleaf_data_end_nocheck(node, type);
> +	goto descend;
> +
> +free:
> +	slots = (void **)ma_slots(node, type);
> +	count = ma_nonleaf_data_end_nocheck(node, type) + 1;
> +	for (i = 0; i < count; i++)
> +		((unsigned long *)slots)[i] &= ~MAPLE_NODE_MASK;
> +
> +	/* Cast to __rcu to avoid sparse checker complaining. */
> +	mt_free_bulk(count, (void __rcu **)slots);
> +	goto try_ascend;
> +}
> +
> +/*
> + * mt_dup_build() - Build a new maple tree from a source tree
> + * @mt: The source maple tree to copy from
> + * @new: The new maple tree
> + * @gfp: The GFP_FLAGS to use for allocations
> + * @to_free: Free nodes starting from @to_free if the build fails
> + *
> + * This function builds a new tree in DFS preorder. If it fails due to memory
> + * allocation, @to_free will store the last failed node to free the incomplete
> + * tree. Use mt_dup_free() to free nodes.
> + *
> + * Return: 0 on success, -ENOMEM if memory could not be allocated.
> + */
> +static inline int mt_dup_build(struct maple_tree *mt, struct maple_tree *new,
> +			       gfp_t gfp, struct maple_node **to_free)

I am trying to change the functions to be two tabs of indent for
arguments from now on.  It allows for more to fit on a single line and
still maintains a clear separation between code and argument lists.

> +{
> +	struct maple_enode *enode;
> +	struct maple_node *new_node, *new_parent = NULL, *node;
> +	enum maple_type type;
> +	void __rcu **slots;
> +	void **new_slots;
> +	unsigned char count, request, i, offset;
> +	unsigned long *set_parent;
> +	unsigned long new_root;
> +
> +	mt_init_flags(new, mt->ma_flags);
> +	enode = mt_root_locked(mt);
> +	if (unlikely(!xa_is_node(enode))) {
> +		rcu_assign_pointer(new->ma_root, enode);
> +		return 0;
> +	}
> +
> +	new_node = mt_alloc_one(gfp);
> +	if (!new_node)
> +		return -ENOMEM;
> +
> +	new_root = (unsigned long)new_node;
> +	new_root |= (unsigned long)enode & MAPLE_NODE_MASK;
> +
> +copy_node:

Can you make copy_node, descend, ascend inline functions instead of the
goto jumping please?  It's better to have loops over jumping around a
lot.  Gotos are good for undoing things and retry, but constructing
loops with them makes it difficult to follow.

> +	node = mte_to_node(enode);
> +	type = mte_node_type(enode);
> +	memcpy(new_node, node, sizeof(struct maple_node));
> +
> +	set_parent = (unsigned long *)&(new_node->parent);
> +	*set_parent &= MAPLE_NODE_MASK;
> +	*set_parent |= (unsigned long)new_parent;

Maybe make a small inline to set the parent instead of this?

There are some defined helpers for setting the types like
ma_parent_ptr() and ma_enode_ptr() to make casting more type-safe.

> +	if (ma_is_leaf(type))
> +		goto ascend;
> +
> +	new_slots = (void **)ma_slots(new_node, type);
> +	slots = ma_slots(node, type);
> +	request = ma_nonleaf_data_end(mt, node, type) + 1;
> +	count = mt_alloc_bulk(gfp, request, new_slots);
> +	if (!count) {
> +		*to_free = new_node;
> +		return -ENOMEM;
> +	}
> +
> +	for (i = 0; i < count; i++)
> +		((unsigned long *)new_slots)[i] |=
> +				((unsigned long)mt_slot_locked(mt, slots, i) &
> +				 MAPLE_NODE_MASK);
> +	offset = 0;
> +
> +descend:
> +	new_parent = new_node;
> +	enode = mt_slot_locked(mt, slots, offset);
> +	new_node = mte_to_node(new_slots[offset]);
> +	goto copy_node;
> +
> +ascend:
> +	if (ma_is_root(node)) {
> +		new_node = mte_to_node((void *)new_root);
> +		new_node->parent = ma_parent_ptr((unsigned long)new |
> +						 MA_ROOT_PARENT);
> +		rcu_assign_pointer(new->ma_root, (void *)new_root);
> +		return 0;
> +	}
> +
> +	offset = ma_parent_slot(node);
> +	type = ma_parent_type(mt, node);
> +	node = ma_parent(node);
> +	new_node = ma_parent(new_node);
> +	if (offset < ma_nonleaf_data_end(mt, node, type)) {
> +		offset++;
> +		new_slots = (void **)ma_slots(new_node, type);
> +		slots = ma_slots(node, type);
> +		goto descend;
> +	}
> +
> +	goto ascend;
> +}
> +
> +/**
> + * __mt_dup(): Duplicate a maple tree
> + * @mt: The source maple tree
> + * @new: The new maple tree
> + * @gfp: The GFP_FLAGS to use for allocations
> + *
> + * This function duplicates a maple tree using a faster method than traversing
> + * the source tree and inserting entries into the new tree one by one. The user
> + * needs to lock the source tree manually. Before calling this function, @new
> + * must be an empty tree or an uninitialized tree. If @mt uses an external lock,
> + * we may also need to manually set @new's external lock using
> + * mt_set_external_lock().
> + *
> + * Return: 0 on success, -ENOMEM if memory could not be allocated.
> + */
> +int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp)

We use mas_ for things that won't handle the locking and pass in a maple
state.  Considering the leaves need to be altered once this is returned,
I would expect passing in a maple state should be feasible?

> +{
> +	int ret;
> +	struct maple_node *to_free = NULL;
> +
> +	ret = mt_dup_build(mt, new, gfp, &to_free);
> +
> +	if (unlikely(ret == -ENOMEM)) {

On other errors, will the half constructed tree be returned?  Is this
safe?

> +		if (to_free)
> +			mt_dup_free(new, to_free);
> +	}
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL(__mt_dup);
> +
> +/**
> + * mt_dup(): Duplicate a maple tree
> + * @mt: The source maple tree
> + * @new: The new maple tree
> + * @gfp: The GFP_FLAGS to use for allocations
> + *
> + * This function duplicates a maple tree using a faster method than traversing
> + * the source tree and inserting entries into the new tree one by one. The
> + * function will lock the source tree with an internal lock, and the user does
> + * not need to manually handle the lock. Before calling this function, @new must
> + * be an empty tree or an uninitialized tree. If @mt uses an external lock, we
> + * may also need to manually set @new's external lock using
> + * mt_set_external_lock().
> + *
> + * Return: 0 on success, -ENOMEM if memory could not be allocated.
> + */
> +int mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp)

mtree_ ususually used to indicate locking is handled.

> +{
> +	int ret;
> +	struct maple_node *to_free = NULL;
> +
> +	mtree_lock(mt);
> +	ret = mt_dup_build(mt, new, gfp, &to_free);
> +	mtree_unlock(mt);
> +
> +	if (unlikely(ret == -ENOMEM)) {
> +		if (to_free)
> +			mt_dup_free(new, to_free);

Again, is a half constructed tree safe to return?  Since each caller
checks to_free is NULL, could that be in mt_dup_free() instead?

> +	}
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL(mt_dup);
> +
>  /**
>   * __mt_destroy() - Walk and free all nodes of a locked maple tree.
>   * @mt: The maple tree
> -- 
> 2.20.1
> 
>