Re: [PATCH v4 4/8] uprobes: traverse uprobe's consumer list locklessly under SRCU protection

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Aug 29, 2024 at 11:37:37AM -0700, Andrii Nakryiko wrote:
> uprobe->register_rwsem is one of a few big bottlenecks to scalability of
> uprobes, so we need to get rid of it to improve uprobe performance and
> multi-CPU scalability.
> 
> First, we turn uprobe's consumer list to a typical doubly-linked list
> and utilize existing RCU-aware helpers for traversing such lists, as
> well as adding and removing elements from it.
> 
> For entry uprobes we already have SRCU protection active since before
> uprobe lookup. For uretprobe we keep refcount, guaranteeing that uprobe
> won't go away from under us, but we add SRCU protection around consumer
> list traversal.
> 
> Lastly, to keep handler_chain()'s UPROBE_HANDLER_REMOVE handling simple,
> we remember whether any removal was requested during handler calls, but
> then we double-check the decision under a proper register_rwsem using
> consumers' filter callbacks. Handler removal is very rare, so this extra
> lock won't hurt performance, overall, but we also avoid the need for any
> extra protection (e.g., seqcount locks).
> 
> Signed-off-by: Andrii Nakryiko <andrii@xxxxxxxxxx>
> ---
>  include/linux/uprobes.h |   2 +-
>  kernel/events/uprobes.c | 104 +++++++++++++++++++++++-----------------
>  2 files changed, 62 insertions(+), 44 deletions(-)
> 
> diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
> index 9cf0dce62e4c..29c935b0d504 100644
> --- a/include/linux/uprobes.h
> +++ b/include/linux/uprobes.h
> @@ -35,7 +35,7 @@ struct uprobe_consumer {
>  				struct pt_regs *regs);
>  	bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm);
>  
> -	struct uprobe_consumer *next;
> +	struct list_head cons_node;
>  };
>  
>  #ifdef CONFIG_UPROBES
> diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> index 8bdcdc6901b2..97e58d160647 100644
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -59,7 +59,7 @@ struct uprobe {
>  	struct rw_semaphore	register_rwsem;
>  	struct rw_semaphore	consumer_rwsem;
>  	struct list_head	pending_list;
> -	struct uprobe_consumer	*consumers;
> +	struct list_head	consumers;
>  	struct inode		*inode;		/* Also hold a ref to inode */
>  	struct rcu_head		rcu;
>  	loff_t			offset;
> @@ -783,6 +783,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
>  	uprobe->inode = inode;
>  	uprobe->offset = offset;
>  	uprobe->ref_ctr_offset = ref_ctr_offset;
> +	INIT_LIST_HEAD(&uprobe->consumers);
>  	init_rwsem(&uprobe->register_rwsem);
>  	init_rwsem(&uprobe->consumer_rwsem);
>  	RB_CLEAR_NODE(&uprobe->rb_node);
> @@ -808,32 +809,19 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
>  static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
>  {
>  	down_write(&uprobe->consumer_rwsem);
> -	uc->next = uprobe->consumers;
> -	uprobe->consumers = uc;
> +	list_add_rcu(&uc->cons_node, &uprobe->consumers);
>  	up_write(&uprobe->consumer_rwsem);
>  }
>  
>  /*
>   * For uprobe @uprobe, delete the consumer @uc.
> - * Return true if the @uc is deleted successfully
> - * or return false.
> + * Should never be called with consumer that's not part of @uprobe->consumers.
>   */
> -static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
> +static void consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
>  {
> -	struct uprobe_consumer **con;
> -	bool ret = false;
> -
>  	down_write(&uprobe->consumer_rwsem);
> -	for (con = &uprobe->consumers; *con; con = &(*con)->next) {
> -		if (*con == uc) {
> -			*con = uc->next;
> -			ret = true;
> -			break;
> -		}
> -	}
> +	list_del_rcu(&uc->cons_node);
>  	up_write(&uprobe->consumer_rwsem);
> -
> -	return ret;
>  }
>  
>  static int __copy_insn(struct address_space *mapping, struct file *filp,
> @@ -929,7 +917,8 @@ static bool filter_chain(struct uprobe *uprobe, struct mm_struct *mm)
>  	bool ret = false;
>  
>  	down_read(&uprobe->consumer_rwsem);
> -	for (uc = uprobe->consumers; uc; uc = uc->next) {
> +	list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
> +				 srcu_read_lock_held(&uprobes_srcu)) {
>  		ret = consumer_filter(uc, mm);
>  		if (ret)
>  			break;
> @@ -1125,18 +1114,29 @@ void uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc)
>  	int err;
>  
>  	down_write(&uprobe->register_rwsem);
> -	if (WARN_ON(!consumer_del(uprobe, uc))) {
> -		err = -ENOENT;
> -	} else {
> -		err = register_for_each_vma(uprobe, NULL);
> -		/* TODO : cant unregister? schedule a worker thread */
> -		if (unlikely(err))
> -			uprobe_warn(current, "unregister, leaking uprobe");
> -	}
> +	consumer_del(uprobe, uc);
> +	err = register_for_each_vma(uprobe, NULL);
>  	up_write(&uprobe->register_rwsem);
>  
> -	if (!err)
> -		put_uprobe(uprobe);
> +	/* TODO : cant unregister? schedule a worker thread */
> +	if (unlikely(err)) {
> +		uprobe_warn(current, "unregister, leaking uprobe");
> +		goto out_sync;
> +	}
> +
> +	put_uprobe(uprobe);
> +
> +out_sync:
> +	/*
> +	 * Now that handler_chain() and handle_uretprobe_chain() iterate over
> +	 * uprobe->consumers list under RCU protection without holding
> +	 * uprobe->register_rwsem, we need to wait for RCU grace period to
> +	 * make sure that we can't call into just unregistered
> +	 * uprobe_consumer's callbacks anymore. If we don't do that, fast and
> +	 * unlucky enough caller can free consumer's memory and cause
> +	 * handler_chain() or handle_uretprobe_chain() to do an use-after-free.
> +	 */
> +	synchronize_srcu(&uprobes_srcu);
>  }
>  EXPORT_SYMBOL_GPL(uprobe_unregister);
>  
> @@ -1214,13 +1214,20 @@ EXPORT_SYMBOL_GPL(uprobe_register);
>  int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool add)
>  {
>  	struct uprobe_consumer *con;
> -	int ret = -ENOENT;
> +	int ret = -ENOENT, srcu_idx;
>  
>  	down_write(&uprobe->register_rwsem);
> -	for (con = uprobe->consumers; con && con != uc ; con = con->next)
> -		;
> -	if (con)
> -		ret = register_for_each_vma(uprobe, add ? uc : NULL);
> +
> +	srcu_idx = srcu_read_lock(&uprobes_srcu);
> +	list_for_each_entry_srcu(con, &uprobe->consumers, cons_node,
> +				 srcu_read_lock_held(&uprobes_srcu)) {
> +		if (con == uc) {
> +			ret = register_for_each_vma(uprobe, add ? uc : NULL);
> +			break;
> +		}
> +	}
> +	srcu_read_unlock(&uprobes_srcu, srcu_idx);
> +
>  	up_write(&uprobe->register_rwsem);
>  
>  	return ret;
> @@ -2085,10 +2092,12 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
>  	struct uprobe_consumer *uc;
>  	int remove = UPROBE_HANDLER_REMOVE;
>  	bool need_prep = false; /* prepare return uprobe, when needed */
> +	bool has_consumers = false;
>  
> -	down_read(&uprobe->register_rwsem);
>  	current->utask->auprobe = &uprobe->arch;
> -	for (uc = uprobe->consumers; uc; uc = uc->next) {
> +
> +	list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
> +				 srcu_read_lock_held(&uprobes_srcu)) {
>  		int rc = 0;
>  
>  		if (uc->handler) {
> @@ -2101,17 +2110,24 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
>  			need_prep = true;
>  
>  		remove &= rc;
> +		has_consumers = true;
>  	}
>  	current->utask->auprobe = NULL;
>  
>  	if (need_prep && !remove)
>  		prepare_uretprobe(uprobe, regs); /* put bp at return */
>  
> -	if (remove && uprobe->consumers) {
> -		WARN_ON(!uprobe_is_active(uprobe));
> -		unapply_uprobe(uprobe, current->mm);
> +	if (remove && has_consumers) {
> +		down_read(&uprobe->register_rwsem);
> +
> +		/* re-check that removal is still required, this time under lock */
> +		if (!filter_chain(uprobe, current->mm)) {

Sorry for the late question, but I do not follow this change.

At this point we have got 1 as the handler's return value from all of the
uprobe's consumers, so why do we need to call filter_chain() here? IIUC this
will likely skip over the removal?

with single uprobe_multi consumer:

  handler_chain
    uprobe_multi_link_handler
      uprobe_prog_run
        bpf_prog returns 1

    remove = 1

    if (remove && has_consumers) {

      filter_chain - uprobe_multi_link_filter returns true, so the uprobe stays?

Maybe I just need to write a test for it ;-)

thanks,
jirka


> +			WARN_ON(!uprobe_is_active(uprobe));
> +			unapply_uprobe(uprobe, current->mm);
> +		}
> +
> +		up_read(&uprobe->register_rwsem);
>  	}
> -	up_read(&uprobe->register_rwsem);
>  }
>  
>  static void
> @@ -2119,13 +2135,15 @@ handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
>  {
>  	struct uprobe *uprobe = ri->uprobe;
>  	struct uprobe_consumer *uc;
> +	int srcu_idx;
>  
> -	down_read(&uprobe->register_rwsem);
> -	for (uc = uprobe->consumers; uc; uc = uc->next) {
> +	srcu_idx = srcu_read_lock(&uprobes_srcu);
> +	list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
> +				 srcu_read_lock_held(&uprobes_srcu)) {
>  		if (uc->ret_handler)
>  			uc->ret_handler(uc, ri->func, regs);
>  	}
> -	up_read(&uprobe->register_rwsem);
> +	srcu_read_unlock(&uprobes_srcu, srcu_idx);
>  }
>  
>  static struct return_instance *find_next_ret_chain(struct return_instance *ri)
> -- 
> 2.43.5
> 




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux