Re: [PATCHv3 1/7] uprobe: Add support for session consumer

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon,  9 Sep 2024 09:45:48 +0200
Jiri Olsa <jolsa@xxxxxxxxxx> wrote:

> Adding support for uprobe consumer to be defined as session and have
> new behaviour for consumer's 'handler' and 'ret_handler' callbacks.
> 
> The session means that 'handler' and 'ret_handler' callbacks are
> connected in a way that allows to:
> 
>   - control execution of 'ret_handler' from 'handler' callback
>   - share data between 'handler' and 'ret_handler' callbacks
> 
> The session is enabled by setting new 'session' bool field to true
> in uprobe_consumer object.
> 
> We use return_consumer object to keep track of consumers with
> 'ret_handler'. This object also carries the shared data between
> 'handler' and and 'ret_handler' callbacks.
> 
> The control of 'ret_handler' callback execution is done via return
> value of the 'handler' callback. This patch adds new 'ret_handler'
> return value (2) which means to ignore ret_handler callback.
> 
> Actions on 'handler' callback return values are now:
> 
>   0 - execute ret_handler (if it's defined)
>   1 - remove uprobe
>   2 - do nothing (ignore ret_handler)
> 
> The session concept fits to our common use case where we do filtering
> on entry uprobe and based on the result we decide to run the return
> uprobe (or not).

OK, so this allows uprobes handler to record any input parameter and
access it from ret_handler as fprobe's private entry data, right?

The code looks good to me.

Acked-by: Masami Hiramatsu (Google) <mhiramat@xxxxxxxxxx>

trace_uprobe should also support $argN in retuprobe.

https://lore.kernel.org/all/170952365552.229804.224112990211602895.stgit@devnote2/

Thank you,

> 
> It's also convenient to share the data between session callbacks.
> 
> Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>
> ---
>  include/linux/uprobes.h                       |  17 ++-
>  kernel/events/uprobes.c                       | 132 ++++++++++++++----
>  kernel/trace/bpf_trace.c                      |   6 +-
>  kernel/trace/trace_uprobe.c                   |  12 +-
>  .../selftests/bpf/bpf_testmod/bpf_testmod.c   |   2 +-
>  5 files changed, 133 insertions(+), 36 deletions(-)
> 
> diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
> index 2b294bf1881f..557901e04624 100644
> --- a/include/linux/uprobes.h
> +++ b/include/linux/uprobes.h
> @@ -24,7 +24,7 @@ struct notifier_block;
>  struct page;
>  
>  #define UPROBE_HANDLER_REMOVE		1
> -#define UPROBE_HANDLER_MASK		1
> +#define UPROBE_HANDLER_IGNORE		2
>  
>  #define MAX_URETPROBE_DEPTH		64
>  
> @@ -37,13 +37,16 @@ struct uprobe_consumer {
>  	 * for the current process. If filter() is omitted or returns true,
>  	 * UPROBE_HANDLER_REMOVE is effectively ignored.
>  	 */
> -	int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
> +	int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data);
>  	int (*ret_handler)(struct uprobe_consumer *self,
>  				unsigned long func,
> -				struct pt_regs *regs);
> +				struct pt_regs *regs, __u64 *data);
>  	bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm);
>  
>  	struct list_head cons_node;
> +	bool session;
> +
> +	__u64 id;	/* set when uprobe_consumer is registered */
>  };
>  
>  #ifdef CONFIG_UPROBES
> @@ -83,14 +86,22 @@ struct uprobe_task {
>  	unsigned int			depth;
>  };
>  
> +struct return_consumer {
> +	__u64	cookie;
> +	__u64	id;
> +};
> +
>  struct return_instance {
>  	struct uprobe		*uprobe;
>  	unsigned long		func;
>  	unsigned long		stack;		/* stack pointer */
>  	unsigned long		orig_ret_vaddr; /* original return address */
>  	bool			chained;	/* true, if instance is nested */
> +	int			consumers_cnt;
>  
>  	struct return_instance	*next;		/* keep as stack */
> +
> +	struct return_consumer	consumers[] __counted_by(consumers_cnt);
>  };
>  
>  enum rp_check {
> diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> index 4b7e590dc428..9e971f86afdf 100644
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -67,6 +67,8 @@ struct uprobe {
>  	loff_t			ref_ctr_offset;
>  	unsigned long		flags;
>  
> +	unsigned int		consumers_cnt;
> +
>  	/*
>  	 * The generic code assumes that it has two members of unknown type
>  	 * owned by the arch-specific code:
> @@ -826,8 +828,12 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
>  
>  static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
>  {
> +	static atomic64_t id;
> +
>  	down_write(&uprobe->consumer_rwsem);
>  	list_add_rcu(&uc->cons_node, &uprobe->consumers);
> +	uc->id = (__u64) atomic64_inc_return(&id);
> +	uprobe->consumers_cnt++;
>  	up_write(&uprobe->consumer_rwsem);
>  }
>  
> @@ -839,6 +845,7 @@ static void consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
>  {
>  	down_write(&uprobe->consumer_rwsem);
>  	list_del_rcu(&uc->cons_node);
> +	uprobe->consumers_cnt--;
>  	up_write(&uprobe->consumer_rwsem);
>  }
>  
> @@ -1786,6 +1793,30 @@ static struct uprobe_task *get_utask(void)
>  	return current->utask;
>  }
>  
> +static size_t ri_size(int consumers_cnt)
> +{
> +	struct return_instance *ri;
> +
> +	return sizeof(*ri) + sizeof(ri->consumers[0]) * consumers_cnt;
> +}
> +
> +static struct return_instance *alloc_return_instance(int consumers_cnt)
> +{
> +	struct return_instance *ri;
> +
> +	ri = kzalloc(ri_size(consumers_cnt), GFP_KERNEL);
> +	if (ri)
> +		ri->consumers_cnt = consumers_cnt;
> +	return ri;
> +}
> +
> +static struct return_instance *dup_return_instance(struct return_instance *old)
> +{
> +	size_t size = ri_size(old->consumers_cnt);
> +
> +	return kmemdup(old, size, GFP_KERNEL);
> +}
> +
>  static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
>  {
>  	struct uprobe_task *n_utask;
> @@ -1798,11 +1829,10 @@ static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
>  
>  	p = &n_utask->return_instances;
>  	for (o = o_utask->return_instances; o; o = o->next) {
> -		n = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
> +		n = dup_return_instance(o);
>  		if (!n)
>  			return -ENOMEM;
>  
> -		*n = *o;
>  		/*
>  		 * uprobe's refcnt has to be positive at this point, kept by
>  		 * utask->return_instances items; return_instances can't be
> @@ -1895,39 +1925,35 @@ static void cleanup_return_instances(struct uprobe_task *utask, bool chained,
>  	utask->return_instances = ri;
>  }
>  
> -static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
> +static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs,
> +			      struct return_instance *ri)
>  {
> -	struct return_instance *ri;
>  	struct uprobe_task *utask;
>  	unsigned long orig_ret_vaddr, trampoline_vaddr;
>  	bool chained;
>  
>  	if (!get_xol_area())
> -		return;
> +		goto free;
>  
>  	utask = get_utask();
>  	if (!utask)
> -		return;
> +		goto free;
>  
>  	if (utask->depth >= MAX_URETPROBE_DEPTH) {
>  		printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to"
>  				" nestedness limit pid/tgid=%d/%d\n",
>  				current->pid, current->tgid);
> -		return;
> +		goto free;
>  	}
>  
>  	/* we need to bump refcount to store uprobe in utask */
>  	if (!try_get_uprobe(uprobe))
> -		return;
> -
> -	ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
> -	if (!ri)
> -		goto fail;
> +		goto free;
>  
>  	trampoline_vaddr = uprobe_get_trampoline_vaddr();
>  	orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
>  	if (orig_ret_vaddr == -1)
> -		goto fail;
> +		goto put;
>  
>  	/* drop the entries invalidated by longjmp() */
>  	chained = (orig_ret_vaddr == trampoline_vaddr);
> @@ -1945,7 +1971,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
>  			 * attack from user-space.
>  			 */
>  			uprobe_warn(current, "handle tail call");
> -			goto fail;
> +			goto put;
>  		}
>  		orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
>  	}
> @@ -1960,9 +1986,10 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
>  	utask->return_instances = ri;
>  
>  	return;
> -fail:
> -	kfree(ri);
> +put:
>  	put_uprobe(uprobe);
> +free:
> +	kfree(ri);
>  }
>  
>  /* Prepare to single-step probed instruction out of line. */
> @@ -2114,35 +2141,78 @@ static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swb
>  	return uprobe;
>  }
>  
> +static struct return_consumer *
> +return_consumer_next(struct return_instance *ri, struct return_consumer *ric)
> +{
> +	return ric ? ric + 1 : &ri->consumers[0];
> +}
> +
> +static struct return_consumer *
> +return_consumer_find(struct return_instance *ri, int *iter, int id)
> +{
> +	struct return_consumer *ric;
> +	int idx = *iter;
> +
> +	for (ric = &ri->consumers[idx]; idx < ri->consumers_cnt; idx++, ric++) {
> +		if (ric->id == id) {
> +			*iter = idx + 1;
> +			return ric;
> +		}
> +	}
> +	return NULL;
> +}
> +
>  static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
>  {
>  	struct uprobe_consumer *uc;
>  	int remove = UPROBE_HANDLER_REMOVE;
> -	bool need_prep = false; /* prepare return uprobe, when needed */
> +	struct return_consumer *ric = NULL;
> +	struct return_instance *ri = NULL;
>  	bool has_consumers = false;
>  
>  	current->utask->auprobe = &uprobe->arch;
>  
>  	list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
>  				 srcu_read_lock_held(&uprobes_srcu)) {
> +		__u64 cookie = 0;
>  		int rc = 0;
>  
>  		if (uc->handler) {
> -			rc = uc->handler(uc, regs);
> -			WARN(rc & ~UPROBE_HANDLER_MASK,
> +			rc = uc->handler(uc, regs, &cookie);
> +			WARN(rc < 0 || rc > 2,
>  				"bad rc=0x%x from %ps()\n", rc, uc->handler);
>  		}
>  
> -		if (uc->ret_handler)
> -			need_prep = true;
> -
> +		/*
> +		 * The handler can return following values:
> +		 * 0 - execute ret_handler (if it's defined)
> +		 * 1 - remove uprobe
> +		 * 2 - do nothing (ignore ret_handler)
> +		 */
>  		remove &= rc;
>  		has_consumers = true;
> +
> +		if (rc == 0 && uc->ret_handler) {
> +			/*
> +			 * Preallocate return_instance object optimistically with
> +			 * all possible consumers, so we allocate just once.
> +			 */
> +			if (!ri) {
> +				ri = alloc_return_instance(uprobe->consumers_cnt);
> +				if (!ri)
> +					return;
> +			}
> +			ric = return_consumer_next(ri, ric);
> +			ric->cookie = cookie;
> +			ric->id = uc->id;
> +		}
>  	}
>  	current->utask->auprobe = NULL;
>  
> -	if (need_prep && !remove)
> -		prepare_uretprobe(uprobe, regs); /* put bp at return */
> +	if (ri && !remove)
> +		prepare_uretprobe(uprobe, regs, ri); /* put bp at return */
> +	else
> +		kfree(ri);
>  
>  	if (remove && has_consumers) {
>  		down_read(&uprobe->register_rwsem);
> @@ -2160,15 +2230,25 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
>  static void
>  handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
>  {
> +	struct return_consumer *ric = NULL;
>  	struct uprobe *uprobe = ri->uprobe;
>  	struct uprobe_consumer *uc;
> -	int srcu_idx;
> +	int srcu_idx, iter = 0;
>  
>  	srcu_idx = srcu_read_lock(&uprobes_srcu);
>  	list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
>  				 srcu_read_lock_held(&uprobes_srcu)) {
> +		/*
> +		 * If we don't find return consumer, it means uprobe consumer
> +		 * was added after we hit uprobe and return consumer did not
> +		 * get registered in which case we call the ret_handler only
> +		 * if it's not session consumer.
> +		 */
> +		ric = return_consumer_find(ri, &iter, uc->id);
> +		if (!ric && uc->session)
> +			continue;
>  		if (uc->ret_handler)
> -			uc->ret_handler(uc, ri->func, regs);
> +			uc->ret_handler(uc, ri->func, regs, ric ? &ric->cookie : NULL);
>  	}
>  	srcu_read_unlock(&uprobes_srcu, srcu_idx);
>  }
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index ac0a01cc8634..de241503c8fb 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -3332,7 +3332,8 @@ uprobe_multi_link_filter(struct uprobe_consumer *con, struct mm_struct *mm)
>  }
>  
>  static int
> -uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs)
> +uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs,
> +			  __u64 *data)
>  {
>  	struct bpf_uprobe *uprobe;
>  
> @@ -3341,7 +3342,8 @@ uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs)
>  }
>  
>  static int
> -uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs)
> +uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs,
> +			      __u64 *data)
>  {
>  	struct bpf_uprobe *uprobe;
>  
> diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
> index f7443e996b1b..11103dde897b 100644
> --- a/kernel/trace/trace_uprobe.c
> +++ b/kernel/trace/trace_uprobe.c
> @@ -88,9 +88,11 @@ static struct trace_uprobe *to_trace_uprobe(struct dyn_event *ev)
>  static int register_uprobe_event(struct trace_uprobe *tu);
>  static int unregister_uprobe_event(struct trace_uprobe *tu);
>  
> -static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
> +static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs,
> +			     __u64 *data);
>  static int uretprobe_dispatcher(struct uprobe_consumer *con,
> -				unsigned long func, struct pt_regs *regs);
> +				unsigned long func, struct pt_regs *regs,
> +				__u64 *data);
>  
>  #ifdef CONFIG_STACK_GROWSUP
>  static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
> @@ -1500,7 +1502,8 @@ trace_uprobe_register(struct trace_event_call *event, enum trace_reg type,
>  	}
>  }
>  
> -static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
> +static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs,
> +			     __u64 *data)
>  {
>  	struct trace_uprobe *tu;
>  	struct uprobe_dispatch_data udd;
> @@ -1530,7 +1533,8 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
>  }
>  
>  static int uretprobe_dispatcher(struct uprobe_consumer *con,
> -				unsigned long func, struct pt_regs *regs)
> +				unsigned long func, struct pt_regs *regs,
> +				__u64 *data)
>  {
>  	struct trace_uprobe *tu;
>  	struct uprobe_dispatch_data udd;
> diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
> index 1fc16657cf42..e91ff5b285f0 100644
> --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
> +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
> @@ -421,7 +421,7 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
>  
>  static int
>  uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func,
> -		   struct pt_regs *regs)
> +		   struct pt_regs *regs, __u64 *data)
>  
>  {
>  	regs->ax  = 0x12345678deadbeef;
> -- 
> 2.46.0
> 


-- 
Masami Hiramatsu (Google) <mhiramat@xxxxxxxxxx>




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux