Re: [PATCHv2 bpf-next 1/9] uprobe: Add support for session consumer

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Jul 1, 2024 at 9:41 AM Jiri Olsa <jolsa@xxxxxxxxxx> wrote:
>
> Adding support for uprobe consumer to be defined as session and have
> new behaviour for consumer's 'handler' and 'ret_handler' callbacks.
>
> The session means that 'handler' and 'ret_handler' callbacks are
> connected in a way that allows to:
>
>   - control execution of 'ret_handler' from 'handler' callback
>   - share data between 'handler' and 'ret_handler' callbacks
>
> The session is enabled by setting new 'session' bool field to true
> in uprobe_consumer object.
>
> We keep count of session consumers for uprobe and allocate session_consumer
> object for each in return_instance object. This allows us to store
> return values of 'handler' callbacks and data pointers of shared
> data between both handlers.
>
> The session concept fits to our common use case where we do filtering
> on entry uprobe and based on the result we decide to run the return
> uprobe (or not).
>
> It's also convenient to share the data between session callbacks.
>
> The control of 'ret_handler' callback execution is done via return
> value of the 'handler' callback. If it's 0 we install and execute
> return uprobe, if it's 1 we do not.
>
> Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>
> ---
>  include/linux/uprobes.h     |  16 ++++-
>  kernel/events/uprobes.c     | 129 +++++++++++++++++++++++++++++++++---
>  kernel/trace/bpf_trace.c    |   6 +-
>  kernel/trace/trace_uprobe.c |  12 ++--
>  4 files changed, 144 insertions(+), 19 deletions(-)
>
> diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
> index f46e0ca0169c..903a860a8d01 100644
> --- a/include/linux/uprobes.h
> +++ b/include/linux/uprobes.h
> @@ -34,15 +34,18 @@ enum uprobe_filter_ctx {
>  };
>
>  struct uprobe_consumer {
> -       int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
> +       int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data);
>         int (*ret_handler)(struct uprobe_consumer *self,
>                                 unsigned long func,
> -                               struct pt_regs *regs);
> +                               struct pt_regs *regs, __u64 *data);
>         bool (*filter)(struct uprobe_consumer *self,
>                                 enum uprobe_filter_ctx ctx,
>                                 struct mm_struct *mm);
>
>         struct uprobe_consumer *next;
> +
> +       bool                    session;        /* marks uprobe session consumer */
> +       unsigned int            session_id;     /* set when uprobe_consumer is registered */
>  };
>
>  #ifdef CONFIG_UPROBES
> @@ -80,6 +83,12 @@ struct uprobe_task {
>         unsigned int                    depth;
>  };
>
> +struct session_consumer {
> +       __u64           cookie;
> +       unsigned int    id;
> +       int             rc;

you'll be using u64 for ID, right? so this struct will be 24 bytes.
Maybe we can just use topmost bit of ID to store whether uretprobe
should run or not? It's trivial to mask out during ID comparisons

> +};
> +
>  struct return_instance {
>         struct uprobe           *uprobe;
>         unsigned long           func;
> @@ -88,6 +97,9 @@ struct return_instance {
>         bool                    chained;        /* true, if instance is nested */
>
>         struct return_instance  *next;          /* keep as stack */
> +
> +       int                     sessions_cnt;

there is 7 byte gap before next field, let's put sessions_cnt there

> +       struct session_consumer sessions[];
>  };
>
>  enum rp_check {
> diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> index 2c83ba776fc7..4da410460f2a 100644
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -63,6 +63,8 @@ struct uprobe {
>         loff_t                  ref_ctr_offset;
>         unsigned long           flags;
>
> +       unsigned int            sessions_cnt;
> +
>         /*
>          * The generic code assumes that it has two members of unknown type
>          * owned by the arch-specific code:
> @@ -750,11 +752,30 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
>         return uprobe;
>  }
>
> +static void
> +uprobe_consumer_account(struct uprobe *uprobe, struct uprobe_consumer *uc)
> +{
> +       static unsigned int session_id;

(besides what Peter mentioned about wrap around of 32-bit counter)
let's use atomic here to not rely on any particular locking
(unnecessarily), this might make my life easier in the future, thanks.
This is registration time, low frequency, extra atomic won't hurt.

It might be already broken, actually, for two independently registering uprobes.

> +
> +       if (uc->session) {
> +               uprobe->sessions_cnt++;
> +               uc->session_id = ++session_id ?: ++session_id;
> +       }
> +}
> +
> +static void
> +uprobe_consumer_unaccount(struct uprobe *uprobe, struct uprobe_consumer *uc)

this fits in 100 characters, keep it single line, please. Same for
account function

> +{
> +       if (uc->session)
> +               uprobe->sessions_cnt--;
> +}
> +
>  static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
>  {
>         down_write(&uprobe->consumer_rwsem);
>         uc->next = uprobe->consumers;
>         uprobe->consumers = uc;
> +       uprobe_consumer_account(uprobe, uc);
>         up_write(&uprobe->consumer_rwsem);
>  }
>
> @@ -773,6 +794,7 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
>                 if (*con == uc) {
>                         *con = uc->next;
>                         ret = true;
> +                       uprobe_consumer_unaccount(uprobe, uc);
>                         break;
>                 }
>         }
> @@ -1744,6 +1766,23 @@ static struct uprobe_task *get_utask(void)
>         return current->utask;
>  }
>
> +static size_t ri_size(int sessions_cnt)
> +{
> +       struct return_instance *ri __maybe_unused;
> +
> +       return sizeof(*ri) + sessions_cnt * sizeof(ri->sessions[0]);

just use struct_size()?

> +}
> +
> +static struct return_instance *alloc_return_instance(int sessions_cnt)
> +{
> +       struct return_instance *ri;
> +
> +       ri = kzalloc(ri_size(sessions_cnt), GFP_KERNEL);
> +       if (ri)
> +               ri->sessions_cnt = sessions_cnt;
> +       return ri;
> +}
> +
>  static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
>  {
>         struct uprobe_task *n_utask;
> @@ -1756,11 +1795,11 @@ static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
>
>         p = &n_utask->return_instances;
>         for (o = o_utask->return_instances; o; o = o->next) {
> -               n = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
> +               n = alloc_return_instance(o->sessions_cnt);
>                 if (!n)
>                         return -ENOMEM;
>
> -               *n = *o;
> +               memcpy(n, o, ri_size(o->sessions_cnt));
>                 get_uprobe(n->uprobe);
>                 n->next = NULL;
>
> @@ -1853,9 +1892,9 @@ static void cleanup_return_instances(struct uprobe_task *utask, bool chained,
>         utask->return_instances = ri;
>  }
>
> -static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
> +static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs,
> +                             struct return_instance *ri)
>  {
> -       struct return_instance *ri;
>         struct uprobe_task *utask;
>         unsigned long orig_ret_vaddr, trampoline_vaddr;
>         bool chained;
> @@ -1874,9 +1913,11 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
>                 return;
>         }
>
> -       ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
> -       if (!ri)
> -               return;
> +       if (!ri) {
> +               ri = alloc_return_instance(0);
> +               if (!ri)
> +                       return;
> +       }
>
>         trampoline_vaddr = get_trampoline_vaddr();
>         orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
> @@ -2065,35 +2106,85 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
>         return uprobe;
>  }
>
> +static struct session_consumer *
> +session_consumer_next(struct return_instance *ri, struct session_consumer *sc,
> +                     int session_id)
> +{
> +       struct session_consumer *next;
> +
> +       next = sc ? sc + 1 : &ri->sessions[0];
> +       next->id = session_id;

it's kind of unexpected that "session_consumer_next" would actually
set an ID... Maybe drop int session_id as input argument and fill it
out outside of this function, this function being just a simple
iterator?

> +       return next;
> +}
> +
> +static struct session_consumer *
> +session_consumer_find(struct return_instance *ri, int *iter, int session_id)
> +{
> +       struct session_consumer *sc;
> +       int idx = *iter;
> +
> +       for (sc = &ri->sessions[idx]; idx < ri->sessions_cnt; idx++, sc++) {
> +               if (sc->id == session_id) {
> +                       *iter = idx + 1;
> +                       return sc;
> +               }
> +       }
> +       return NULL;
> +}
> +
>  static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
>  {
>         struct uprobe_consumer *uc;
>         int remove = UPROBE_HANDLER_REMOVE;
> +       struct session_consumer *sc = NULL;
> +       struct return_instance *ri = NULL;
>         bool need_prep = false; /* prepare return uprobe, when needed */
>
>         down_read(&uprobe->register_rwsem);
> +       if (uprobe->sessions_cnt) {
> +               ri = alloc_return_instance(uprobe->sessions_cnt);
> +               if (!ri)
> +                       goto out;
> +       }
> +
>         for (uc = uprobe->consumers; uc; uc = uc->next) {
> +               __u64 *cookie = NULL;
>                 int rc = 0;
>
> +               if (uc->session) {
> +                       sc = session_consumer_next(ri, sc, uc->session_id);
> +                       cookie = &sc->cookie;
> +               }
> +
>                 if (uc->handler) {
> -                       rc = uc->handler(uc, regs);
> +                       rc = uc->handler(uc, regs, cookie);
>                         WARN(rc & ~UPROBE_HANDLER_MASK,
>                                 "bad rc=0x%x from %ps()\n", rc, uc->handler);
>                 }
>
> -               if (uc->ret_handler)
> +               if (uc->session) {
> +                       sc->rc = rc;
> +                       need_prep |= !rc;

nit:

if (rc == 0)
    need_prep = true;

and then it's *extremely obvious* what happens and under which conditions

> +               } else if (uc->ret_handler) {
>                         need_prep = true;
> +               }
>
>                 remove &= rc;
>         }
>
> +       /* no removal if there's at least one session consumer */
> +       remove &= !uprobe->sessions_cnt;

this is counter (not error, not pointer), let's stick to ` == 0`, please

is this

if (uprobe->sessions_cnt != 0)
   remove = 0;

? I can't tell (honestly), without spending ridiculous amounts of
mental resources (for the underlying simplicity of the condition).

> +
>         if (need_prep && !remove)
> -               prepare_uretprobe(uprobe, regs); /* put bp at return */
> +               prepare_uretprobe(uprobe, regs, ri); /* put bp at return */
> +       else
> +               kfree(ri);
>
>         if (remove && uprobe->consumers) {
>                 WARN_ON(!uprobe_is_active(uprobe));
>                 unapply_uprobe(uprobe, current->mm);
>         }
> + out:
>         up_read(&uprobe->register_rwsem);
>  }
>

[...]





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux