Re: [PATCH bpf-next 4/8] bpf: Take module reference on kprobe_multi link

Andrii Nakryiko <andrii.nakryiko@xxxxxxxxx> · Thu, 13 Oct 2022 11:50:54 -0700

On Sun, Oct 9, 2022 at 3:00 PM Jiri Olsa <jolsa@xxxxxxxxxx> wrote:
>
> Currently we allow to create kprobe multi link on function from kernel
> module, but we don't take the module reference to ensure it's not
> unloaded while we are tracing it.
>
> The multi kprobe link is based on fprobe/ftrace layer which takes
> different approach and releases ftrace hooks when module is unloaded
> even if there's tracer registered on top of it.
>
> Adding code that gathers all the related modules for the link and takes
> their references before it's attached. All kernel module references are
> released after link is unregistered.
>
> Note that we do it the same way already for trampoline probes
> (but for single address).
>
> Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>
> ---
>  kernel/trace/bpf_trace.c | 100 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 100 insertions(+)
>
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 9be1a2b6b53b..f3d7565fee79 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -2447,6 +2447,8 @@ struct bpf_kprobe_multi_link {
>         unsigned long *addrs;
>         u64 *cookies;
>         u32 cnt;
> +       struct module **mods;
> +       u32 mods_cnt;
>  };
>
>  struct bpf_kprobe_multi_run_ctx {
> @@ -2502,6 +2504,14 @@ static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32
>         return err;
>  }
>
> +static void kprobe_multi_put_modules(struct module **mods, u32 cnt)
> +{
> +       u32 i;
> +
> +       for (i = 0; i < cnt; i++)
> +               module_put(mods[i]);
> +}
> +
>  static void free_user_syms(struct user_syms *us)
>  {
>         kvfree(us->syms);
> @@ -2514,6 +2524,7 @@ static void bpf_kprobe_multi_link_release(struct bpf_link *link)
>
>         kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
>         unregister_fprobe(&kmulti_link->fp);
> +       kprobe_multi_put_modules(kmulti_link->mods, kmulti_link->mods_cnt);
>  }
>
>  static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link)
> @@ -2523,6 +2534,7 @@ static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link)
>         kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
>         kvfree(kmulti_link->addrs);
>         kvfree(kmulti_link->cookies);
> +       kfree(kmulti_link->mods);
>         kfree(kmulti_link);
>  }
>
> @@ -2658,6 +2670,80 @@ static void symbols_swap_r(void *a, void *b, int size, const void *priv)
>         }
>  }
>
> +struct module_addr_args {
> +       unsigned long *addrs;
> +       u32 addrs_cnt;
> +       struct module **mods;
> +       int mods_cnt;
> +       int mods_alloc;
> +};
> +
> +static int module_callback(void *data, const char *name,
> +                          struct module *mod, unsigned long addr)
> +{
> +       struct module_addr_args *args = data;
> +       bool realloc = !args->mods;
> +       struct module **mods;
> +
> +       /* We iterate all modules symbols and for each we:
> +        * - search for it in provided addresses array
> +        * - if found we check if we already have the module pointer stored
> +        *   (we iterate modules sequentially, so we can check just the last
> +        *   module pointer)
> +        * - take module reference and store it
> +        */
> +       if (!bsearch(&addr, args->addrs, args->addrs_cnt, sizeof(unsigned long),

nit: sizeof(addr) is shorter and will stay in sync with addr variable?

> +                      bpf_kprobe_multi_addrs_cmp))
> +               return 0;
> +
> +       if (args->mods) {
> +               struct module *prev = NULL;
> +
> +               if (args->mods_cnt > 1)
> +                       prev = args->mods[args->mods_cnt - 1];

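doesn't args->mods != NULL imply that args->mods_cnt >= 1? If so, this
check is unnecessary (and as written, with "> 1", the module stored at
index 0 is never compared against when mods_cnt == 1).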

> +               if (prev == mod)
> +                       return 0;
> +               if (args->mods_cnt == args->mods_alloc)

nit: in libbpf we consistently use the cnt and cap (capacity)
terminology for this; "mods_alloc" reads like a bool flag or something.

> +                       realloc = true;
> +       }
> +
> +       if (realloc) {
> +               args->mods_alloc += 100;

agree with Song, this looks pretty arbitrary and quite large. Again,
from libbpf experience, we do something like:

mods_alloc = max(16, mods_alloc * 3 / 2);

so grow by 50%, but start off with a reasonable 16-element array. We can
use a similar approach here.

> +               mods = krealloc_array(args->mods, args->mods_alloc, sizeof(*mods), GFP_KERNEL);
> +               if (!mods)
> +                       return -ENOMEM;
> +               args->mods = mods;
> +       }

The previous two blocks read as pretty convoluted. Isn't this equivalent to the simpler:

if (args->mods && args->mods[args->mods_cnt - 1] == mod)
    return 0;

if (args->mods_cnt == args->mods_alloc /* but I'd use mods_cap */) {
    /* realloc here */
}

> +
> +       if (!try_module_get(mod))
> +               return -EINVAL;
> +
> +       args->mods[args->mods_cnt] = mod;
> +       args->mods_cnt++;
> +       return 0;
> +}
> +

[...]