Re: [RFC PATCH v3] bpf: Using binary search to improve the performance of btf_find_by_name_kind

Donglin Peng <dolinux.peng@xxxxxxxxx> · Mon, 10 Jun 2024 00:12:07 +0800

On Sun, Jun 9, 2024 at 3:23 PM Masami Hiramatsu <mhiramat@xxxxxxxxxx> wrote:
>
> Hi
>
> On Sat,  8 Jun 2024 07:08:35 -0700
> Donglin Peng <dolinux.peng@xxxxxxxxx> wrote:
>
> > Currently, we are only using the linear search method to find the type id
> > by the name, which has a time complexity of O(n). This change involves
> > sorting the names of btf types in ascending order and using binary search,
> > which has a time complexity of O(log(n)). This idea was inspired by the
> > following patch:
> >
> > 60443c88f3a8 ("kallsyms: Improve the performance of kallsyms_lookup_name()").
> >
> > At present, this improvement is only for searching in vmlinux's and
> > module's BTFs.
> >
> > Another change is the search direction, where we search the BTF first and
> > then its base, the type id of the first matched btf_type will be returned.
> >
> > Here is a time-consuming result that finding 87590 type ids by their names in
> > vmlinux's BTF.
> >
> > Before: 158426 ms
> > After:     114 ms
> >
> > The average lookup performance has improved more than 1000x in the above scenario.
>
> This looks great improvement! so searching function entry in ~2us?

Yes, it is the average test result on a Ubuntu VM. However, if you are
specifically
searching for a particular btf_type, it may take more time due to cache misses.

The data is obtained from the following test code:

 /*
  * total: the number of btf_type
  * nr_names: the number of btf_type that has a name
  *
  */

 pr_info("Begin to test ... total: %d, nr_names: %d\n", total, nr_names);

 t0 = ktime_get_ns();
 for (i = 0; i < nr_names; i++) {
         if (btf_find_by_name_kind(btf, tn[i].name, tn[i].kind) <= 0) {
                 pr_err("Find failed: name: %s, kind: %d\n",
tn[i].name, tn[i].kind);
                 break;
         }

         if ((i & 0xfff) == 0)
                 touch_nmi_watchdog();
 }
 delta = ktime_get_ns() - t0;

 pr_info("Consume total: %llu ms,  Per btf_type: %llu ns\n", delta /
NSEC_PER_MSEC, delta / nr_names);

Using binary search:
[   40.381095] Begin to test ... total: 155010, nr_names: 87596
[   40.493024] Consume total: 111 ms,  Per btf_type: 1275 ns

Using linear search:
[   56.464520] Begin to test ... total: 155003, nr_names: 87591
[  213.344919] Consume total: 156880 ms,  Per btf_type: 1791054 ns

> BTW, I have some comments below, but basically it looks good to me and
> it passed my ftracetest.

Thank you.

>
> Tested-by: Masami Hiramatsu (Google) <mhiramat@xxxxxxxxxx>
>

I will include this in v4.

> >
> > Signed-off-by: Donglin Peng <dolinux.peng@xxxxxxxxx>
> > ---
> > Changes in RFC v3:
> >  - Sort the btf types during the build process in order to reduce memory usage
> >    and decrease boot time.
> >
> > RFC v2:
> >  - https://lore.kernel.org/all/20230909091646.420163-1-pengdonglin@xxxxxxxxxxxxxx
> > ---
> >  include/linux/btf.h |   1 +
> >  kernel/bpf/btf.c    | 160 +++++++++++++++++++++++++++++++++---
> >  tools/lib/bpf/btf.c | 195 ++++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 345 insertions(+), 11 deletions(-)
> >
> > diff --git a/include/linux/btf.h b/include/linux/btf.h
> > index f9e56fd12a9f..1dc1000a7dc9 100644
> > --- a/include/linux/btf.h
> > +++ b/include/linux/btf.h
> > @@ -214,6 +214,7 @@ bool btf_is_kernel(const struct btf *btf);
> >  bool btf_is_module(const struct btf *btf);
> >  struct module *btf_try_get_module(const struct btf *btf);
> >  u32 btf_nr_types(const struct btf *btf);
> > +u32 btf_type_cnt(const struct btf *btf);
> >  bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
> >                          const struct btf_member *m,
> >                          u32 expected_offset, u32 expected_size);
> > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> > index 821063660d9f..5b7b464204bf 100644
> > --- a/kernel/bpf/btf.c
> > +++ b/kernel/bpf/btf.c
> > @@ -262,6 +262,7 @@ struct btf {
> >       u32 data_size;
> >       refcount_t refcnt;
> >       u32 id;
> > +     u32 nr_types_sorted;
> >       struct rcu_head rcu;
> >       struct btf_kfunc_set_tab *kfunc_set_tab;
> >       struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab;
> > @@ -542,23 +543,102 @@ u32 btf_nr_types(const struct btf *btf)
> >       return total;
> >  }
> >
> > -s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
> > +u32 btf_type_cnt(const struct btf *btf)
> > +{
> > +     return btf->start_id + btf->nr_types;
> > +}
> > +
> > +static s32 btf_find_by_name_bsearch(const struct btf *btf, const char *name,
> > +                                 int *start, int *end)
> >  {
> >       const struct btf_type *t;
> > -     const char *tname;
> > -     u32 i, total;
> > +     const char *name_buf;
> > +     int low, low_start, mid, high, high_end;
> > +     int ret, start_id;
> > +
> > +     start_id = btf->base_btf ? btf->start_id : 1;
> > +     low_start = low = start_id;
> > +     high_end = high = start_id + btf->nr_types_sorted - 1;
> > +
> > +     while (low <= high) {
> > +             mid = low + (high - low) / 2;
> > +             t = btf_type_by_id(btf, mid);
> > +             name_buf = btf_name_by_offset(btf, t->name_off);
> > +             ret = strcmp(name, name_buf);
> > +             if (ret > 0)
> > +                     low = mid + 1;
> > +             else if (ret < 0)
> > +                     high = mid - 1;
> > +             else
> > +                     break;
> > +     }
> >
> > -     total = btf_nr_types(btf);
> > -     for (i = 1; i < total; i++) {
> > -             t = btf_type_by_id(btf, i);
> > -             if (BTF_INFO_KIND(t->info) != kind)
> > -                     continue;
> > +     if (low > high)
> > +             return -ESRCH;
>
> nit: -ENOENT ?

Good, I will update it in the v4.

>
> >
> > -             tname = btf_name_by_offset(btf, t->name_off);
> > -             if (!strcmp(tname, name))
> > -                     return i;
> > +     if (start) {
> > +             low = mid;
> > +             while (low > low_start) {
> > +                     t = btf_type_by_id(btf, low-1);
> > +                     name_buf = btf_name_by_offset(btf, t->name_off);
> > +                     if (strcmp(name, name_buf))
> > +                             break;
> > +                     low--;
> > +             }
> > +             *start = low;
> >       }
> >
> > +     if (end) {
> > +             high = mid;
> > +             while (high < high_end) {
> > +                     t = btf_type_by_id(btf, high+1);
> > +                     name_buf = btf_name_by_offset(btf, t->name_off);
> > +                     if (strcmp(name, name_buf))
> > +                             break;
> > +                     high++;
> > +             }
> > +             *end = high;
> > +     }
> > +
> > +     return mid;
> > +}
> > +
> > +s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
> > +{
> > +     const struct btf_type *t;
> > +     const char *tname;
> > +     int start, end;
> > +     s32 id, total;
> > +
> > +     do {
> > +             if (btf->nr_types_sorted) {
> > +                     /* binary search */
> > +                     id = btf_find_by_name_bsearch(btf, name, &start, &end);
> > +                     if (id > 0) {
> > +                             while (start <= end) {
> > +                                     t = btf_type_by_id(btf, start);
> > +                                     if (BTF_INFO_KIND(t->info) == kind)
> > +                                             return start;
> > +                                     start++;
> > +                             }
> > +                     }
> > +             } else {
> > +                     /* linear search */
> > +                     total = btf_type_cnt(btf);
> > +                     for (id = btf->base_btf ? btf->start_id : 1;
> > +                             id < total; id++) {
> > +                             t = btf_type_by_id(btf, id);
> > +                             if (BTF_INFO_KIND(t->info) != kind)
> > +                                     continue;
> > +
> > +                             tname = btf_name_by_offset(btf, t->name_off);
> > +                             if (!strcmp(tname, name))
> > +                                     return id;
> > +                     }
> > +             }
> > +             btf = btf->base_btf;
> > +     } while (btf);
> > +
> >       return -ENOENT;
> >  }
> >
> > @@ -5979,6 +6059,56 @@ int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_ty
> >       return kctx_type_id;
> >  }
> >
> > +static int btf_check_sort(struct btf *btf, int start_id)
> > +{
> > +     int i, n, nr_names = 0;
> > +
> > +     n = btf_nr_types(btf);
> > +     for (i = start_id; i < n; i++) {
> > +             const struct btf_type *t;
> > +             const char *name;
> > +
> > +             t = btf_type_by_id(btf, i);
> > +             if (!t)
> > +                     return -EINVAL;
> > +
> > +             name = btf_str_by_offset(btf, t->name_off);
> > +             if (!str_is_empty(name))
> > +                     nr_names++;
>
> else {
>         goto out;
> }
> ? (*)
>

No need for that. The purpose of the for loop here is to count the
number of btf_types
with names. If the BTF file is sorted, the btf_types with names will
be placed at the
beginning, while the btf_types without names will be appended at the end.

> > +     }
> > +
> > +     if (nr_names < 3)
> > +             goto out;
>
> What does this `3` mean?

It is just my own opinion. The binary search method does not offer any
superiority if there
 are only 1 or 2 btf_types with names.

>
> > +
> > +     for (i = 0; i < nr_names - 1; i++) {
> > +             const struct btf_type *t1, *t2;
> > +             const char *s1, *s2;
> > +
> > +             t1 = btf_type_by_id(btf, start_id + i);
> > +             if (!t1)
> > +                     return -EINVAL;
> > +
> > +             s1 = btf_str_by_offset(btf, t1->name_off);
> > +             if (str_is_empty(s1))
> > +                     goto out;
>
> Why not continue? This case is expected, or the previous block (*)
> must go `out` at that point.

The purpose of the for loop here is to verify whether the first
nr_name btf_types are sorted
or not. If the BTF file is indeed sorted, btf_types with names will be
placed at the beginning,
and the number of btf_types with names is equal to nr_names. However,
if a btf_type without a
name is found within the first nr_name btf_types, it indicates that
the BTF file is not sorted.

>
> > +
> > +             t2 = btf_type_by_id(btf, start_id + i + 1);
> > +             if (!t2)
> > +                     return -EINVAL;
> > +
> > +             s2 = btf_str_by_offset(btf, t2->name_off);
> > +             if (str_is_empty(s2))
> > +                     goto out;
>
> Ditto.
>

Ditto.

> > +
> > +             if (strcmp(s1, s2) > 0)
> > +                     goto out;
> > +     }
> > +
> > +     btf->nr_types_sorted = nr_names;
> > +out:
> > +     return 0;
> > +}
> > +
> >  BTF_ID_LIST(bpf_ctx_convert_btf_id)
> >  BTF_ID(struct, bpf_ctx_convert)
> >
> > @@ -6029,6 +6159,10 @@ struct btf *btf_parse_vmlinux(void)
> >       if (err)
> >               goto errout;
> >
> > +     err = btf_check_sort(btf, 1);
>
> Why `1`?

The start ID for the sorted btf_types in vmlinux's BTF is 1, as the
btf_void is placed before
the sorted types.

>
> > +     if (err)
> > +             goto errout;
> > +
> >       /* btf_parse_vmlinux() runs under bpf_verifier_lock */
> >       bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]);
> >
> > @@ -6111,6 +6245,10 @@ static struct btf *btf_parse_module(const char *module_name, const void *data, u
> >       if (err)
> >               goto errout;
> >
> > +     err = btf_check_sort(btf, btf_nr_types(base_btf));
> > +     if (err)
> > +             goto errout;
> > +
> >       btf_verifier_env_free(env);
> >       refcount_set(&btf->refcnt, 1);
> >       return btf;
> > diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> > index 2d0840ef599a..93c1ab677bfa 100644
> > --- a/tools/lib/bpf/btf.c
> > +++ b/tools/lib/bpf/btf.c
> > @@ -1,6 +1,9 @@
> >  // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
> >  /* Copyright (c) 2018 Facebook */
> >
> > +#ifndef _GNU_SOURCE
> > +#define _GNU_SOURCE
> > +#endif
> >  #include <byteswap.h>
> >  #include <endian.h>
> >  #include <stdio.h>
> > @@ -3072,6 +3075,7 @@ static int btf_dedup_ref_types(struct btf_dedup *d);
> >  static int btf_dedup_resolve_fwds(struct btf_dedup *d);
> >  static int btf_dedup_compact_types(struct btf_dedup *d);
> >  static int btf_dedup_remap_types(struct btf_dedup *d);
> > +static int btf_sort_type_by_name(struct btf *btf);
> >
> >  /*
> >   * Deduplicate BTF types and strings.
> > @@ -3270,6 +3274,11 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts)
> >               pr_debug("btf_dedup_remap_types failed:%d\n", err);
> >               goto done;
> >       }
> > +     err = btf_sort_type_by_name(btf);
> > +     if (err < 0) {
> > +             pr_debug("btf_sort_type_by_name failed:%d\n", err);
> > +             goto done;
> > +     }
> >
> >  done:
> >       btf_dedup_free(d);
> > @@ -5212,3 +5221,189 @@ int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void
> >
> >       return 0;
> >  }
> > +
> > +static int btf_compare_type_name(const void *a, const void *b, void *priv)
> > +{
> > +     struct btf *btf = (struct btf *)priv;
> > +     __u32 ta = *(const __u32 *)a;
> > +     __u32 tb = *(const __u32 *)b;
> > +     struct btf_type *bta, *btb;
> > +     const char *na, *nb;
> > +
> > +     bta = (struct btf_type *)(btf->types_data + ta);
> > +     btb = (struct btf_type *)(btf->types_data + tb);
> > +     na = btf__str_by_offset(btf, bta->name_off);
> > +     nb = btf__str_by_offset(btf, btb->name_off);
> > +
> > +     return strcmp(na, nb);
> > +}
> > +
> > +static int btf_compare_offs(const void *o1, const void *o2)
> > +{
> > +     __u32 *offs1 = (__u32 *)o1;
> > +     __u32 *offs2 = (__u32 *)o2;
> > +
> > +     return *offs1 - *offs2;
> > +}
> > +
> > +static inline __u32 btf_get_mapped_type(struct btf *btf, __u32 *maps, __u32 type)
> > +{
> > +     if (type < btf->start_id)
> > +             return type;
> > +     return maps[type - btf->start_id] + btf->start_id;
> > +}
> > +
> > +/*
> > + * Collect and move the btf_types with names to the start location, and
> > + * sort them in ascending order by name, so we can use the binary search
> > + * method.
> > + */
> > +static int btf_sort_type_by_name(struct btf *btf)
> > +{
> > +     struct btf_type *bt;
> > +     __u32 *new_type_offs = NULL, *new_type_offs_noname = NULL;
> > +     __u32 *maps = NULL, *found_offs;
> > +     void *new_types_data = NULL, *loc_data;
> > +     int i, j, k, type_cnt, ret = 0, type_size;
> > +     __u32 data_size;
> > +
> > +     if (btf_ensure_modifiable(btf))
> > +             return libbpf_err(-ENOMEM);
> > +
> > +     type_cnt = btf->nr_types;
> > +     data_size = btf->type_offs_cap * sizeof(*new_type_offs);
> > +
> > +     maps = (__u32 *)malloc(type_cnt * sizeof(__u32));
> > +     if (!maps) {
> > +             ret = -ENOMEM;
> > +             goto err_out;
> > +     }
> > +
> > +     new_type_offs = (__u32 *)malloc(data_size);
>
> nit:
> new_type_offs = (__u32 *)calloc(btf->type_offs_cap, sizeof(*new_type_offs))

Okay, I will update it in v4.

>
> > +     if (!new_type_offs) {
> > +             ret = -ENOMEM;
> > +             goto err_out;
> > +     }
> > +
> > +     new_type_offs_noname = (__u32 *)malloc(data_size);
> > +     if (!new_type_offs_noname) {
> > +             ret = -ENOMEM;
> > +             goto err_out;
> > +     }
> > +
> > +     new_types_data = malloc(btf->types_data_cap);
> > +     if (!new_types_data) {
> > +             ret = -ENOMEM;
> > +             goto err_out;
> > +     }
> > +
> > +     memset(new_type_offs, 0, data_size);
>
> Then this memset is not required.

Good, I will update it in v4.

>
> Thank you,
>
>
> --
> Masami Hiramatsu (Google) <mhiramat@xxxxxxxxxx>