On Wed, Aug 28, 2024 at 3:02 PM Eduard Zingerman <eddyz87@xxxxxxxxx> wrote:
On Wed, 2024-08-28 at 17:46 +0000, Ihor Solodrai wrote:
%.bpf.o objects depend on vmlinux.h, which makes them transitively
dependent on libbpf headers they don't actually need. However,
vmlinux.h itself doesn't actually change that often.
When generating vmlinux.h, compare it to a previous version and update
it only if there are changes.
Example of build time improvement (after first clean build):
$ touch ../../../lib/bpf/bpf.h
$ time make -j8
Before: real 1m37.592s
After: real 0m27.310s
Notice that the %.bpf.o gen step is skipped if vmlinux.h hasn't changed.
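The idea boils down to generating the header into a temporary file and
only replacing vmlinux.h when the contents actually differ, so its
timestamp (and everything that depends on it) is left alone. A rough
sketch of such a rule (not the exact recipe from this patch; the
variable names just stand in for the usual selftests Makefile ones):

# Sketch only, not the exact recipe from this patch.
$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR)
	$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@.tmp
	if ! cmp -s $@.tmp $@; then mv $@.tmp $@; else rm -f $@.tmp; fi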
Link: https://lore.kernel.org/bpf/CAEf4BzY1z5cC7BKye8=A8aTVxpsCzD=p1jdTfKC7i0XVuYoHUQ@xxxxxxxxxxxxxx
Signed-off-by: Ihor Solodrai <ihor.solodrai@xxxxx>
---
Unfortunately, I think that this is a half-measure.
E.g. the following sequence of commands still forces a rebuild of the
tests for me:
touch ../../../../kernel/bpf/verifier.c; \
make -j22 -C ../../../../; \
time make test_progs
To work around this we need to enable the reproducible_build option:
diff --git a/scripts/Makefile.btf b/scripts/Makefile.btf
index b75f09f3f424..8cd648f3e32b 100644
--- a/scripts/Makefile.btf
+++ b/scripts/Makefile.btf
@@ -19,7 +19,7 @@ pahole-flags-$(call test-ge, $(pahole-ver), 125) += --skip_encoding_btf_inconsis
else
# Switch to using --btf_features for v1.26 and later.
-pahole-flags-$(call test-ge, $(pahole-ver), 126) = -j --btf_features=encode_force,var,float,enum64,decl_tag,type_tag,optimized_func,consistent_func,decl_tag_kfuncs
+pahole-flags-$(call test-ge, $(pahole-ver), 126) = -j --btf_features=encode_force,var,float,enum64,decl_tag,type_tag,optimized_func,consistent_func,decl_tag_kfuncs,reproducible_build
ifneq ($(KBUILD_EXTMOD),)
module-pahole-flags-$(call test-ge, $(pahole-ver), 126) += --btf_features=distilled_base
Question to the mailing list: do we want this?
Alan, can you please give us a summary of the consequences of enabling
the reproducible_build pahole option, in terms of performance and
otherwise?
I've applied the patches as is, despite them not solving the issue
completely, as they are moving us in the right direction anyway. I do
get slightly different BTF every single time I rebuild my kernel, so
the change in patch #2 doesn't help me yet.
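For the record, this is easy to see by dumping the C header from two
builds of the same tree and diffing the results (the vmlinux file names
below are just an example):

$ bpftool btf dump file vmlinux.build1 format c > vmlinux.h.1
$ bpftool btf dump file vmlinux.build2 format c > vmlinux.h.2
$ diff vmlinux.h.1 vmlinux.h.2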
For libbpf headers, Ihor, can you please follow up with adding
bpf_helper_defs.h as a dependency?
I have some ideas on how to make regenerating vmlinux.h itself on
every BTF change unnecessary, which might help with this issue.
Separately (depending on what the negatives of the reproducible_build
option turn out to be) we can look into making pahole produce a more
consistent internal BTF type ordering without negatively affecting the
overall BTF dedup performance in pahole. Hopefully I can work with
Ihor on this as follow-ups.
P.S. I also spent more time than I'm willing to admit trying to
improve bpftool's BTF sorting to minimize the chance of vmlinux.h
contents being different, and I think I removed a bunch of cases where
we had unnecessary differences. But still, ordering everything based
on type and field names alone is fundamentally non-deterministic,
unfortunately.
Anyway, Mykyta (cc'ed), what do you think about the changes below?
Note that I'm also fixing the incorrect handling of enum64 (it would
be nice to prepare a proper patch and send it upstream, if you get a
chance).
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 6789c7a4d5ca..e8a244b09d56 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -50,6 +50,7 @@ struct sort_datum {
int type_rank;
const char *sort_name;
const char *own_name;
+ __u64 disambig_hash;
};
static const char *btf_int_enc_str(__u8 encoding)
@@ -552,35 +553,92 @@ static int btf_type_rank(const struct btf *btf, __u32 index, bool has_name)
}
}
-static const char *btf_type_sort_name(const struct btf *btf, __u32 index, bool from_ref)
+static const char *btf_type_sort_name(const struct btf *btf, __u32 index, bool from_ref, const char *typedef_name)
{
const struct btf_type *t = btf__type_by_id(btf, index);
+ int name_off;
switch (btf_kind(t)) {
case BTF_KIND_ENUM:
- case BTF_KIND_ENUM64: {
- int name_off = t->name_off;
-
/* Use name of the first element for anonymous enums if allowed */
if (!from_ref && !t->name_off && btf_vlen(t))
name_off = btf_enum(t)->name_off;
+ else
+ name_off = t->name_off;
+
+ return btf__name_by_offset(btf, name_off);
+ case BTF_KIND_ENUM64:
+ /* Use name of the first element for anonymous enums if allowed */
+ if (!from_ref && !t->name_off && btf_vlen(t))
+ name_off = btf_enum64(t)->name_off;
+ else
+ name_off = t->name_off;
return btf__name_by_offset(btf, name_off);
- }
case BTF_KIND_ARRAY:
- return btf_type_sort_name(btf, btf_array(t)->type, true);
+ return btf_type_sort_name(btf, btf_array(t)->type, true, typedef_name);
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ if (t->name_off == 0)
+ return typedef_name;
+ return btf__name_by_offset(btf, t->name_off);
+ case BTF_KIND_TYPEDEF:
+ return btf_type_sort_name(btf, t->type, true,
+   btf__name_by_offset(btf, t->name_off));
case BTF_KIND_TYPE_TAG:
case BTF_KIND_CONST:
case BTF_KIND_PTR:
case BTF_KIND_VOLATILE:
case BTF_KIND_RESTRICT:
- case BTF_KIND_TYPEDEF:
case BTF_KIND_DECL_TAG:
- return btf_type_sort_name(btf, t->type, true);
+ return btf_type_sort_name(btf, t->type, true, typedef_name);
default:
return btf__name_by_offset(btf, t->name_off);
}
- return NULL;
+}
+
+static __u64 hasher(__u64 hash, __u64 val)
+{
+ return hash * 31 + val;
+}
+
+static __u64 btf_type_disambig_hash(const struct btf *btf, __u32 index)
+{
+ const struct btf_type *t = btf__type_by_id(btf, index);
+ int i;
+ size_t hash = 0;
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_ENUM:
+ hash = hasher(hash, t->size);
+ for (i = 0; i < btf_vlen(t); i++)
+ hash = hasher(hash, btf_enum(t)[i].name_off);
+ break;
+ case BTF_KIND_ENUM64:
+ hash = hasher(hash, t->size);
+ for (i = 0; i < btf_vlen(t); i++)
+ hash = hasher(hash, btf_enum64(t)[i].name_off);
+ break;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *m;
+ const char *ftname;
+
+ hash = hasher(hash, t->size);
+ for (i = 0; i < btf_vlen(t); i++) {
+ m = btf_members(t) + i;
+ hash = hasher(hash, m->name_off);
+
+ /* resolve field type's name and hash it as well */
+ ftname = btf_type_sort_name(btf, m->type, false, "");
+ hash = hasher(hash, str_hash(ftname));
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ return hash;
}
static int btf_type_compare(const void *left, const void *right)
@@ -596,7 +654,14 @@ static int btf_type_compare(const void *left, const void *right)
if (r)
return r;
- return strcmp(d1->own_name, d2->own_name);
+ r = strcmp(d1->own_name, d2->own_name);
+ if (r)
+ return r;
+
+ if (d1->disambig_hash != d2->disambig_hash)
+ return d1->disambig_hash < d2->disambig_hash ? -1 : 1;
+
+ return d1->index < d2->index ? -1 : 1;
}
static struct sort_datum *sort_btf_c(const struct btf *btf)
@@ -615,8 +680,9 @@ static struct sort_datum *sort_btf_c(const struct btf *btf)
d->index = i;
d->type_rank = btf_type_rank(btf, i, false);
- d->sort_name = btf_type_sort_name(btf, i, false);
+ d->sort_name = btf_type_sort_name(btf, i, false, "");
d->own_name = btf__name_by_offset(btf, t->name_off);
+ d->disambig_hash = btf_type_disambig_hash(btf, i);
}
qsort(datums, n, sizeof(struct sort_datum), btf_type_compare);