Re: BPF skels in perf .Re: [GIT PULL] perf tools changes for v6.4

Arnaldo Carvalho de Melo <acme@xxxxxxxxxx> · Thu, 4 May 2023 18:48:50 -0300

Em Thu, May 04, 2023 at 04:07:29PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Thu, May 04, 2023 at 11:50:07AM -0700, Andrii Nakryiko escreveu:
> > On Thu, May 4, 2023 at 10:52 AM Arnaldo Carvalho de Melo <acme@xxxxxxxxxx> wrote:
> > > Andrii, can you add some more information about the usage of vmlinux.h
> > > instead of using kernel headers?
>  
> > I'll just say that vmlinux.h is not a hard requirement to build BPF
> > programs, it's more a convenience allowing easy access to definitions
> > of both UAPI and kernel-internal structures for tracing needs and
> > marking them relocatable using BPF CO-RE machinery. Lots of real-world
> > applications just check in a pregenerated vmlinux.h to avoid a build-time
> > dependency on an up-to-date host kernel and such.
>  
> > If vmlinux.h generation and usage is causing issues, though, given
> > that perf's BPF programs don't seem to be using many different kernel
> > types, it might be a better option to just use UAPI headers for public
> > kernel type definitions, and just define CO-RE-relocatable minimal
> > definitions locally in perf's BPF code for the other types necessary.
> > E.g., if perf needs only pid and tgid from task_struct, this would
> > suffice:
>  
> > struct task_struct {
> >     int pid;
> >     int tgid;
> > } __attribute__((preserve_access_index));
> 
> Yeah, that seems like a way better approach: no vmlinux involved, libbpf
> CO-RE notices that task_struct differs from this two-integer version
> (of course) and relocates the accesses to where those fields are in the
> running kernel by using /sys/kernel/btf/vmlinux.

I did it for one of the skels: build tested, runtime untested, and not
using any vmlinux or BTF to help. It is not that bad: more verbose, but at
least we state which fields we actually use, and we have those attributes
documenting that those offsets will be recorded for future use, etc.

Namhyung, can you please check that this works?

Thanks,

- Arnaldo

diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
index 6a438e0102c5a2cb..f376d162549ebd74 100644
--- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
+++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
@@ -1,11 +1,40 @@
 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 // Copyright (c) 2021 Facebook
 // Copyright (c) 2021 Google
-#include "vmlinux.h"
+#include <linux/types.h>
+#include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 #include <bpf/bpf_core_read.h>
 
+// libbpf's CO-RE will take care of the relocations so that these fields match
+// the layout of these structs in the kernel that this ends up running on.
+
+struct cgroup_subsys_state {
+	struct cgroup *cgroup;
+} __attribute__((preserve_access_index));
+
+struct css_set {
+	struct cgroup_subsys_state *subsys[13];
+} __attribute__((preserve_access_index));
+
+struct task_struct {
+	struct css_set *cgroups;
+} __attribute__((preserve_access_index));
+
+struct kernfs_node {
+	__u64 id;
+}  __attribute__((preserve_access_index));
+
+struct cgroup {
+	struct kernfs_node *kn;
+	int                level;
+}  __attribute__((preserve_access_index));
+
+enum cgroup_subsys_id {
+	perf_event_cgrp_id  = 8,
+};
+
 #define MAX_LEVELS  10  // max cgroup hierarchy level: arbitrary
 #define MAX_EVENTS  32  // max events per cgroup: arbitrary
 
@@ -52,7 +81,7 @@ struct cgroup___new {
 /* old kernel cgroup definition */
 struct cgroup___old {
 	int level;
-	u64 ancestor_ids[];
+	__u64 ancestor_ids[];
 } __attribute__((preserve_access_index));
 
 const volatile __u32 num_events = 1;