Re: [HELP] failed to resolve CO-RE relocation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Apr 26, 2023 at 9:53 PM Namhyung Kim <namhyung@xxxxxxxxx> wrote:
>
> On Wed, Apr 26, 2023 at 09:26:59PM -0700, Andrii Nakryiko wrote:
> > On Wed, Apr 26, 2023 at 7:21 PM Namhyung Kim <namhyung@xxxxxxxxxx> wrote:
> > >
> > > Hello Andrii,
> > >
> > > On Wed, Apr 26, 2023 at 6:19 PM Andrii Nakryiko
> > > <andrii.nakryiko@xxxxxxxxx> wrote:
> > > >
> > > > On Wed, Apr 26, 2023 at 5:14 PM Namhyung Kim <namhyung@xxxxxxxxxx> wrote:
> > > > >
> > > > > Hello,
> > > > >
> > > > > I'm having a problem of loading perf lock contention BPF program [1]
> > > > > on old kernels.  It has collect_lock_syms() to get the address of each
> > > > > CPU's run-queue lock.  The kernel 5.14 changed the name of the field
> > > > > so there's bpf_core_field_exists to check the name like below.
> > > > >
> > > > >         if (bpf_core_field_exists(rq_new->__lock))
> > > > >                 lock_addr = (__u64)&rq_new->__lock;
> > > > >         else
> > > > >                 lock_addr = (__u64)&rq_old->lock;
> > > >
> > > > I suspect compiler rewrites it to something like
> > > >
> > > >    lock_addr = (__u64)&rq_old->lock;
> > > >    if (bpf_core_field_exists(rq_new->__lock))
> > > >         lock_addr = (__u64)&rq_new->__lock;
> > > >
> > > > so rq_old relocation always happens and ends up being not guarded
> > > > properly. You can try adding barrier_var(rq_new) and
> > > > barrier_var(rq_old) around if and inside branches, that should
> > > > pessimize compiler
> > > >
> > > > alternatively if you do
> > > >
> > > > if (bpf_core_field_exists(rq_new->__lock))
> > > >     lock_addr = (__u64)&rq_new->__lock;
> > > > else if (bpf_core_field_exists(rq_old->lock))
> > > >     lock_addr = (__u64)&rq_old->lock;
> > > > else
> > > >     lock_addr = 0; /* or signal error somehow */
> > > >
> > > > It might work as well.
> > >
> > > Thanks a lot for your comment!
> > >
> > > I've tried the below code but no luck. :(
> >
> > Can you post an output of llvm-objdump -d <your.bpf.o> of the program
> > (collect_lock_syms?) containing above code (or at least relevant
> > portions with some buffer before/after to get a sense of what's going
> > on)
>
> Sure.
>
> Here's the full source code:
>
>   SEC("raw_tp/bpf_test_finish")
>   int BPF_PROG(collect_lock_syms)
>   {
>         __u64 lock_addr;
>         __u32 lock_flag;
>
>         for (int i = 0; i < MAX_CPUS; i++) {
>                 struct rq *rq = bpf_per_cpu_ptr(&runqueues, i);
>                 struct rq___new *rq_new = (void *)rq;
>                 struct rq___old *rq_old = (void *)rq;
>
>                 if (rq == NULL)
>                         break;
>
>                 barrier_var(rq_old);
>                 barrier_var(rq_new);
>
>                 if (bpf_core_field_exists(rq_old->lock)) {
>                         barrier_var(rq_old);
>                         lock_addr = (__u64)&rq_old->lock;
>                 } else if (bpf_core_field_exists(rq_new->__lock)) {
>                         barrier_var(rq_new);
>                         lock_addr = (__u64)&rq_new->__lock;
>                 } else
>                         continue;
>
>                 lock_flag = LOCK_CLASS_RQLOCK;
>                 bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY);
>         }
>         return 0;
>   }
>
> And the disassembly is:
>
>   $ llvm-objdump -d util/bpf_skel/.tmp/lock_contention.bpf.o
>   ...
>   Disassembly of section raw_tp/bpf_test_finish:
>
>   0000000000000000 <collect_lock_syms>:
>        0:       b7 06 00 00 00 00 00 00 r6 = 0
>        1:       b7 07 00 00 01 00 00 00 r7 = 1
>        2:       b7 09 00 00 01 00 00 00 r9 = 1
>        3:       b7 08 00 00 00 00 00 00 r8 = 0
>
>   0000000000000020 <LBB3_1>:
>        4:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
>        6:       bf 62 00 00 00 00 00 00 r2 = r6
>        7:       85 00 00 00 99 00 00 00 call 153
>        8:       15 00 12 00 00 00 00 00 if r0 == 0 goto +18 <LBB3_8>
>        9:       bf 01 00 00 00 00 00 00 r1 = r0
>       10:       15 07 12 00 00 00 00 00 if r7 == 0 goto +18 <LBB3_4>
>       11:       0f 81 00 00 00 00 00 00 r1 += r8
>
>   0000000000000060 <LBB3_6>:
>       12:       63 9a f4 ff 00 00 00 00 *(u32 *)(r10 - 12) = r9
>       13:       7b 1a f8 ff 00 00 00 00 *(u64 *)(r10 - 8) = r1
>       14:       bf a2 00 00 00 00 00 00 r2 = r10
>       15:       07 02 00 00 f8 ff ff ff r2 += -8
>       16:       bf a3 00 00 00 00 00 00 r3 = r10
>       17:       07 03 00 00 f4 ff ff ff r3 += -12
>       18:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
>       20:       b7 04 00 00 00 00 00 00 r4 = 0
>       21:       85 00 00 00 02 00 00 00 call 2
>
>   00000000000000b0 <LBB3_7>:
>       22:       07 06 00 00 01 00 00 00 r6 += 1
>       23:       bf 61 00 00 00 00 00 00 r1 = r6
>       24:       67 01 00 00 20 00 00 00 r1 <<= 32
>       25:       77 01 00 00 20 00 00 00 r1 >>= 32
>       26:       55 01 e9 ff 00 04 00 00 if r1 != 1024 goto -23 <LBB3_1>
>
>   00000000000000d8 <LBB3_8>:
>       27:       b7 00 00 00 00 00 00 00 r0 = 0
>       28:       95 00 00 00 00 00 00 00 exit
>
>   00000000000000e8 <LBB3_4>:
>       29:       b7 01 00 00 01 00 00 00 r1 = 1
>       30:       15 01 f7 ff 00 00 00 00 if r1 == 0 goto -9 <LBB3_7>
>       31:       b7 01 00 00 00 00 00 00 r1 = 0
>       32:       0f 10 00 00 00 00 00 00 r0 += r1
>       33:       bf 01 00 00 00 00 00 00 r1 = r0
>       34:       05 00 e9 ff 00 00 00 00 goto -23 <LBB3_6>
>
> The error message is like this:
>
>   libbpf: prog 'collect_lock_syms': BPF program load failed: Invalid argument
>   libbpf: prog 'collect_lock_syms': -- BEGIN PROG LOAD LOG --
>   reg type unsupported for arg#0 function collect_lock_syms#380
>   0: R1=ctx(off=0,imm=0) R10=fp0
>   ; int BPF_PROG(collect_lock_syms)
>   0: (b7) r6 = 0                        ; R6_w=0
>   1: (b7) r7 = 0                        ; R7_w=0
>   2: (b7) r9 = 1                        ; R9_w=1
>   3: <invalid CO-RE relocation>
>   failed to resolve CO-RE relocation <byte_off> [381] struct rq___old.lock (0:0 @ offset 0)
>   processed 4 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0
>   -- END PROG LOAD LOG --
>   libbpf: prog 'collect_lock_syms': failed to load: -22
>   libbpf: failed to load object 'lock_contention_bpf'
>   libbpf: failed to load BPF skeleton 'lock_contention_bpf': -22
>   Failed to load lock-contention BPF skeleton
>   lock contention BPF setup failed
>
>

Ok, I didn't manage to force compiler to behave as long as
`&rq_old->lock` pattern was used. So I went for a different approach.
This works:

$ git diff
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c
b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 8911e2a077d8..8d3cfbb3cc65 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -418,32 +418,32 @@ int contention_end(u64 *ctx)

 extern struct rq runqueues __ksym;

-struct rq__old {
+struct rq___old {
        raw_spinlock_t lock;
 } __attribute__((preserve_access_index));

-struct rq__new {
+struct rq___new {
        raw_spinlock_t __lock;
 } __attribute__((preserve_access_index));

 SEC("raw_tp/bpf_test_finish")
 int BPF_PROG(collect_lock_syms)
 {
-       __u64 lock_addr;
+       __u64 lock_addr, lock_off;
        __u32 lock_flag;

+       if (bpf_core_field_exists(struct rq___new, __lock))
+               lock_off = offsetof(struct rq___new, __lock);
+       else
+               lock_off = offsetof(struct rq___old, lock);
+
        for (int i = 0; i < MAX_CPUS; i++) {
                struct rq *rq = bpf_per_cpu_ptr(&runqueues, i);
-               struct rq__new *rq_new = (void *)rq;
-               struct rq__old *rq_old = (void *)rq;

                if (rq == NULL)
                        break;

-               if (bpf_core_field_exists(rq_new->__lock))
-                       lock_addr = (__u64)&rq_new->__lock;
-               else
-                       lock_addr = (__u64)&rq_old->lock;
+               lock_addr = (__u64)(void *)rq + lock_off;
                lock_flag = LOCK_CLASS_RQLOCK;
                bpf_map_update_elem(&lock_syms, &lock_addr,
&lock_flag, BPF_ANY);
        }


> Thanks,
> Namhyung
>
>




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux