Em Thu, Aug 10, 2023 at 11:48:51AM -0700, Ian Rogers escreveu: > Previously a BPF event of augmented_raw_syscalls.c could be used to > enable augmentation of syscalls by perf trace. As BPF events are no > longer supported, switch to using a BPF skeleton which when attached > explicitly opens the sysenter and sysexit tracepoints. > > The dump map is removed as debugging wasn't supported by the > augmentation and bpf_printk can be used when necessary. > > Remove tools/perf/examples/bpf/augmented_raw_syscalls.c so that the > rename/migration to a BPF skeleton captures that this was the source. So, there is a problem where the augmented_raw_syscalls connect/sendto handlers are being rejected by the verifier, the way you did it makes it to print the verifier output and then continue without augmentation, unsure if this is a good default, opinions? [root@quaco ~]# perf trace -e open* libbpf: prog 'sys_enter_connect': BPF program load failed: Permission denied libbpf: prog 'sys_enter_connect': -- BEGIN PROG LOAD LOG -- reg type unsupported for arg#0 function sys_enter_connect#59 0: R1=ctx(off=0,imm=0) R10=fp0 ; int sys_enter_connect(struct syscall_enter_args *args) 0: (bf) r6 = r1 ; R1=ctx(off=0,imm=0) R6_w=ctx(off=0,imm=0) 1: (b7) r1 = 0 ; R1_w=0 ; int key = 0; 2: (63) *(u32 *)(r10 -4) = r1 ; R1_w=0 R10=fp0 fp-8=0000???? 3: (bf) r2 = r10 ; R2_w=fp0 R10=fp0 ; 4: (07) r2 += -4 ; R2_w=fp-4 ; return bpf_map_lookup_elem(&augmented_args_tmp, &key); 5: (18) r1 = 0xffff8de5ae1d4600 ; R1_w=map_ptr(off=0,ks=4,vs=8272,imm=0) 7: (85) call bpf_map_lookup_elem#1 ; R0_w=map_value_or_null(id=1,off=0,ks=4,vs=8272,imm=0) 8: (bf) r7 = r0 ; R0_w=map_value_or_null(id=1,off=0,ks=4,vs=8272,imm=0) R7_w=map_value_or_null(id=1,off=0,ks=4,vs=8272,imm=0) 9: (b7) r0 = 1 ; R0_w=1 ; if (augmented_args == NULL) 10: (15) if r7 == 0x0 goto pc+25 ; R7_w=map_value(off=0,ks=4,vs=8272,imm=0) ; unsigned int socklen = args->args[2]; 11: (79) r1 = *(u64 *)(r6 +32) ; R1_w=scalar() R6_w=ctx(off=0,imm=0) ; 12: (bf) r2 = r1 ; R1_w=scalar(id=2) R2_w=scalar(id=2) 13: (67) r2 <<= 32 ; R2_w=scalar(smax=9223372032559808512,umax=18446744069414584320,var_off=(0x0; 0xffffffff00000000),s32_min=0,s32_max=0,u32_max=0) 14: (77) r2 >>= 32 ; R2_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff)) 15: (b7) r8 = 128 ; R8=128 ; if (socklen > sizeof(augmented_args->saddr)) 16: (25) if r2 > 0x80 goto pc+1 ; R2=scalar(umax=128,var_off=(0x0; 0xff)) 17: (bf) r8 = r1 ; R1=scalar(id=2) R8_w=scalar(id=2) ; const void *sockaddr_arg = (const void *)args->args[1]; 18: (79) r3 = *(u64 *)(r6 +24) ; R3_w=scalar() R6=ctx(off=0,imm=0) ; bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg); 19: (bf) r1 = r7 ; R1_w=map_value(off=0,ks=4,vs=8272,imm=0) R7=map_value(off=0,ks=4,vs=8272,imm=0) 20: (07) r1 += 64 ; R1_w=map_value(off=64,ks=4,vs=8272,imm=0) ; bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg); 21: (bf) r2 = r8 ; R2_w=scalar(id=2) R8_w=scalar(id=2) 22: (85) call bpf_probe_read#4 R2 min value is negative, either use unsigned or 'var &= const' processed 22 insns (limit 1000000) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 1 -- END PROG LOAD LOG -- libbpf: prog 'sys_enter_connect': failed to load: -13 libbpf: failed to load object 'augmented_raw_syscalls_bpf' libbpf: failed to load BPF skeleton 'augmented_raw_syscalls_bpf': -13 0.000 systemd-oomd/959 openat(dfd: CWD, filename: 0xc0a2a2bd, flags: RDONLY|CLOEXEC) = 12 86.339 thermald/1234 openat(dfd: CWD, filename: 0xac000ba0) = 13 87.008 thermald/1234 openat(dfd: CWD, filename: 0xac000eb0) = 13 87.270 thermald/1234 openat(dfd: CWD, filename: 0xac000b70) = 13 89.657 thermald/1234 openat(dfd: CWD, filename: 0xac000eb0) = 13 ^C If I comment out the connect and sendto it doesn't build anymore, whereas before it would continue with the other handlers: CLANG /tmp/build/perf-tools-next/util/bpf_skel/.tmp/augmented_raw_syscalls.bpf.o GENSKEL /tmp/build/perf-tools-next/util/bpf_skel/augmented_raw_syscalls.skel.h CC /tmp/build/perf-tools-next/builtin-trace.o builtin-trace.c: In function ‘cmd_trace’: builtin-trace.c:4873:63: error: ‘struct <anonymous>’ has no member named ‘sys_enter_connect’; did you mean ‘sys_enter_openat’? 4873 | bpf_program__set_autoattach(trace.skel->progs.sys_enter_connect, | ^~~~~~~~~~~~~~~~~ | sys_enter_openat builtin-trace.c:4875:63: error: ‘struct <anonymous>’ has no member named ‘sys_enter_sendto’; did you mean ‘sys_enter_openat’? 4875 | bpf_program__set_autoattach(trace.skel->progs.sys_enter_sendto, | ^~~~~~~~~~~~~~~~ | sys_enter_openat make[3]: *** [/home/acme/git/perf-tools-next/tools/build/Makefile.build:97: /tmp/build/perf-tools-next/builtin-trace.o] Error 1 make[2]: *** [Makefile.perf:662: /tmp/build/perf-tools-next/perf-in.o] Error 2 make[1]: *** [Makefile.perf:238: sub-make] Error 2 make: *** [Makefile:113: install-bin] Error 2 make: Leaving directory '/home/acme/git/perf-tools-next/tools/perf' [acme@quaco perf-tools-next]$ I.e. no need for explicitely referring to those, I think in the past it was just looking if it was there and if so, attaching, I'll try to fix this. If I remove the explicit references in builtin-trace.c: [root@quaco ~]# perf trace -e open* --max-events=10 0.000 thermald/1234 openat(dfd: CWD, filename: "/sys/class/powercap/intel-rapl/intel-rapl:0/intel-rapl:0:2/energy_uj") = 13 0.236 thermald/1234 openat(dfd: CWD, filename: "/sys/class/powercap/intel-rapl/intel-rapl:0/energy_uj") = 13 0.334 thermald/1234 openat(dfd: CWD, filename: "/sys/class/thermal/thermal_zone2/temp") = 13 9.092 systemd-oomd/959 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12 259.212 systemd-oomd/959 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12 497.464 gpm/1049 openat(dfd: CWD, filename: "/dev/tty0") = 4 509.044 systemd-oomd/959 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12 509.559 systemd-oomd/959 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/session.slice/memory.pressure", flags: RDONLY|CLOEXEC) = 12 509.917 systemd-oomd/959 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/session.slice/memory.current", flags: RDONLY|CLOEXEC) = 12 510.111 systemd-oomd/959 openat(dfd: CWD, filename: "/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/session.slice/memory.min", flags: RDONLY|CLOEXEC) = 12 [root@quaco ~]# Cool! Some inception: [root@quaco ~]# perf trace -e perf_event_open perf stat -e cycles,instructions,cache-misses sleep 1 0.000 perf_event_open(attr_uptr: { type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0 (PERF_COUNT_HW_CPU_CYCLES), sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 232297 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 3 0.063 perf_event_open(attr_uptr: { type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0x1 (PERF_COUNT_HW_INSTRUCTIONS), sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 232297 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 4 0.070 perf_event_open(attr_uptr: { type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0x3 (PERF_COUNT_HW_CACHE_MISSES), sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 232297 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 5 Performance counter stats for 'sleep 1': 2,669,464 cycles 1,842,319 instructions # 0.69 insn per cycle 27,716 cache-misses 1.001948592 seconds time elapsed 0.000000000 seconds user 0.001657000 seconds sys [root@quaco ~]# I'm putting what I have in the tmp.perf-tools-next branch, will continue later today. - Arnaldo