LLVM change [1] makes __sync_fetch_and_{and,or,xor}() generate atomic_fetch_*() insns even if the return value is not used. This is a deliberate choice to make sure barrier semantics are preserved from source code to asm insns. But the change in [1] caused an arena_atomics selftest failure: test_arena_atomics:PASS:arena atomics skeleton open 0 nsec libbpf: prog 'and': BPF program load failed: Permission denied libbpf: prog 'and': -- BEGIN PROG LOAD LOG -- arg#0 reference type('UNKNOWN ') size cannot be determined: -22 0: R1=ctx() R10=fp0 ; if (pid != (bpf_get_current_pid_tgid() >> 32)) @ arena_atomics.c:87 0: (18) r1 = 0xffffc90000064000 ; R1_w=map_value(map=arena_at.bss,ks=4,vs=4) 2: (61) r6 = *(u32 *)(r1 +0) ; R1_w=map_value(map=arena_at.bss,ks=4,vs=4) R6_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) 3: (85) call bpf_get_current_pid_tgid#14 ; R0_w=scalar() 4: (77) r0 >>= 32 ; R0_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) 5: (5d) if r0 != r6 goto pc+11 ; R0_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff)) R6_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0x) ; __sync_fetch_and_and(&and64_value, 0x011ull << 32); @ arena_atomics.c:91 6: (18) r1 = 0x100000000060 ; R1_w=scalar() 8: (bf) r1 = addr_space_cast(r1, 0, 1) ; R1_w=arena 9: (18) r2 = 0x1100000000 ; R2_w=0x1100000000 11: (db) r2 = atomic64_fetch_and((u64 *)(r1 +0), r2) BPF_ATOMIC stores into R1 arena is not allowed processed 9 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0 -- END PROG LOAD LOG -- libbpf: prog 'and': failed to load: -13 libbpf: failed to load object 'arena_atomics' libbpf: failed to load BPF skeleton 'arena_atomics': -13 test_arena_atomics:FAIL:arena atomics skeleton load unexpected error: -13 (errno 13) #3 arena_atomics:FAIL The failure is caused by [2], where atomic{64,}_fetch_{and,or,xor}() are not allowed on arena addresses. 
Version 2 of the patch fixed the issue by using inline asm ([3]). But further discussion suggested finding a way to generate locked insns directly from source, which is more user-friendly. So in the not-yet-merged LLVM patch ([4]), if relaxed memory ordering is used and the return value is not used, a locked insn can be generated. So when the bpf selftest is compiled with LLVM patch [4], the following code __c11_atomic_fetch_and(&and64_value, 0x011ull << 32, memory_order_relaxed); is able to generate a locked insn, hence fixing the selftest failure. [1] https://github.com/llvm/llvm-project/pull/106494 [2] d503a04f8bc0 ("bpf: Add support for certain atomics in bpf_arena to x86 JIT") [3] https://lore.kernel.org/bpf/20240803025928.4184433-1-yonghong.song@xxxxxxxxx/ [4] https://github.com/llvm/llvm-project/pull/107343 Signed-off-by: Yonghong Song <yonghong.song@xxxxxxxxx> --- .../selftests/bpf/progs/arena_atomics.c | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) Changelogs: v2 -> v3: - Use C11 atomic functions. When relaxed memory ordering is specified and the return value is not used, a locked insn will be generated, which makes the selftests pass. 
v1 -> v2: - Add __BPF_FEATURE_ADDR_SPACE_CAST to guard newly added asm codes for llvm >= 19 diff --git a/tools/testing/selftests/bpf/progs/arena_atomics.c b/tools/testing/selftests/bpf/progs/arena_atomics.c index bb0acd79d28a..40dd57fca5cc 100644 --- a/tools/testing/selftests/bpf/progs/arena_atomics.c +++ b/tools/testing/selftests/bpf/progs/arena_atomics.c @@ -4,6 +4,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include <stdbool.h> +#include <stdatomic.h> #include "bpf_arena_common.h" struct { @@ -77,8 +78,13 @@ int sub(const void *ctx) return 0; } +#ifdef __BPF_FEATURE_ATOMIC_MEM_ORDERING +_Atomic __u64 __arena_global and64_value = (0x110ull << 32); +_Atomic __u32 __arena_global and32_value = 0x110; +#else __u64 __arena_global and64_value = (0x110ull << 32); __u32 __arena_global and32_value = 0x110; +#endif SEC("raw_tp/sys_enter") int and(const void *ctx) @@ -86,16 +92,25 @@ int and(const void *ctx) if (pid != (bpf_get_current_pid_tgid() >> 32)) return 0; #ifdef ENABLE_ATOMICS_TESTS - +#ifdef __BPF_FEATURE_ATOMIC_MEM_ORDERING + __c11_atomic_fetch_and(&and64_value, 0x011ull << 32, memory_order_relaxed); + __c11_atomic_fetch_and(&and32_value, 0x011, memory_order_relaxed); +#else __sync_fetch_and_and(&and64_value, 0x011ull << 32); __sync_fetch_and_and(&and32_value, 0x011); +#endif #endif return 0; } +#ifdef __BPF_FEATURE_ATOMIC_MEM_ORDERING +_Atomic __u32 __arena_global or32_value = 0x110; +_Atomic __u64 __arena_global or64_value = (0x110ull << 32); +#else __u32 __arena_global or32_value = 0x110; __u64 __arena_global or64_value = (0x110ull << 32); +#endif SEC("raw_tp/sys_enter") int or(const void *ctx) @@ -103,15 +118,25 @@ int or(const void *ctx) if (pid != (bpf_get_current_pid_tgid() >> 32)) return 0; #ifdef ENABLE_ATOMICS_TESTS +#ifdef __BPF_FEATURE_ATOMIC_MEM_ORDERING + __c11_atomic_fetch_or(&or64_value, 0x011ull << 32, memory_order_relaxed); + __c11_atomic_fetch_or(&or32_value, 0x011, memory_order_relaxed); +#else 
__sync_fetch_and_or(&or64_value, 0x011ull << 32); __sync_fetch_and_or(&or32_value, 0x011); +#endif #endif return 0; } +#ifdef __BPF_FEATURE_ATOMIC_MEM_ORDERING +_Atomic __u64 __arena_global xor64_value = (0x110ull << 32); +_Atomic __u32 __arena_global xor32_value = 0x110; +#else __u64 __arena_global xor64_value = (0x110ull << 32); __u32 __arena_global xor32_value = 0x110; +#endif SEC("raw_tp/sys_enter") int xor(const void *ctx) @@ -119,8 +144,13 @@ int xor(const void *ctx) if (pid != (bpf_get_current_pid_tgid() >> 32)) return 0; #ifdef ENABLE_ATOMICS_TESTS +#ifdef __BPF_FEATURE_ATOMIC_MEM_ORDERING + __c11_atomic_fetch_xor(&xor64_value, 0x011ull << 32, memory_order_relaxed); + __c11_atomic_fetch_xor(&xor32_value, 0x011, memory_order_relaxed); +#else __sync_fetch_and_xor(&xor64_value, 0x011ull << 32); __sync_fetch_and_xor(&xor32_value, 0x011); +#endif #endif return 0; -- 2.43.5