On Mon, Nov 30, 2020 at 07:55:02PM -0800, Yonghong Song wrote: > On 11/27/20 9:57 AM, Brendan Jackman wrote: [...] > > diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile > > index 3d5940cd110d..5eadfd09037d 100644 > > --- a/tools/testing/selftests/bpf/Makefile > > +++ b/tools/testing/selftests/bpf/Makefile > > @@ -228,6 +228,12 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \ > > grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__') > > MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) > > +# Determine if Clang supports BPF arch v4, and therefore atomics. > > +CLANG_SUPPORTS_V4=$(if $(findstring v4,$(shell $(CLANG) --target=bpf -mcpu=? 2>&1)),true,) > > +ifeq ($(CLANG_SUPPORTS_V4),true) > > + CFLAGS += -DENABLE_ATOMICS_TESTS > > +endif > > + > > CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) > > BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ > > -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ > > @@ -250,7 +256,9 @@ define CLANG_BPF_BUILD_RULE > > $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) > > $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \ > > -c $1 -o - || echo "BPF obj compilation failed") | \ > > - $(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2 > > + $(LLC) -mattr=dwarfris -march=bpf \ > > + -mcpu=$(if $(CLANG_SUPPORTS_V4),v4,v3) \ > > + $4 -filetype=obj -o $2 > > endef > > # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 > > define CLANG_NOALU32_BPF_BUILD_RULE > > @@ -391,7 +399,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ > > TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ > > $(wildcard progs/btf_dump_test_case_*.c) > > TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE > > -TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) > > +TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) $(if $(CLANG_SUPPORTS_V4),-DENABLE_ATOMICS_TESTS,) > > If the compiler indeed supports cpu v4 (i.e., atomic insns), > -DENABLE_ATOMICS_TESTS will be 
added to TRUNNER_BPF_CFLAGS and > eventually -DENABLE_ATOMICS_TESTS is also available for > no-alu32 test and this will cause compilation error. > > I did the following hack to work around the issue, i.e., only adds > the definition to default (alu32) test run. > > index 5eadfd09037d..3d1320fd93eb 100644 > --- a/tools/testing/selftests/bpf/Makefile > +++ b/tools/testing/selftests/bpf/Makefile > @@ -230,9 +230,6 @@ MENDIAN=$(if > $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) > > # Determine if Clang supports BPF arch v4, and therefore atomics. > CLANG_SUPPORTS_V4=$(if $(findstring v4,$(shell $(CLANG) --target=bpf > -mcpu=? 2>&1)),true,) > -ifeq ($(CLANG_SUPPORTS_V4),true) > - CFLAGS += -DENABLE_ATOMICS_TESTS > -endif > > CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) > BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ > @@ -255,6 +252,7 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h > define CLANG_BPF_BUILD_RULE > $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) > $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \ > + $(if $(CLANG_SUPPORTS_V4),-DENABLE_ATOMICS_TESTS,) \ > -c $1 -o - || echo "BPF obj compilation failed") | \ > $(LLC) -mattr=dwarfris -march=bpf \ > -mcpu=$(if $(CLANG_SUPPORTS_V4),v4,v3) \ > @@ -399,7 +397,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c > trace_helpers.c \ > TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ > $(wildcard progs/btf_dump_test_case_*.c) > TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE > -TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) $(if > $(CLANG_SUPPORTS_V4),-DENABLE_ATOMICS_TESTS,) > +TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) > TRUNNER_BPF_LDFLAGS := -mattr=+alu32 > $(eval $(call DEFINE_TEST_RUNNER,test_progs)) Ah, good point. I think your "hack" actually improves the overall result anyway since it avoids the awkward global mutation of CFLAGS. Thanks! I wonder if we should actually have Clang define a built-in macro to say that the atomics are supported? 
> > diff --git a/tools/testing/selftests/bpf/prog_tests/atomics_test.c b/tools/testing/selftests/bpf/prog_tests/atomics_test.c > > new file mode 100644 > > index 000000000000..8ecc0392fdf9 > > --- /dev/null > > +++ b/tools/testing/selftests/bpf/prog_tests/atomics_test.c > > @@ -0,0 +1,329 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > + > > +#include <test_progs.h> > > + > > +#ifdef ENABLE_ATOMICS_TESTS > > + > > +#include "atomics_test.skel.h" > > + > > +static void test_add(void) > [...] > > + > > +#endif /* ENABLE_ATOMICS_TESTS */ > > diff --git a/tools/testing/selftests/bpf/progs/atomics_test.c b/tools/testing/selftests/bpf/progs/atomics_test.c [...] > > +__u64 xor64_value = (0x110ull << 32); > > +__u64 xor64_result = 0; > > +__u32 xor32_value = 0x110; > > +__u32 xor32_result = 0; > > +SEC("fentry/bpf_fentry_test1") > > +int BPF_PROG(xor, int a) > > +{ > > + xor64_result = __sync_fetch_and_xor(&xor64_value, 0x011ull << 32); > > + xor32_result = __sync_fetch_and_xor(&xor32_value, 0x011); > > + > > + return 0; > > +} > > All of the above __sync_fetch_and_{add, sub, and, or, xor} produce a return > value used later. To test atomic_<op> instructions, it will be good if > you can add some tests which ignore the return value. Good idea - adding an extra case to each prog. This won't assert that LLVM is generating "optimal" code (without BPF_FETCH) but we can at least get some confidence we aren't generating total garbage.