Re: [PATCH v2 bpf-next 12/13] bpf: Add tests for new BPF atomic operations

From: Yonghong Song
Date: Tue Dec 01 2020 - 12:25:43 EST




On 12/1/20 4:56 AM, Brendan Jackman wrote:
On Mon, Nov 30, 2020 at 07:55:02PM -0800, Yonghong Song wrote:
On 11/27/20 9:57 AM, Brendan Jackman wrote:
[...]
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 3d5940cd110d..5eadfd09037d 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -228,6 +228,12 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')
MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
+# Determine if Clang supports BPF arch v4, and therefore atomics.
+CLANG_SUPPORTS_V4=$(if $(findstring v4,$(shell $(CLANG) --target=bpf -mcpu=? 2>&1)),true,)
+ifeq ($(CLANG_SUPPORTS_V4),true)
+ CFLAGS += -DENABLE_ATOMICS_TESTS
+endif
+
CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
-I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \
@@ -250,7 +256,9 @@ define CLANG_BPF_BUILD_RULE
$(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
$(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
- $(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
+ $(LLC) -mattr=dwarfris -march=bpf \
+ -mcpu=$(if $(CLANG_SUPPORTS_V4),v4,v3) \
+ $4 -filetype=obj -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
define CLANG_NOALU32_BPF_BUILD_RULE
@@ -391,7 +399,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
$(wildcard progs/btf_dump_test_case_*.c)
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
-TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) $(if $(CLANG_SUPPORTS_V4),-DENABLE_ATOMICS_TESTS,)

If the compiler indeed supports cpu v4 (i.e., atomic insns),
-DENABLE_ATOMICS_TESTS will be added to TRUNNER_BPF_CFLAGS, and
as a result -DENABLE_ATOMICS_TESTS is also defined for the
no-alu32 test build, which causes a compilation error.

I used the following hack to work around the issue, i.e., it only adds
the definition to the default (alu32) test run.

index 5eadfd09037d..3d1320fd93eb 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -230,9 +230,6 @@ MENDIAN=$(if
$(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)

# Determine if Clang supports BPF arch v4, and therefore atomics.
CLANG_SUPPORTS_V4=$(if $(findstring v4,$(shell $(CLANG) --target=bpf
-mcpu=? 2>&1)),true,)
-ifeq ($(CLANG_SUPPORTS_V4),true)
- CFLAGS += -DENABLE_ATOMICS_TESTS
-endif

CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
@@ -255,6 +252,7 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h
define CLANG_BPF_BUILD_RULE
$(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
$(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \
+ $(if $(CLANG_SUPPORTS_V4),-DENABLE_ATOMICS_TESTS,) \
-c $1 -o - || echo "BPF obj compilation failed") | \
$(LLC) -mattr=dwarfris -march=bpf \
-mcpu=$(if $(CLANG_SUPPORTS_V4),v4,v3) \
@@ -399,7 +397,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c
trace_helpers.c \
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
$(wildcard progs/btf_dump_test_case_*.c)
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
-TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) $(if
$(CLANG_SUPPORTS_V4),-DENABLE_ATOMICS_TESTS,)
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
TRUNNER_BPF_LDFLAGS := -mattr=+alu32
$(eval $(call DEFINE_TEST_RUNNER,test_progs))

Ah, good point. I think your "hack" actually improves the overall result
anyway since it avoids the awkward global mutation of CFLAGS. Thanks!

I wonder if we should actually have Clang define a built-in macro to say
that the atomics are supported?

We are using GCC builtins, and they are all supported by clang, so
"#if __has_builtin(__sync_fetch_and_or)" is always true and
won't work here.

We could add a macro like __BPF_ATOMICS_SUPPORTED__ in clang.
But you still need a check to decide whether to use -mcpu=v4. Once
you have that information, it is trivial to add your
own macro when building with -mcpu=v4.


diff --git a/tools/testing/selftests/bpf/prog_tests/atomics_test.c b/tools/testing/selftests/bpf/prog_tests/atomics_test.c
new file mode 100644
index 000000000000..8ecc0392fdf9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/atomics_test.c
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#ifdef ENABLE_ATOMICS_TESTS
+
+#include "atomics_test.skel.h"
+
+static void test_add(void)
[...]
+
+#endif /* ENABLE_ATOMICS_TESTS */
diff --git a/tools/testing/selftests/bpf/progs/atomics_test.c b/tools/testing/selftests/bpf/progs/atomics_test.c
[...]
+__u64 xor64_value = (0x110ull << 32);
+__u64 xor64_result = 0;
+__u32 xor32_value = 0x110;
+__u32 xor32_result = 0;
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(xor, int a)
+{
+ xor64_result = __sync_fetch_and_xor(&xor64_value, 0x011ull << 32);
+ xor32_result = __sync_fetch_and_xor(&xor32_value, 0x011);
+
+ return 0;
+}

All of the above __sync_fetch_and_{add, sub, and, or, xor} calls produce a
return value that is used later. To test the atomic_<op> instructions, it
would be good if you could add some tests which ignore the return value.

Good idea - adding an extra case to each prog. This won't assert that
LLVM is generating "optimal" code (without BPF_FETCH) but we can at
least get some confidence we aren't generating total garbage.