[PATCH bpf-next 2/2] selftests/bpf: Add test for bpf_probe_write_user_registered()

Introduce a BPF test program and user space code to test
bpf_probe_write_user_registered().

The test program also demonstrates two ways a BPF program may obtain the
addresses it can write to: either by tracing prctl(), or by accessing
current->bpf_user_writable directly.
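
For illustration, the intended usage is roughly as follows (a sketch
distilled from this test, where MY_TAG stands in for an arbitrary
user-chosen tag; error handling omitted):

  /* User space: register a region the BPF program may write to. */
  struct test_data data;
  prctl(PR_BPF_REGISTER_WRITABLE, &data, sizeof(data), MY_TAG, 0);
  /* ... the BPF program may now write to 'data' ... */
  prctl(PR_BPF_UNREGISTER_WRITABLE, &data, sizeof(data), 0, 0);

  /* BPF program: the write succeeds only if the destination lies within
   * a region the current task registered with a matching tag. */
  err = bpf_probe_write_user_registered(dst, &src_val, sizeof(src_val), MY_TAG);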

Signed-off-by: Marco Elver <elver@xxxxxxxxxx>
---
 .../prog_tests/probe_write_user_registered.c  | 325 ++++++++++++++++++
 .../progs/test_probe_write_user_registered.c  | 219 ++++++++++++
 2 files changed, 544 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/probe_write_user_registered.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_probe_write_user_registered.c

diff --git a/tools/testing/selftests/bpf/prog_tests/probe_write_user_registered.c b/tools/testing/selftests/bpf/prog_tests/probe_write_user_registered.c
new file mode 100644
index 000000000000..78ac0756d365
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/probe_write_user_registered.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023, Google LLC. */
+
+#include <malloc.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/prctl.h>
+#include <time.h>
+
+#include <test_progs.h>
+#include "test_probe_write_user_registered.skel.h"
+
+#define TEST_TAG 0xf23c39ab
+
+/* Encoding of the test access-type in the tv_nsec parameter. */
+enum test_access {
+	TEST_SUB_REGION,
+	TEST_EQ_REGION,
+	TEST_ONE_BY_ONE,
+	TEST_ANY_TAG,
+};
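+/* do_nanosleep() encodes the access type into tv_nsec as (base << 8) | test_access. */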
+
+/* This will be written to by the BPF program. */
+struct test_data {
+	volatile uint64_t padding_start;
+	volatile uint64_t nanosleep_arg;
+	volatile uint64_t padding_end;
+};
+
+static struct test_data test_data;
+
+static void prctl_register_writable(const volatile void *start, size_t size, uint32_t tag)
+{
+	ASSERT_OK(prctl(PR_BPF_REGISTER_WRITABLE, start, size, tag, 0), __func__);
+}
+
+static void prctl_unregister_writable(const volatile void *start, size_t size)
+{
+	ASSERT_OK(prctl(PR_BPF_UNREGISTER_WRITABLE, start, size, 0, 0), __func__);
+}
+
+/* Returns the actual tv_nsec value derived from base and test_access. */
+static uint64_t do_nanosleep(uint64_t base, enum test_access test_access)
+{
+	const uint64_t tv_nsec = base << 8 | test_access;
+	struct timespec ts = {};
+
+	ts.tv_sec = 0;
+	ts.tv_nsec = tv_nsec;
+	syscall(__NR_nanosleep, &ts, NULL);
+
+	return tv_nsec;
+}
+
+/*
+ * Test that the basic usage works: register, write from BPF program,
+ * unregister, after which no more writes can happen.
+ */
+static void test_register_and_unregister(struct test_probe_write_user_registered *skel)
+{
+	uint64_t nsec = 1234;
+	uint64_t expect;
+
+	prctl_register_writable(&test_data, sizeof(test_data), TEST_TAG);
+
+	/* Check that we see the writes. */
+	for (int i = 0; i < 3; ++i) {
+		test_data.nanosleep_arg = 0;
+		expect = do_nanosleep(++nsec, TEST_SUB_REGION);
+		ASSERT_EQ(test_data.nanosleep_arg, expect, __func__);
+		ASSERT_EQ(skel->data->found_user_registered, 1, __func__);
+	}
+
+	/* Registered the whole region, so this should also work... */
+	for (int i = 0; i < 3; ++i) {
+		test_data.nanosleep_arg = 0;
+		expect = do_nanosleep(++nsec, TEST_EQ_REGION);
+		ASSERT_EQ(test_data.nanosleep_arg, expect, __func__);
+		ASSERT_EQ(skel->data->found_user_registered, 1, __func__);
+	}
+
+	prctl_unregister_writable(&test_data, sizeof(test_data));
+
+	/* No more writes after unregistration. */
+	test_data.nanosleep_arg = 0;
+	do_nanosleep(++nsec, TEST_SUB_REGION);
+	ASSERT_EQ(test_data.nanosleep_arg, 0, __func__);
+	ASSERT_EQ(skel->data->found_user_registered, 0, __func__);
+}
+
+/*
+ * Test that accesses with mismatching tags fail.
+ */
+static void test_bad_tag(struct test_probe_write_user_registered *skel)
+{
+	uint64_t expect;
+
+	prctl_register_writable(&test_data, sizeof(test_data), TEST_TAG);
+	test_data.nanosleep_arg = 0;
+	expect = do_nanosleep(1234, TEST_SUB_REGION);
+	ASSERT_EQ(test_data.nanosleep_arg, expect, __func__);
+	ASSERT_EQ(skel->data->found_user_registered, 1, __func__);
+	do_nanosleep(9999, TEST_ANY_TAG); /* fails */
+	ASSERT_EQ(test_data.nanosleep_arg, expect, __func__);
+	ASSERT_EQ(skel->data->found_user_registered, 1, __func__);
+	prctl_unregister_writable(&test_data, sizeof(test_data));
+}
+
+/*
+ * Test that the "any" (zero) tag works.
+ */
+static void test_any_tag(struct test_probe_write_user_registered *skel)
+{
+	uint64_t nsec = 1234;
+	uint64_t expect;
+
+	prctl_register_writable(&test_data, sizeof(test_data), 0);
+
+	for (int i = 0; i < 3; ++i) {
+		test_data.nanosleep_arg = 0;
+		expect = do_nanosleep(++nsec, TEST_ANY_TAG);
+		ASSERT_EQ(test_data.nanosleep_arg, expect, __func__);
+		ASSERT_EQ(skel->data->found_user_registered, 0, __func__);
+	}
+
+	prctl_unregister_writable(&test_data, sizeof(test_data));
+
+	test_data.nanosleep_arg = 0;
+	do_nanosleep(++nsec, TEST_ANY_TAG);
+	ASSERT_EQ(test_data.nanosleep_arg, 0, __func__);
+	ASSERT_EQ(skel->data->found_user_registered, 0, __func__);
+}
+
+/*
+ * Test that invalid prctl() calls fail.
+ */
+static void test_invalid_prctl(struct test_probe_write_user_registered *skel)
+{
+	ASSERT_ERR(prctl(PR_BPF_REGISTER_WRITABLE, NULL, 1, 0, 0), __func__);
+	ASSERT_ERR(prctl(PR_BPF_REGISTER_WRITABLE, &test_data, 0, 0, 0), __func__);
+	prctl_register_writable(&test_data, sizeof(test_data), TEST_TAG);
+	ASSERT_ERR(prctl(PR_BPF_REGISTER_WRITABLE, &test_data, sizeof(test_data), 0, 0), __func__);
+	ASSERT_ERR(prctl(PR_BPF_REGISTER_WRITABLE, &test_data, 2, 0, 0), __func__);
+	prctl_register_writable((void *)&test_data + 1, 1, TEST_TAG);
+	prctl_register_writable((void *)&test_data - 1, 1, TEST_TAG);
+
+	ASSERT_ERR(prctl(PR_BPF_UNREGISTER_WRITABLE, &test_data, 1, 0, 0), __func__);
+	prctl_unregister_writable((void *)&test_data - 1, 1);
+	prctl_unregister_writable(&test_data, sizeof(test_data));
+	prctl_unregister_writable((void *)&test_data + 1, 1);
+	ASSERT_ERR(prctl(PR_BPF_UNREGISTER_WRITABLE, 0x123456, 1, 0, 0), __func__);
+	ASSERT_ERR(prctl(PR_BPF_UNREGISTER_WRITABLE, &test_data, sizeof(test_data), 0, 0), __func__);
+}
+
+/*
+ * Test that we can register multiple regions and they all work.
+ */
+static void test_multiple_region(struct test_probe_write_user_registered *skel)
+{
+	uint64_t expect;
+
+	prctl_register_writable(&test_data.nanosleep_arg, sizeof(uint64_t), TEST_TAG);
+	prctl_register_writable(&test_data.padding_end, sizeof(uint64_t), TEST_TAG);
+	/* Register the first field last: the BPF program records the most recent start. */
+	prctl_register_writable(&test_data.padding_start, sizeof(uint64_t), TEST_TAG);
+
+	memset(&test_data, 0, sizeof(test_data));
+	do_nanosleep(0xf00d, TEST_EQ_REGION); /* fails */
+	ASSERT_EQ(test_data.nanosleep_arg, 0, __func__);
+	ASSERT_EQ(skel->data->found_user_registered, 1, __func__); /* found first */
+
+	expect = do_nanosleep(0xf33d, TEST_ONE_BY_ONE);
+	ASSERT_EQ(test_data.padding_start, expect, __func__);
+	ASSERT_EQ(test_data.nanosleep_arg, expect, __func__);
+	ASSERT_EQ(test_data.padding_end, expect, __func__);
+	ASSERT_EQ(skel->data->found_user_registered, 1, __func__);
+
+	prctl_unregister_writable(&test_data.padding_start, sizeof(uint64_t));
+	prctl_unregister_writable(&test_data.nanosleep_arg, sizeof(uint64_t));
+	prctl_unregister_writable(&test_data.padding_end, sizeof(uint64_t));
+}
+
+static void *test_thread_func(void *arg)
+{
+	struct test_probe_write_user_registered *skel = arg;
+
+	/* The unregister must fail, since threads do not inherit the region. */
+	ASSERT_ERR(prctl(PR_BPF_UNREGISTER_WRITABLE, &test_data, sizeof(test_data), 0, 0), __func__);
+	/* So that the BPF user_writable task storage is filled. */
+	prctl_register_writable(&test_data, 1, TEST_TAG);
+	prctl_unregister_writable(&test_data, 1);
+
+	/* Test that there really is no way it'll write. */
+	test_data.nanosleep_arg = 0;
+	do_nanosleep(9999, TEST_SUB_REGION); /* fails */
+	ASSERT_EQ(test_data.nanosleep_arg, 0, __func__);
+	ASSERT_EQ(skel->data->found_user_registered, 0, __func__);
+
+	return NULL;
+}
+
+/*
+ * Test that threads (CLONE_VM) do not inherit writable regions.
+ */
+static void test_thread(struct test_probe_write_user_registered *skel)
+{
+	uint64_t expect;
+	pthread_t tid;
+
+	prctl_register_writable(&test_data, sizeof(test_data), TEST_TAG);
+
+	test_data.nanosleep_arg = 0;
+	expect = do_nanosleep(1234, TEST_SUB_REGION);
+	ASSERT_EQ(test_data.nanosleep_arg, expect, __func__);
+	ASSERT_EQ(skel->data->found_user_registered, 1, __func__);
+
+	ASSERT_OK(pthread_create(&tid, NULL, test_thread_func, skel), "pthread_create");
+	ASSERT_OK(pthread_join(tid, NULL), "pthread_join");
+
+	ASSERT_EQ(test_data.nanosleep_arg, 0, __func__);
+	prctl_unregister_writable(&test_data, sizeof(test_data));
+}
+
+/*
+ * Test that fork() does inherit writable regions.
+ */
+static void test_fork(struct test_probe_write_user_registered *skel)
+{
+	uint64_t expect;
+	int pid, status;
+
+	prctl_register_writable(&test_data, sizeof(test_data), TEST_TAG);
+
+	test_data.nanosleep_arg = 0;
+	expect = do_nanosleep(1234, TEST_SUB_REGION);
+	ASSERT_EQ(test_data.nanosleep_arg, expect, __func__);
+	ASSERT_EQ(skel->data->found_user_registered, 1, __func__);
+
+	pid = fork();
+	if (!pid) {
+		test_data.nanosleep_arg = 0; /* write prefault */
+		expect = do_nanosleep(3333, TEST_SUB_REGION);
+		ASSERT_EQ(skel->data->found_user_registered, 1, __func__);
+		exit(!ASSERT_EQ(test_data.nanosleep_arg, expect, __func__));
+	}
+
+	status = -1;
+	waitpid(pid, &status, 0);
+	ASSERT_EQ(status, 0, __func__);
+
+	ASSERT_EQ(test_data.nanosleep_arg, expect, __func__);
+	prctl_unregister_writable(&test_data, sizeof(test_data));
+}
+
+/*
+ * Test that the kernel can allocate lots of regions and find them.
+ */
+static void test_stress_regions(struct test_probe_write_user_registered *skel)
+{
+	const int STRESS_SIZE = 200;
+	struct test_data *large = malloc(STRESS_SIZE * sizeof(*large));
+	uint64_t expect;
+
+	ASSERT_NEQ(large, NULL, __func__);
+
+	memset(large, 0, STRESS_SIZE * sizeof(*large));
+
+	for (int i = 0; i < STRESS_SIZE; ++i) {
+		prctl_register_writable(&large[i], sizeof(*large), TEST_TAG);
+		ASSERT_ERR(prctl(PR_BPF_REGISTER_WRITABLE, &large[i], sizeof(*large), 0, 0), __func__);
+		expect = do_nanosleep(777, TEST_SUB_REGION);
+		ASSERT_EQ(large[i].nanosleep_arg, expect, __func__);
+		ASSERT_EQ(skel->data->found_user_registered, 1, __func__);
+	}
+
+	for (int i = 0; i < STRESS_SIZE; ++i) {
+		prctl_unregister_writable(&large[i], sizeof(*large));
+		ASSERT_ERR(prctl(PR_BPF_UNREGISTER_WRITABLE, &large[i], sizeof(*large), 0, 0), __func__);
+		large[i].nanosleep_arg = 0;
+		do_nanosleep(1992, TEST_SUB_REGION); /* no more writes */
+		ASSERT_EQ(large[i].nanosleep_arg, 0, __func__);
+		ASSERT_EQ(skel->data->found_user_registered, i < STRESS_SIZE - 1 ? 1 : 0, __func__);
+	}
+
+	for (int i = 0; i < STRESS_SIZE; ++i)
+		ASSERT_ERR(prctl(PR_BPF_UNREGISTER_WRITABLE, &large[i], sizeof(*large), 0, 0), __func__);
+
+	free(large);
+}
+
+/*
+ * Test setup.
+ */
+void test_probe_write_user_registered(void)
+{
+	struct test_probe_write_user_registered *skel;
+
+	skel = test_probe_write_user_registered__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open and load"))
+		return;
+
+	if (!ASSERT_OK(test_probe_write_user_registered__attach(skel), "attach"))
+		goto cleanup;
+
+	if (test__start_subtest("register_and_unregister"))
+		test_register_and_unregister(skel);
+	if (test__start_subtest("bad_tag"))
+		test_bad_tag(skel);
+	if (test__start_subtest("any_tag"))
+		test_any_tag(skel);
+	if (test__start_subtest("invalid_prctl"))
+		test_invalid_prctl(skel);
+	if (test__start_subtest("multiple_region"))
+		test_multiple_region(skel);
+	if (test__start_subtest("thread"))
+		test_thread(skel);
+	if (test__start_subtest("fork"))
+		test_fork(skel);
+	if (test__start_subtest("stress_regions"))
+		test_stress_regions(skel);
+
+cleanup:
+	test_probe_write_user_registered__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_probe_write_user_registered.c b/tools/testing/selftests/bpf/progs/test_probe_write_user_registered.c
new file mode 100644
index 000000000000..9174ff2e36f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_probe_write_user_registered.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023, Google LLC. */
+#include "vmlinux.h"
+#include <asm/unistd.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/*
+ * We just need the CLONE_VM definition. Without __ASSEMBLY__, sched.h would
+ * redefine clone_args, which vmlinux.h already defines.
+ */
+#define __ASSEMBLY__
+#include <linux/sched.h>
+#undef __ASSEMBLY__
+
+#define TEST_TAG 0xf23c39ab
+
+/* Encoding of the test access-type in the tv_nsec parameter. */
+enum test_access {
+	TEST_SUB_REGION,
+	TEST_EQ_REGION,
+	TEST_ONE_BY_ONE,
+	TEST_ANY_TAG,
+};
+#define TEST_ACCESS(nsec) ((enum test_access)((nsec) & 0xff))
+
+struct test_data {
+	__u64 padding_start;
+	__u64 nanosleep_arg;
+	__u64 padding_end;
+};
+
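+/* Per-task snapshot of the most recently registered region (set in sys_prctl() below). */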
+struct user_writable {
+	void *start;
+	size_t size;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct user_writable);
+} user_writable SEC(".maps");
+
+int found_user_registered = -1;
+
+/*
+ * This is used to test that the contents of per-task bpf_user_writable is sane.
+ *
+ * It also demonstrates another way (besides tracing prctl()) for the BPF
+ * program to obtain the addresses associated with a tag. Beware, however,
+ * that this is O(#registered regions), so a production BPF program should
+ * cache the result in task local storage.
+ */
+static int find_user_registered(__u32 tag, void *start)
+{
+	const struct bpf_user_writable *uw = bpf_get_current_task_btf()->bpf_user_writable;
+	int count = 0;
+
+	if (!uw)
+		return count;
+
+	/*
+	 * Ensure termination of the loop to make the verifier happy. Use
+	 * bpf_loop() if you expect a very large number of registered regions.
+	 */
+	for (__u32 idx = 0; idx < uw->size && idx < 1024; ++idx) {
+		if (uw->entries[idx].tag == tag && uw->entries[idx].start == start)
+			count++;
+	}
+
+	return count;
+}
+
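+/*
+ * Called on nanosleep() entry: decodes the requested test access from
+ * tv_nsec, first attempts a set of writes that must fail, and then performs
+ * the real write(s) into the region recorded by sys_prctl() below.
+ */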
+static void sys_nanosleep(struct pt_regs *regs)
+{
+	struct __kernel_timespec *ts;
+	struct user_writable *w;
+	__u32 dummy = -99;
+	__u64 tv_nsec;
+	int err;
+
+	_Static_assert(sizeof(ts->tv_nsec) == sizeof(tv_nsec), "ABI");
+
+	found_user_registered = -1;
+
+	w = bpf_task_storage_get(&user_writable, bpf_get_current_task_btf(), 0, 0);
+	if (!w)
+		return;
+
+	ts = (void *)PT_REGS_PARM1_CORE_SYSCALL(regs);
+	if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec))
+		return;
+
+	found_user_registered = find_user_registered(TEST_TAG, w->start);
+
+	bpf_printk("doing test accesses");
+
+	/*
+	 * Attempt the accesses that must fail first: if one unexpectedly
+	 * succeeds, we return early without doing the real write, and the
+	 * test detects the missing write.
+	 */
+	if (!bpf_probe_write_user_registered(w->start + w->size - 1, &dummy, sizeof(dummy), TEST_TAG))
+		return;
+	if (!bpf_probe_write_user_registered(w->start - 1, &dummy, sizeof(dummy), TEST_TAG))
+		return;
+	if (!bpf_probe_write_user_registered(w->start + 100, &dummy, sizeof(dummy), TEST_TAG))
+		return;
+	if (TEST_ACCESS(tv_nsec) != TEST_ANY_TAG) {
+		if (!bpf_probe_write_user_registered(w->start, &dummy, sizeof(dummy), 123))
+			return;
+		if (!bpf_probe_write_user_registered(w->start, &dummy, sizeof(dummy), 0))
+			return;
+	}
+
+	switch (TEST_ACCESS(tv_nsec)) {
+	case TEST_SUB_REGION:
+		bpf_printk("sub region write");
+		err = bpf_probe_write_user_registered(w->start + sizeof(__u64), &tv_nsec, sizeof(tv_nsec), TEST_TAG);
+		break;
+	case TEST_EQ_REGION: {
+		struct test_data out = {};
+
+		bpf_printk("whole region write");
+		out.nanosleep_arg = tv_nsec;
+		err = bpf_probe_write_user_registered(w->start, &out, sizeof(out), TEST_TAG);
+		break;
+	}
+	case TEST_ONE_BY_ONE:
+		bpf_printk("write one by one");
+		for (int i = 0; i < 3; ++i) {
+			err = bpf_probe_write_user_registered(w->start + i * sizeof(__u64), &tv_nsec,
+							      sizeof(tv_nsec), TEST_TAG);
+			if (err)
+				break;
+		}
+		break;
+	case TEST_ANY_TAG:
+		bpf_printk("any tag write");
+		err = bpf_probe_write_user_registered(w->start + sizeof(__u64), &tv_nsec, sizeof(tv_nsec), 93845);
+		break;
+	default:
+		bpf_printk("unknown access method");
+		return;
+	}
+
+	if (err)
+		bpf_printk("write failed: %d", err);
+	else
+		bpf_printk("write success");
+}
+
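+/*
+ * Called on prctl() entry: when a region is registered with TEST_TAG (or the
+ * "any" tag 0), record its start and size in task local storage so that
+ * sys_nanosleep() above knows where to write.
+ */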
+static void sys_prctl(struct pt_regs *regs)
+{
+	struct user_writable *w;
+	__u32 tag;
+
+	if (PT_REGS_PARM1_CORE_SYSCALL(regs) != /*PR_BPF_REGISTER_WRITABLE*/71)
+		return;
+
+	tag = (__u32)PT_REGS_PARM4_CORE_SYSCALL(regs);
+	if (tag && tag != TEST_TAG)
+		return;
+
+	w = bpf_task_storage_get(&user_writable, bpf_get_current_task_btf(), 0,
+				 BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!w)
+		return;
+
+	bpf_printk("registered user writable region with tag %x", tag);
+	w->start = (void *)PT_REGS_PARM2_CORE_SYSCALL(regs);
+	w->size = PT_REGS_PARM3_CORE_SYSCALL(regs);
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(sys_enter, struct pt_regs *regs, long id)
+{
+	switch (id) {
+	case __NR_prctl:
+		sys_prctl(regs);
+		break;
+	case __NR_nanosleep:
+		sys_nanosleep(regs);
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+/*
+ * The user writable region is copied on fork(). Also copy the per-task map we
+ * use in this test.
+ */
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_newtask, struct task_struct *t, unsigned long clone_flags)
+{
+	const struct user_writable *src;
+	struct user_writable *dst;
+
+	if (clone_flags & CLONE_VM)
+		return 0;
+
+	src = bpf_task_storage_get(&user_writable, bpf_get_current_task_btf(), 0, 0);
+	if (!src)
+		return 0;
+
+	dst = bpf_task_storage_get(&user_writable, t, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!dst) {
+		bpf_printk("failed to copy user_writable on fork()");
+		return 0;
+	}
+	*dst = *src;
+	bpf_printk("fork copied user writable region");
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
2.44.0.478.gd926399ef9-goog




