Add a few positive/negative tests to test bpf_rcu_read_lock()
and its corresponding verifier support.

Signed-off-by: Yonghong Song <yhs@xxxxxx>
---
 .../selftests/bpf/prog_tests/rcu_read_lock.c  | 177 ++++++++
 .../selftests/bpf/progs/rcu_read_lock.c       | 366 ++++++++++++++++++
 2 files changed, 543 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
 create mode 100644 tools/testing/selftests/bpf/progs/rcu_read_lock.c

diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
new file mode 100644
index 000000000000..38ce62cde93b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.*/
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "rcu_read_lock.skel.h"
+
+/* Load and attach the programs expected to pass verification, issue
+ * getpgid() from this thread and check that cgrp_succ stored 2 in result.
+ */
+static void test_local_storage(void)
+{
+	struct rcu_read_lock *skel;
+	int err;
+
+	skel = rcu_read_lock__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	skel->bss->target_pid = syscall(SYS_gettid);
+
+	bpf_program__set_autoload(skel->progs.cgrp_succ, true);
+	bpf_program__set_autoload(skel->progs.task_succ, true);
+	bpf_program__set_autoload(skel->progs.two_regions, true);
+	bpf_program__set_autoload(skel->progs.non_sleepable_1, true);
+	bpf_program__set_autoload(skel->progs.non_sleepable_2, true);
+	err = rcu_read_lock__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto done;
+
+	err = rcu_read_lock__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto done;
+
+	syscall(SYS_getpgid);
+
+	ASSERT_EQ(skel->bss->result, 2, "result");
+done:
+	rcu_read_lock__destroy(skel);
+}
+
+/* dump_ipv6_route must load successfully; it is not attached or run. */
+static void test_runtime_diff_rcu_tag(void)
+{
+	struct rcu_read_lock *skel;
+	int err;
+
+	skel = rcu_read_lock__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	bpf_program__set_autoload(skel->progs.dump_ipv6_route, true);
+	err = rcu_read_lock__load(skel);
+	ASSERT_OK(err, "skel_load");
+	rcu_read_lock__destroy(skel);
+}
+
+/* Each program below must fail to load; one fresh skeleton per program. */
+static void test_negative_region(void)
+{
+#define NUM_REGION_FAILED_PROGS 6
+	struct rcu_read_lock *skel;
+	struct bpf_program *prog;
+	int i, err;
+
+	for (i = 0; i < NUM_REGION_FAILED_PROGS; i++) {
+		skel = rcu_read_lock__open();
+		if (!ASSERT_OK_PTR(skel, "skel_open"))
+			return;
+
+		switch (i) {
+		case 0:
+			prog = skel->progs.miss_lock;
+			break;
+		case 1:
+			prog = skel->progs.miss_unlock;
+			break;
+		case 2:
+			prog = skel->progs.non_sleepable_rcu_mismatch;
+			break;
+		case 3:
+			prog = skel->progs.inproper_sleepable_helper;
+			break;
+		case 4:
+			prog = skel->progs.inproper_sleepable_kfunc;
+			break;
+		default:
+			prog = skel->progs.nested_rcu_region;
+			break;
+		}
+
+		bpf_program__set_autoload(prog, true);
+		err = rcu_read_lock__load(skel);
+		/* Destroy on every iteration so passing runs do not leak. */
+		rcu_read_lock__destroy(skel);
+		if (!ASSERT_ERR(err, "skel_load"))
+			return;
+	}
+}
+
+/* Each program below misuses an rcu pointer and must fail to load.
+ * Skipped when vmlinux BTF has no btf_type_tag("rcu").
+ */
+static void test_negative_rcuptr_misuse(void)
+{
+#define NUM_RCUPTR_FAILED_PROGS 4
+	struct rcu_read_lock *skel;
+	struct bpf_program *prog;
+	struct btf *vmlinux_btf;
+	int i, err, type_id;
+
+	vmlinux_btf = btf__load_vmlinux_btf();
+	if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF"))
+		return;
+
+	/* skip the test if btf_type_tag("rcu") is not present in vmlinux */
+	type_id = btf__find_by_name_kind(vmlinux_btf, "rcu", BTF_KIND_TYPE_TAG);
+	/* BTF is only needed for the lookup above; free it on all paths. */
+	btf__free(vmlinux_btf);
+	if (type_id < 0) {
+		test__skip();
+		return;
+	}
+
+	for (i = 0; i < NUM_RCUPTR_FAILED_PROGS; i++) {
+		skel = rcu_read_lock__open();
+		if (!ASSERT_OK_PTR(skel, "skel_open"))
+			return;
+
+		switch (i) {
+		case 0:
+			prog = skel->progs.cgrp_incorrect_rcu_region;
+			break;
+		case 1:
+			prog = skel->progs.task_incorrect_rcu_region1;
+			break;
+		case 2:
+			prog = skel->progs.task_incorrect_rcu_region2;
+			break;
+		default:
+			prog = skel->progs.cross_rcu_region;
+			break;
+		}
+
+		bpf_program__set_autoload(prog, true);
+		err = rcu_read_lock__load(skel);
+		/* Destroy on every iteration so passing runs do not leak. */
+		rcu_read_lock__destroy(skel);
+		if (!ASSERT_ERR(err, "skel_load"))
+			return;
+	}
+}
+
+/* Entry point: join a test cgroup, then run each subtest. */
+void test_rcu_read_lock(void)
+{
+	int cgroup_fd;
+
+	cgroup_fd = test__join_cgroup("/rcu_read_lock");
+	if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /rcu_read_lock"))
+		return;
+
+	if (test__start_subtest("local_storage"))
+		test_local_storage();
+	if (test__start_subtest("runtime_diff_rcu_tag"))
+		test_runtime_diff_rcu_tag();
+	if (test__start_subtest("negative_tests_region"))
+		test_negative_region();
+	if (test__start_subtest("negative_tests_rcuptr_misuse"))
+		test_negative_rcuptr_misuse();
+
+	close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
new file mode 100644
index 000000000000..32f9af19ea42
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -0,0 +1,366 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tracing_net.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, long);
+} map_a SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, long);
+} map_b SEC(".maps");
+
+__u32 user_data, key_serial, target_pid = 0;
+__u64 flags, result = 0;
+
+struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+void bpf_key_put(struct bpf_key *key) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int cgrp_succ(void *ctx)
+{
+	struct task_struct *task;
+	struct css_set *cgroups;
+	struct cgroup *dfl_cgrp;
+	long init_val = 2;
+	long *ptr;
+
+	task = bpf_get_current_task_btf();
+	if (task->pid != target_pid)
+		return 0;
+
+	/* For this specific case, the below bpf_rcu_read_lock region
+	 * protects rcu pointer memory access (cgroups) properly.
+	 * But to access dfl_cgrp memory, a reference to cgroups->dfl_cgrp
+	 * needs to be held so dfl_cgrp stays valid outside the
+	 * bpf_rcu_read_lock region.
+	 *
+	 * The current approach is to treat walked pointers as 'trusted' so
+	 * we do not enclose all walked pointer/mem access in the
+	 * bpf_rcu_read_lock region. For this particular case, the
+	 * bpf_rcu_read_unlock can be placed right before the first
+	 * 'return 0;' or immediately after the second bpf_cgrp_storage_get()
+	 * to protect dfl_cgrp as well.
+	 */
+	bpf_rcu_read_lock();
+	cgroups = task->cgroups;
+	dfl_cgrp = cgroups->dfl_cgrp;
+	bpf_rcu_read_unlock();
+	ptr = bpf_cgrp_storage_get(&map_a, dfl_cgrp, &init_val,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!ptr)
+		return 0;
+	ptr = bpf_cgrp_storage_get(&map_a, dfl_cgrp, 0, 0);
+	if (!ptr)
+		return 0;
+	result = *ptr;
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int task_succ(void *ctx)
+{
+	struct task_struct *task, *real_parent;
+
+	task = bpf_get_current_task_btf();
+	if (task->pid != target_pid)
+		return 0;
+
+	/* region including helper using rcu ptr */
+	bpf_rcu_read_lock();
+	real_parent = task->real_parent;
+	(void)bpf_task_storage_get(&map_b, real_parent, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	bpf_rcu_read_unlock();
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int two_regions(void *ctx)
+{
+	struct task_struct *task, *real_parent;
+
+	/* two regions */
+	task = bpf_get_current_task_btf();
+	bpf_rcu_read_lock();
+	bpf_rcu_read_unlock();
+	bpf_rcu_read_lock();
+	real_parent = task->real_parent;
+	(void)bpf_task_storage_get(&map_b, real_parent, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	bpf_rcu_read_unlock();
+	return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_getpgid")
+int non_sleepable_1(void *ctx)
+{
+	struct task_struct *task, *real_parent;
+
+	task = bpf_get_current_task_btf();
+
+	bpf_rcu_read_lock();
+	real_parent = task->real_parent;
+	(void)bpf_task_storage_get(&map_b, real_parent, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	bpf_rcu_read_unlock();
+	return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_getpgid")
+int non_sleepable_2(void *ctx)
+{
+	struct task_struct *task, *real_parent;
+
+	bpf_rcu_read_lock();
+	task = bpf_get_current_task_btf();
+	bpf_rcu_read_unlock();
+
+	bpf_rcu_read_lock();
+	real_parent = task->real_parent;
+	(void)bpf_task_storage_get(&map_b, real_parent, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	bpf_rcu_read_unlock();
+	return 0;
+}
+
+SEC("?iter.s/ipv6_route")
+int dump_ipv6_route(struct bpf_iter__ipv6_route *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct fib6_info *rt = ctx->rt;
+	const struct net_device *dev;
+	struct fib6_nh *fib6_nh;
+	unsigned int flags;
+	struct nexthop *nh;
+
+	if (rt == (void *)0)
+		return 0;
+
+	/* fib6_nh is not a rcu ptr */
+	fib6_nh = &rt->fib6_nh[0];
+	flags = rt->fib6_flags;
+
+	nh = rt->nh;
+	bpf_rcu_read_lock();
+	if (rt->nh)
+		/* fib6_nh is a rcu ptr */
+		fib6_nh = &nh->nh_info->fib6_nh;
+
+	/* fib6_nh could be a rcu or non-rcu ptr */
+	if (fib6_nh->fib_nh_gw_family) {
+		flags |= RTF_GATEWAY;
+		BPF_SEQ_PRINTF(seq, "%pi6 ", &fib6_nh->fib_nh_gw6);
+	} else {
+		BPF_SEQ_PRINTF(seq, "00000000000000000000000000000000 ");
+	}
+
+	dev = fib6_nh->fib_nh_dev;
+	bpf_rcu_read_unlock();
+	if (dev)
+		BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x %8s\n", rt->fib6_metric,
+			       rt->fib6_ref.refs.counter, 0, flags, dev->name);
+	else
+		BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x\n", rt->fib6_metric,
+			       rt->fib6_ref.refs.counter, 0, flags);
+
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int miss_lock(void *ctx)
+{
+	struct task_struct *task;
+	struct css_set *cgroups;
+	struct cgroup *dfl_cgrp;
+
+	/* missing bpf_rcu_read_lock() for the second bpf_rcu_read_unlock() */
+	task = bpf_get_current_task_btf();
+	bpf_rcu_read_lock();
+	cgroups = task->cgroups;
+	dfl_cgrp = cgroups->dfl_cgrp;
+	bpf_rcu_read_unlock();
+	(void)bpf_cgrp_storage_get(&map_a, dfl_cgrp, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	bpf_rcu_read_unlock();
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int miss_unlock(void *ctx)
+{
+	struct task_struct *task;
+	struct css_set *cgroups;
+	struct cgroup *dfl_cgrp;
+
+	/* missing bpf_rcu_read_unlock() */
+	task = bpf_get_current_task_btf();
+	bpf_rcu_read_lock();
+	cgroups = task->cgroups;
+	dfl_cgrp = cgroups->dfl_cgrp;
+	(void)bpf_cgrp_storage_get(&map_a, dfl_cgrp, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_getpgid")
+int non_sleepable_rcu_mismatch(void *ctx)
+{
+	struct task_struct *task, *real_parent;
+
+	task = bpf_get_current_task_btf();
+
+	/* non-sleepable: missing bpf_rcu_read_unlock() in one path */
+	bpf_rcu_read_lock();
+	real_parent = task->real_parent;
+	(void)bpf_task_storage_get(&map_b, real_parent, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (real_parent)
+		bpf_rcu_read_unlock();
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int inproper_sleepable_helper(void *ctx)
+{
+	struct task_struct *task, *real_parent;
+	struct pt_regs *regs;
+	__u32 value = 0;
+	void *ptr;
+
+	task = bpf_get_current_task_btf();
+
+	/* sleepable helper in rcu read lock region */
+	bpf_rcu_read_lock();
+	real_parent = task->real_parent;
+	regs = (struct pt_regs *)bpf_task_pt_regs(real_parent);
+	if (!regs) {
+		bpf_rcu_read_unlock();
+		return 0;
+	}
+
+	ptr = (void *)PT_REGS_IP(regs);
+	(void)bpf_copy_from_user_task(&value, sizeof(uint32_t), ptr, task, 0);
+	user_data = value;
+	(void)bpf_task_storage_get(&map_b, real_parent, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	bpf_rcu_read_unlock();
+	return 0;
+}
+
+SEC("?lsm.s/bpf")
+int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size)
+{
+	struct bpf_key *bkey;
+
+	/* sleepable kfunc in rcu read lock region */
+	bpf_rcu_read_lock();
+	bkey = bpf_lookup_user_key(key_serial, flags);
+	bpf_rcu_read_unlock();
+	if (!bkey)
+		return -1;
+	bpf_key_put(bkey);
+
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int nested_rcu_region(void *ctx)
+{
+	struct task_struct *task, *real_parent;
+
+	/* nested rcu read lock regions */
+	task = bpf_get_current_task_btf();
+	bpf_rcu_read_lock();
+	bpf_rcu_read_lock();
+	real_parent = task->real_parent;
+	(void)bpf_task_storage_get(&map_b, real_parent, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	bpf_rcu_read_unlock();
+	bpf_rcu_read_unlock();
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int cgrp_incorrect_rcu_region(void *ctx)
+{
+	struct task_struct *task;
+	struct css_set *cgroups;
+	struct cgroup *dfl_cgrp;
+
+	/* load with rcu_ptr outside the rcu read lock region */
+	bpf_rcu_read_lock();
+	task = bpf_get_current_task_btf();
+	cgroups = task->cgroups;
+	bpf_rcu_read_unlock();
+	dfl_cgrp = cgroups->dfl_cgrp;
+	(void)bpf_cgrp_storage_get(&map_a, dfl_cgrp, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int task_incorrect_rcu_region1(void *ctx)
+{
+	struct task_struct *task, *real_parent;
+
+	task = bpf_get_current_task_btf();
+
+	/* helper use of rcu ptr outside the rcu read lock region */
+	bpf_rcu_read_lock();
+	real_parent = task->real_parent;
+	bpf_rcu_read_unlock();
+	(void)bpf_task_storage_get(&map_b, real_parent, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int task_incorrect_rcu_region2(void *ctx)
+{
+	struct task_struct *task, *real_parent;
+
+	task = bpf_get_current_task_btf();
+
+	/* missing bpf_rcu_read_unlock() in one path */
+	bpf_rcu_read_lock();
+	real_parent = task->real_parent;
+	(void)bpf_task_storage_get(&map_b, real_parent, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (real_parent)
+		bpf_rcu_read_unlock();
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int cross_rcu_region(void *ctx)
+{
+	struct task_struct *task, *real_parent;
+
+	/* rcu ptr define/use in different regions */
+	task = bpf_get_current_task_btf();
+	bpf_rcu_read_lock();
+	real_parent = task->real_parent;
+	bpf_rcu_read_unlock();
+	bpf_rcu_read_lock();
+	(void)bpf_task_storage_get(&map_b, real_parent, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	bpf_rcu_read_unlock();
+	return 0;
+}
+
-- 
2.30.2