On 08/15, thinker.li@xxxxxxxxx wrote: > From: Kui-Feng Lee <thinker.li@xxxxxxxxx> > > Do the same test as non-sleepable ones. > > Signed-off-by: Kui-Feng Lee <thinker.li@xxxxxxxxx> > --- > .../testing/selftests/bpf/bpf_experimental.h | 36 +++ > tools/testing/selftests/bpf/bpf_kfuncs.h | 41 +++ > .../selftests/bpf/prog_tests/sockopt_sk.c | 112 +++++++- > .../testing/selftests/bpf/progs/sockopt_sk.c | 257 ++++++++++++++++++ > .../selftests/bpf/verifier/sleepable.c | 2 +- > 5 files changed, 445 insertions(+), 3 deletions(-) > > diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h > index 209811b1993a..9b5dfefe65dc 100644 > --- a/tools/testing/selftests/bpf/bpf_experimental.h > +++ b/tools/testing/selftests/bpf/bpf_experimental.h > @@ -131,4 +131,40 @@ extern int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *nod > */ > extern struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) __ksym; > > +/* > + * Description > + * Copy data from *ptr* to *sopt->optval*. > + * Return > + * >= 0 on success, or a negative error in case of failure. > + */ > +extern int bpf_sockopt_dynptr_copy_to(struct bpf_sockopt *sopt, > + struct bpf_dynptr *ptr) __ksym; > + > +/* Description > + * Allocate a buffer of 'size' bytes for being installed as optval. > + * Returns > + * > 0 on success, the size of the allocated buffer > + * -ENOMEM or -EINVAL on failure > + */ > +extern int bpf_sockopt_dynptr_alloc(struct bpf_sockopt *sopt, int size, > + struct bpf_dynptr *ptr__uninit) __ksym; > + > +/* Description > + * Install the buffer pointed to by 'ptr' as optval. > + * Returns > + * 0 on success > + * -EINVAL if the buffer is too small > + */ > +extern int bpf_sockopt_dynptr_install(struct bpf_sockopt *sopt, > + struct bpf_dynptr *ptr) __ksym; > + > +/* Description > + * Release the buffer allocated by bpf_sockopt_dynptr_alloc. 
> + * Returns > + * 0 on success > + * -EINVAL if the buffer was not allocated by bpf_sockopt_dynptr_alloc > + */ > +extern int bpf_sockopt_dynptr_release(struct bpf_sockopt *sopt, > + struct bpf_dynptr *ptr) __ksym; > + > #endif > diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h > index 642dda0e758a..772040225257 100644 > --- a/tools/testing/selftests/bpf/bpf_kfuncs.h > +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h > @@ -41,4 +41,45 @@ extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym; > extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym; > extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym; > > +extern int bpf_sockopt_dynptr_copy_to(struct bpf_sockopt *sopt, > + struct bpf_dynptr *ptr) __ksym; > + > +/* Description > + * Allocate a buffer of 'size' bytes for being installed as optval. > + * Returns > + * > 0 on success, the size of the allocated buffer > + * -ENOMEM or -EINVAL on failure > + */ > +extern int bpf_sockopt_dynptr_alloc(struct bpf_sockopt *sopt, int size, > + struct bpf_dynptr *ptr__uninit) __ksym; > + > +/* Description > + * Install the buffer pointed to by 'ptr' as optval. > + * Returns > + * 0 on success > + * -EINVAL if the buffer is too small > + */ > +extern int bpf_sockopt_dynptr_install(struct bpf_sockopt *sopt, > + struct bpf_dynptr *ptr) __ksym; > + > +/* Description > + * Release the buffer allocated by bpf_sockopt_dynptr_alloc. > + * Returns > + * 0 on success > + * -EINVAL if the buffer was not allocated by bpf_sockopt_dynptr_alloc > + */ > +extern int bpf_sockopt_dynptr_release(struct bpf_sockopt *sopt, > + struct bpf_dynptr *ptr) __ksym; > + > +/* Description > + * Initialize a dynptr to access the content of optval passing > + * to {get,set}sockopt()s. 
> + * Returns > + * > 0 on success, the size of the allocated buffer > + * -ENOMEM or -EINVAL on failure > + */ > +extern int bpf_sockopt_dynptr_from(struct bpf_sockopt *sopt, > + struct bpf_dynptr *ptr__uninit, > + unsigned int size) __ksym; > + > #endif > diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c > index 05d0e07da394..85255648747f 100644 > --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c > +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c > @@ -92,6 +92,7 @@ static int getsetsockopt(void) > } > if (buf.u8[0] != 0x01) { > log_err("Unexpected buf[0] 0x%02x != 0x01", buf.u8[0]); > + log_err("optlen %d", optlen); > goto err; > } > > @@ -220,7 +221,7 @@ static int getsetsockopt(void) > return -1; > } > > -static void run_test(int cgroup_fd) > +static void run_test_nonsleepable(int cgroup_fd) > { > struct sockopt_sk *skel; > > @@ -246,6 +247,106 @@ static void run_test(int cgroup_fd) > sockopt_sk__destroy(skel); > } > > +static void run_test_nonsleepable_mixed(int cgroup_fd) > +{ > + struct sockopt_sk *skel; > + > + skel = sockopt_sk__open_and_load(); > + if (!ASSERT_OK_PTR(skel, "skel_load")) > + goto cleanup; > + > + skel->bss->page_size = getpagesize(); > + skel->bss->skip_sleepable = 1; > + > + skel->links._setsockopt_s = > + bpf_program__attach_cgroup(skel->progs._setsockopt_s, cgroup_fd); > + if (!ASSERT_OK_PTR(skel->links._setsockopt_s, "setsockopt_link (sleepable)")) > + goto cleanup; > + > + skel->links._getsockopt_s = > + bpf_program__attach_cgroup(skel->progs._getsockopt_s, cgroup_fd); > + if (!ASSERT_OK_PTR(skel->links._getsockopt_s, "getsockopt_link (sleepable)")) > + goto cleanup; > + > + skel->links._setsockopt = > + bpf_program__attach_cgroup(skel->progs._setsockopt, cgroup_fd); > + if (!ASSERT_OK_PTR(skel->links._setsockopt, "setsockopt_link")) > + goto cleanup; > + > + skel->links._getsockopt = > + bpf_program__attach_cgroup(skel->progs._getsockopt, 
cgroup_fd); > + if (!ASSERT_OK_PTR(skel->links._getsockopt, "getsockopt_link")) > + goto cleanup; > + > + ASSERT_OK(getsetsockopt(), "getsetsockopt"); > + > +cleanup: > + sockopt_sk__destroy(skel); > +} > + > +static void run_test_sleepable(int cgroup_fd) > +{ > + struct sockopt_sk *skel; > + > + skel = sockopt_sk__open_and_load(); > + if (!ASSERT_OK_PTR(skel, "skel_load")) > + goto cleanup; > + > + skel->bss->page_size = getpagesize(); > + > + skel->links._setsockopt_s = > + bpf_program__attach_cgroup(skel->progs._setsockopt_s, cgroup_fd); > + if (!ASSERT_OK_PTR(skel->links._setsockopt_s, "setsockopt_link")) > + goto cleanup; > + > + skel->links._getsockopt_s = > + bpf_program__attach_cgroup(skel->progs._getsockopt_s, cgroup_fd); > + if (!ASSERT_OK_PTR(skel->links._getsockopt_s, "getsockopt_link")) > + goto cleanup; > + > + ASSERT_OK(getsetsockopt(), "getsetsockopt"); > + > +cleanup: > + sockopt_sk__destroy(skel); > +} > + > +static void run_test_sleepable_mixed(int cgroup_fd) > +{ > + struct sockopt_sk *skel; > + > + skel = sockopt_sk__open_and_load(); > + if (!ASSERT_OK_PTR(skel, "skel_load")) > + goto cleanup; > + > + skel->bss->page_size = getpagesize(); > + skel->bss->skip_nonsleepable = 1; > + > + skel->links._setsockopt = > + bpf_program__attach_cgroup(skel->progs._setsockopt, cgroup_fd); > + if (!ASSERT_OK_PTR(skel->links._setsockopt, "setsockopt_link (nonsleepable)")) > + goto cleanup; > + > + skel->links._getsockopt = > + bpf_program__attach_cgroup(skel->progs._getsockopt, cgroup_fd); > + if (!ASSERT_OK_PTR(skel->links._getsockopt, "getsockopt_link (nonsleepable)")) > + goto cleanup; > + > + skel->links._setsockopt_s = > + bpf_program__attach_cgroup(skel->progs._setsockopt_s, cgroup_fd); > + if (!ASSERT_OK_PTR(skel->links._setsockopt_s, "setsockopt_link")) > + goto cleanup; > + > + skel->links._getsockopt_s = > + bpf_program__attach_cgroup(skel->progs._getsockopt_s, cgroup_fd); > + if (!ASSERT_OK_PTR(skel->links._getsockopt_s, "getsockopt_link")) > + 
goto cleanup; > + > + ASSERT_OK(getsetsockopt(), "getsetsockopt"); > + > +cleanup: > + sockopt_sk__destroy(skel); > +} > + > void test_sockopt_sk(void) > { > int cgroup_fd; > @@ -254,6 +355,13 @@ void test_sockopt_sk(void) > if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /sockopt_sk")) > return; > > - run_test(cgroup_fd); > + if (test__start_subtest("nonsleepable")) > + run_test_nonsleepable(cgroup_fd); > + if (test__start_subtest("sleepable")) > + run_test_sleepable(cgroup_fd); > + if (test__start_subtest("nonsleepable_mixed")) > + run_test_nonsleepable_mixed(cgroup_fd); > + if (test__start_subtest("sleepable_mixed")) > + run_test_sleepable_mixed(cgroup_fd); > close(cgroup_fd); > } > diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c > index cb990a7d3d45..efacd3b88c40 100644 > --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c > +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c > @@ -5,10 +5,16 @@ > #include <netinet/in.h> > #include <bpf/bpf_helpers.h> > > +typedef int bool; > +#include "bpf_kfuncs.h" > + > char _license[] SEC("license") = "GPL"; > > int page_size = 0; /* userspace should set it */ > > +int skip_sleepable = 0; > +int skip_nonsleepable = 0; > + > #ifndef SOL_TCP > #define SOL_TCP IPPROTO_TCP > #endif > @@ -34,6 +40,9 @@ int _getsockopt(struct bpf_sockopt *ctx) > struct sockopt_sk *storage; > struct bpf_sock *sk; > > + if (skip_nonsleepable) > + return 1; > + > /* Bypass AF_NETLINK. */ > sk = ctx->sk; > if (sk && sk->family == AF_NETLINK) > @@ -136,6 +145,134 @@ int _getsockopt(struct bpf_sockopt *ctx) > return 1; > } > > +SEC("cgroup/getsockopt.s") > +int _getsockopt_s(struct bpf_sockopt *ctx) > +{ > + struct tcp_zerocopy_receive *zcvr; > + struct bpf_dynptr optval_dynptr; > + struct sockopt_sk *storage; > + __u8 *optval, *optval_end; > + struct bpf_sock *sk; > + char buf[1]; > + __u64 addr; > + int ret; > + > + if (skip_sleepable) > + return 1; > + > + /* Bypass AF_NETLINK. 
*/ > + sk = ctx->sk; > + if (sk && sk->family == AF_NETLINK) > + return 1; > + > + optval = ctx->optval; > + optval_end = ctx->optval_end; > + > + /* Make sure bpf_get_netns_cookie is callable. > + */ > + if (bpf_get_netns_cookie(NULL) == 0) > + return 0; > + > + if (bpf_get_netns_cookie(ctx) == 0) > + return 0; > + > + if (ctx->level == SOL_IP && ctx->optname == IP_TOS) { > + /* Not interested in SOL_IP:IP_TOS; > + * let next BPF program in the cgroup chain or kernel > + * handle it. > + */ > + return 1; > + } > + > + if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) { > + /* Not interested in SOL_SOCKET:SO_SNDBUF; > + * let next BPF program in the cgroup chain or kernel > + * handle it. > + */ > + return 1; > + } > + > + if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) { > + /* Not interested in SOL_TCP:TCP_CONGESTION; > + * let next BPF program in the cgroup chain or kernel > + * handle it. > + */ > + return 1; > + } > + > + if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) { > + /* Verify that TCP_ZEROCOPY_RECEIVE triggers. > + * It has a custom implementation for performance > + * reasons. > + */ > + > + bpf_sockopt_dynptr_from(ctx, &optval_dynptr, sizeof(*zcvr)); > + zcvr = bpf_dynptr_data(&optval_dynptr, 0, sizeof(*zcvr)); > + addr = zcvr ? zcvr->address : 0; > + bpf_sockopt_dynptr_release(ctx, &optval_dynptr); This starts to look more usable, thank you for the changes! Let me poke the API a bit more; I'm not super familiar with the dynptrs. Here: bpf_sockopt_dynptr_from should probably be called bpf_dynptr_from_sockopt to match bpf_dynptr_from_mem? > + > + return addr != 0 ?
0 : 1; > + } > + > + if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { > + if (optval + 1 > optval_end) > + return 0; /* bounds check */ > + > + ctx->retval = 0; /* Reset system call return value to zero */ > + > + /* Always export 0x55 */ > + buf[0] = 0x55; > + ret = bpf_sockopt_dynptr_alloc(ctx, 1, &optval_dynptr); > + if (ret >= 0) { > + bpf_dynptr_write(&optval_dynptr, 0, buf, 1, 0); > + ret = bpf_sockopt_dynptr_copy_to(ctx, &optval_dynptr); > + } > + bpf_sockopt_dynptr_release(ctx, &optval_dynptr); Do bpf_sockopt_dynptr_alloc and bpf_sockopt_dynptr_release need to be sockopt-specific? Seems like we should provide, instead, some generic bpf_dynptr_alloc/bpf_dynptr_release and make bpf_sockopt_dynptr_copy_to/install work with them? WDYT? > + if (ret < 0) > + return 0; > + ctx->optlen = 1; > + > + /* Userspace buffer is PAGE_SIZE * 2, but BPF > + * program can only see the first PAGE_SIZE > + * bytes of data. > + */ > + if (optval_end - optval != page_size && 0) > + return 0; /* unexpected data size */ > + > + return 1; > + } > + > + if (ctx->level != SOL_CUSTOM) > + return 0; /* deny everything except custom level */ > + > + if (optval + 1 > optval_end) > + return 0; /* bounds check */ > + > + storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0, > + BPF_SK_STORAGE_GET_F_CREATE); > + if (!storage) > + return 0; /* couldn't get sk storage */ > + > + if (!ctx->retval) > + return 0; /* kernel should not have handled > + * SOL_CUSTOM, something is wrong!
> + */ > + ctx->retval = 0; /* Reset system call return value to zero */ > + > + buf[0] = storage->val; > + ret = bpf_sockopt_dynptr_alloc(ctx, 1, &optval_dynptr); > + if (ret >= 0) { > + bpf_dynptr_write(&optval_dynptr, 0, buf, 1, 0); > + ret = bpf_sockopt_dynptr_copy_to(ctx, &optval_dynptr); > + } > + bpf_sockopt_dynptr_release(ctx, &optval_dynptr); > + if (ret < 0) > + return 0; > + ctx->optlen = 1; > + > + return 1; > +} > + > SEC("cgroup/setsockopt") > int _setsockopt(struct bpf_sockopt *ctx) > { > @@ -144,6 +281,9 @@ int _setsockopt(struct bpf_sockopt *ctx) > struct sockopt_sk *storage; > struct bpf_sock *sk; > > + if (skip_nonsleepable) > + return 1; > + > /* Bypass AF_NETLINK. */ > sk = ctx->sk; > if (sk && sk->family == AF_NETLINK) > @@ -236,3 +376,120 @@ int _setsockopt(struct bpf_sockopt *ctx) > ctx->optlen = 0; > return 1; > } > + > +SEC("cgroup/setsockopt.s") > +int _setsockopt_s(struct bpf_sockopt *ctx) > +{ > + struct bpf_dynptr optval_buf; > + struct sockopt_sk *storage; > + __u8 *optval, *optval_end; > + struct bpf_sock *sk; > + __u8 tmp_u8; > + __u32 tmp; > + int ret; > + > + if (skip_sleepable) > + return 1; > + > + optval = ctx->optval; > + optval_end = ctx->optval_end; > + > + /* Bypass AF_NETLINK. */ > + sk = ctx->sk; > + if (sk && sk->family == AF_NETLINK) > + return -1; > + > + /* Make sure bpf_get_netns_cookie is callable. > + */ > + if (bpf_get_netns_cookie(NULL) == 0) > + return 0; > + > + if (bpf_get_netns_cookie(ctx) == 0) > + return 0; > + > + if (ctx->level == SOL_IP && ctx->optname == IP_TOS) { > + /* Not interested in SOL_IP:IP_TOS; > + * let next BPF program in the cgroup chain or kernel > + * handle it. 
*/ > + ctx->optlen = 0; /* bypass optval>PAGE_SIZE */ > + return 1; > + } > + > + if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) { > + /* Overwrite SO_SNDBUF value */ > + > + ret = bpf_sockopt_dynptr_alloc(ctx, sizeof(__u32), > + &optval_buf); > + if (ret < 0) > + bpf_sockopt_dynptr_release(ctx, &optval_buf); > + else { > + tmp = 0x55AA; > + bpf_dynptr_write(&optval_buf, 0, &tmp, sizeof(tmp), 0); > + ret = bpf_sockopt_dynptr_install(ctx, &optval_buf); One thing I'm still slightly confused about is bpf_sockopt_dynptr_install vs bpf_sockopt_dynptr_copy_to. I do understand that it comes from getsockopt vs setsockopt (and the ability, in setsockopt, to allocate larger buffers), but I wonder whether we can hide everything under a single bpf_sockopt_dynptr_copy_to call? For getsockopt, it stays as is. For setsockopt, it would work like _install currently does. Would that work? From a user perspective, if we provide a simple call that does the right thing, it seems a bit more usable? The only problem is probably the fact that _install explicitly moves the ownership, but I don't see why copy_to can't have the same "consume" semantics?