On Thu, Jan 21, 2021 at 02:57:44PM -0800, sdf@xxxxxxxxxx wrote: > On 01/21, Martin KaFai Lau wrote: > > On Wed, Jan 20, 2021 at 05:22:41PM -0800, Stanislav Fomichev wrote: > > > BPF rewrites from 111 to 111, but it still should mark the port as > > > "changed". > > > We also verify that if port isn't touched by BPF, it's still prohibited. > > > > > > Signed-off-by: Stanislav Fomichev <sdf@xxxxxxxxxx> > > > --- > > > .../selftests/bpf/prog_tests/bind_perm.c | 88 +++++++++++++++++++ > > > tools/testing/selftests/bpf/progs/bind_perm.c | 36 ++++++++ > > > 2 files changed, 124 insertions(+) > > > create mode 100644 tools/testing/selftests/bpf/prog_tests/bind_perm.c > > > create mode 100644 tools/testing/selftests/bpf/progs/bind_perm.c > > > > > > diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c > > b/tools/testing/selftests/bpf/prog_tests/bind_perm.c > > > new file mode 100644 > > > index 000000000000..840a04ac9042 > > > --- /dev/null > > > +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c > > > @@ -0,0 +1,88 @@ > > > +// SPDX-License-Identifier: GPL-2.0 > > > +#include <test_progs.h> > > > +#include "bind_perm.skel.h" > > > + > > > +#include <sys/types.h> > > > +#include <sys/socket.h> > > > +#include <sys/capability.h> > > > + > > > +static int duration; > > > + > > > +void try_bind(int port, int expected_errno) > > > +{ > > > + struct sockaddr_in sin = {}; > > > + int fd = -1; > > > + > > > + fd = socket(AF_INET, SOCK_STREAM, 0); > > > + if (CHECK(fd < 0, "fd", "errno %d", errno)) > > > + goto close_socket; > > > + > > > + sin.sin_family = AF_INET; > > > + sin.sin_port = htons(port); > > > + > > > + errno = 0; > > > + bind(fd, (struct sockaddr *)&sin, sizeof(sin)); > > > + CHECK(errno != expected_errno, "bind", "errno %d, expected %d", > > > + errno, expected_errno); > > > + > > > +close_socket: > > > + if (fd >= 0) > > > + close(fd); > > > +} > > > + > > > +void cap_net_bind_service(cap_flag_value_t flag) > > > +{ > > > + const cap_value_t cap_net_bind_service = CAP_NET_BIND_SERVICE; > > > + cap_t caps; > > > + > > > + caps = cap_get_proc(); > > > + if (CHECK(!caps, "cap_get_proc", "errno %d", errno)) > > > + goto free_caps; > > > + > > > + if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_bind_service, > > > + CAP_CLEAR), > > > + "cap_set_flag", "errno %d", errno)) > > > + goto free_caps; > > > + > > > + if (CHECK(cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_bind_service, > > > + CAP_CLEAR), > > > + "cap_set_flag", "errno %d", errno)) > > > + goto free_caps; > > > + > > > + if (CHECK(cap_set_proc(caps), "cap_set_proc", "errno %d", errno)) > > > + goto free_caps; > > > + > > > +free_caps: > > > + if (CHECK(cap_free(caps), "cap_free", "errno %d", errno)) > > > + goto free_caps; > > > +} > > > + > > > +void test_bind_perm(void) > > > +{ > > > + struct bind_perm *skel; > > > + int cgroup_fd; > > > + > > > + cgroup_fd = test__join_cgroup("/bind_perm"); > > > + if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno)) > > > + return; > > > + > > > + skel = bind_perm__open_and_load(); > > > + if (CHECK(!skel, "skel-load", "errno %d", errno)) > > > + goto close_cgroup_fd; > > > + > > > + skel->links.bind_v4_prog = > > bpf_program__attach_cgroup(skel->progs.bind_v4_prog, cgroup_fd); > > > + if (CHECK(IS_ERR(skel->links.bind_v4_prog), > > > + "cg-attach", "bind4 %ld", > > > + PTR_ERR(skel->links.bind_v4_prog))) > > > + goto close_skeleton; > > > + > > > + cap_net_bind_service(CAP_CLEAR); > > > + try_bind(110, EACCES); > > > + try_bind(111, 0); > > > + cap_net_bind_service(CAP_SET); > > > + > > > +close_skeleton: > > > + bind_perm__destroy(skel); > > > +close_cgroup_fd: > > > + close(cgroup_fd); > > > +} > > > diff --git a/tools/testing/selftests/bpf/progs/bind_perm.c > > b/tools/testing/selftests/bpf/progs/bind_perm.c > > > new file mode 100644 > > > index 000000000000..2194587ec806 > > > --- /dev/null > > > +++ b/tools/testing/selftests/bpf/progs/bind_perm.c > > > @@ -0,0 +1,36 @@ > > > +// SPDX-License-Identifier: GPL-2.0 > > > + > > > +#include <linux/stddef.h> > > > +#include <linux/bpf.h> > > > +#include <sys/types.h> > > > +#include <sys/socket.h> > > > +#include <bpf/bpf_helpers.h> > > > +#include <bpf/bpf_endian.h> > > > + > > > +SEC("cgroup/bind4") > > > +int bind_v4_prog(struct bpf_sock_addr *ctx) > > > +{ > > > + struct bpf_sock *sk; > > > + __u32 user_ip4; > > > + __u16 user_port; > > > + > > > + sk = ctx->sk; > > > + if (!sk) > > > + return 0; > > > + > > > + if (sk->family != AF_INET) > > > + return 0; > > > + > > > + if (ctx->type != SOCK_STREAM) > > > + return 0; > > > + > > > + /* Rewriting to the same value should still cause > > > + * permission check to be bypassed. > > > + */ > > > + if (ctx->user_port == bpf_htons(111)) > > > + ctx->user_port = bpf_htons(111); > > iiuc, this overwrite is essentially the way to ensure the bind > > will succeed (override CAP_NET_BIND_SERVICE in this particular case?). > Correct. The alternative might be to export ignore_perm_check > via bpf_sock_addr and make it explicit. An explicit field is one option. or a different return value (e.g. BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY). Not sure which one (including the one in the current patch) is better at this point. Also, from patch 1, if one cgrp bpf prog says no-perm-check, it does not matter what the latter cgrp bpf progs have to say? > > > It seems to be okay if we consider most of the use cases is rewriting > > to a different port. > > > However, it is quite un-intuitive to the bpf prog to overwrite with > > the same user_port just to ensure this port can be binded successfully > > later. > I'm testing a corner case here when the address is rewritten to the same > value, but the intention is to rewrite X to Y < 1024. It is a legit corner case though. Also, is it possible that the compiler may optimize this same-value-assignment out? > > > Is user_port the only case? How about other fields in bpf_sock_addr? > Good question. For our use case only the port matters because > we rewrite both port and address (and never only address). > > It does feel like it should also work when BPF rewrites address only > (and port happens to be in the privileged range). I guess I can > apply the same logic to the user_ip4 and user_ip6? My concern is having more cases that need to overwrite with the same value. Then it may make a stronger case to use return value or an explicit field.