As is described in the "How to use MPTCP?" section in MPTCP wiki [1]: "Your app can create sockets with IPPROTO_MPTCP as the proto: ( socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP); ). Legacy apps can be forced to create and use MPTCP sockets instead of TCP ones via the mptcpize command bundled with the mptcpd daemon." But the mptcpize (LD_PRELOAD technique) command has some limitations [2]: - it doesn't work if the application is not using libc (e.g. GoLang apps) - in some envs, it might not be easy to set env vars / change the way apps are launched, e.g. on Android - mptcpize needs to be launched with all apps that want MPTCP: we could have more control from BPF to enable MPTCP only for some apps or all the ones of a netns or a cgroup, etc. - it is not in BPF, we cannot talk about it at netdev conf. So this patchset attempts to use BPF to implement functions similer to mptcpize. The main idea is add a hook in sys_socket() to change the protocol id from IPPROTO_TCP (or 0) to IPPROTO_MPTCP. 1. This first solution [3] is using "cgroup/sock_create": Implement a new helper bpf_mptcpify() to change the protocol id: +BPF_CALL_1(bpf_mptcpify, struct sock *, sk) +{ + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) { + sk->sk_protocol = IPPROTO_MPTCP; + return (unsigned long)sk; + } + + return (unsigned long)NULL; +} + +const struct bpf_func_proto bpf_mptcpify_proto = { + .func = bpf_mptcpify, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK], + .arg1_type = ARG_PTR_TO_CTX, +}; Add a hook in "cgroup/sock_create" section, invoking bpf_mptcpify() helper in this hook: +SEC("cgroup/sock_create") +int sock(struct bpf_sock *ctx) +{ + struct sock *sk; + + if (ctx->type != SOCK_STREAM) + return 1; + + sk = bpf_mptcpify(ctx); + if (!sk) + return 1; + + protocol = sk->sk_protocol; + return 1; +} But this solution doesn't work, because the sock_create section is hooked at the end of inet_create(). It's too late to change the protocol id. 2. The second solution [4] is using "fentry": Implement a bpf_mptcpify() helper: +BPF_CALL_1(bpf_mptcpify, struct socket_args *, args) +{ + if (args->family == AF_INET && + args->type == SOCK_STREAM && + (!args->protocol || args->protocol == IPPROTO_TCP)) + args->protocol = IPPROTO_MPTCP; + + return 0; +} + +BTF_ID_LIST(bpf_mptcpify_btf_ids) +BTF_ID(struct, socket_args) + +static const struct bpf_func_proto bpf_mptcpify_proto = { + .func = bpf_mptcpify, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &bpf_mptcpify_btf_ids[0], +}; Add a new wrapper socket_create() in sys_socket() path, passing a pointer of struct socket_args (int family; int type; int protocol) to the wrapper. +int socket_create(struct socket_args *args, struct socket **res) +{ + return sock_create(args->family, args->type, args->protocol, res); +} +EXPORT_SYMBOL(socket_create); + /** * sock_create_kern - creates a socket (kernel space) * @net: net namespace @@ -1608,6 +1614,7 @@ EXPORT_SYMBOL(sock_create_kern); static struct socket *__sys_socket_create(int family, int type, int protocol) { + struct socket_args args = { 0 }; struct socket *sock; int retval; @@ -1621,7 +1628,10 @@ static struct socket *__sys_socket_create(int family, int type, int protocol) return ERR_PTR(-EINVAL); type &= SOCK_TYPE_MASK; - retval = sock_create(family, type, protocol, &sock); + args.family = family; + args.type = type; + args.protocol = protocol; + retval = socket_create(&args, &sock); if (retval < 0) return ERR_PTR(retval); Add "fentry" hook to the newly added wrapper, invoking bpf_mptcpify() in the hook: +SEC("fentry/socket_create") +int BPF_PROG(trace_socket_create, void *args, + struct socket **res) +{ + bpf_mptcpify(args); + return 0; +} This version works. But it's just a work around version. Since the code to add a wrapper to sys_socket() is not very elegant indeed, and it shouldn't be accepted by upstream. 3. The last solution is this patchset itself: Introduce new program type BPF_PROG_TYPE_CGROUP_SOCKINIT and attach type BPF_CGROUP_SOCKINIT on cgroup basis. Define BPF_CGROUP_RUN_PROG_SOCKINIT() helper, and implement __cgroup_bpf_run_sockinit() helper to run a sockinit program: +#define BPF_CGROUP_RUN_PROG_SOCKINIT(family, type, protocol) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled(CGROUP_SOCKINIT)) \ + __ret = __cgroup_bpf_run_sockinit(family, type, protocol, \ + CGROUP_SOCKINIT); \ + __ret; \ +}) + Invoke BPF_CGROUP_RUN_PROG_SOCKINIT() in __socket_create() to change the arguments: @@ -1469,6 +1469,12 @@ int __sock_create(struct net *net, int family, int type, int protocol, struct socket *sock; const struct net_proto_family *pf; + if (!kern) { + err = BPF_CGROUP_RUN_PROG_SOCKINIT(&family, &type, &protocol); + if (err) + return err; + } + /* * Check protocol is in range */ Define BPF program in this newly added 'sockinit' SEC, so it will be hooked in BPF_CGROUP_RUN_PROG_SOCKINIT() in __socket_create(). @@ -158,6 +158,11 @@ static struct sec_name_test tests[] = { {0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT}, {0, BPF_CGROUP_SETSOCKOPT}, }, + { + "cgroup/sockinit", + {0, BPF_PROG_TYPE_CGROUP_SOCKINIT, BPF_CGROUP_SOCKINIT}, + {0, BPF_CGROUP_SOCKINIT}, + }, }; +SEC("cgroup/sockinit") +int mptcpify(struct bpf_sockinit_ctx *ctx) +{ + if ((ctx->family == AF_INET || ctx->family == AF_INET6) && + ctx->type == SOCK_STREAM && + (!ctx->protocol || ctx->protocol == IPPROTO_TCP)) { + ctx->protocol = IPPROTO_MPTCP; + } + + return 1; +} This version is the best solution we have found so far. [1] https://github.com/multipath-tcp/mptcp_net-next/wiki [2] https://github.com/multipath-tcp/mptcp_net-next/issues/79 [3] https://patchwork.kernel.org/project/mptcp/cover/cover.1688215769.git.geliang.tang@xxxxxxxx/ [4] https://patchwork.kernel.org/project/mptcp/cover/cover.1688366249.git.geliang.tang@xxxxxxxx/ Geliang Tang (8): bpf: Add new prog type sockinit bpf: Run a sockinit program net: socket: run sockinit hooks libbpf: Support sockinit hook selftests/bpf: Add mptcpify program selftests/bpf: use random netns name for mptcp selftests/bpf: add two mptcp netns helpers selftests/bpf: Add mptcpify selftest include/linux/bpf-cgroup-defs.h | 1 + include/linux/bpf-cgroup.h | 14 ++ include/linux/bpf_types.h | 2 + include/uapi/linux/bpf.h | 8 ++ kernel/bpf/cgroup.c | 90 +++++++++++++ kernel/bpf/syscall.c | 7 + kernel/bpf/verifier.c | 1 + net/socket.c | 6 + tools/include/uapi/linux/bpf.h | 8 ++ tools/lib/bpf/libbpf.c | 3 + tools/lib/bpf/libbpf_probes.c | 1 + .../bpf/cgroup_getset_retval_hooks.h | 1 + .../testing/selftests/bpf/prog_tests/mptcp.c | 123 ++++++++++++++++-- .../selftests/bpf/prog_tests/section_names.c | 5 + tools/testing/selftests/bpf/progs/mptcpify.c | 26 ++++ 15 files changed, 287 insertions(+), 9 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/mptcpify.c -- 2.35.3