Following ipv4 stack changes, run a BPF program attached to netns before looking up a listening socket. Program can return a listening socket to use as result of socket lookup, fail the lookup, or take no action. Suggested-by: Marek Majkowski <marek@xxxxxxxxxxxxxx> Signed-off-by: Jakub Sitnicki <jakub@xxxxxxxxxxxxxx> --- Notes: v3: - Use a static_key to minimize the hook overhead when not used. (Alexei) - Don't copy struct in6_addr when populating BPF prog context. (Martin) - Adapt for running an array of attached programs. (Alexei) - Adapt for optionally skipping reuseport selection. (Martin) include/linux/filter.h | 41 +++++++++++++++++++++++++++++++++++++ net/ipv6/inet6_hashtables.c | 35 +++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/include/linux/filter.h b/include/linux/filter.h index ff7721d862c2..e7462f178213 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1336,4 +1336,45 @@ static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol, return do_reuseport; } +#if IS_ENABLED(CONFIG_IPV6) +static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, + const struct in6_addr *saddr, + const __be16 sport, + const struct in6_addr *daddr, + const u16 dport, + struct sock **psk) +{ + struct bpf_prog_array *run_array; + bool do_reuseport = false; + struct sock *sk = NULL; + + rcu_read_lock(); + run_array = rcu_dereference(net->bpf.run_array[NETNS_BPF_SK_LOOKUP]); + if (run_array) { + const struct bpf_sk_lookup_kern ctx = { + .family = AF_INET6, + .protocol = protocol, + .v6.saddr = saddr, + .v6.daddr = daddr, + .sport = sport, + .dport = dport, + }; + u32 ret; + + ret = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, &ctx, + BPF_PROG_RUN); + if (ret & (1U << BPF_REDIRECT)) { + sk = ctx.selected_sk; + do_reuseport = sk && !ctx.no_reuseport; + } else if (ret & (1U << BPF_DROP)) { + sk = ERR_PTR(-ECONNREFUSED); + } + } + rcu_read_unlock(); + + *psk = sk; + return do_reuseport; +} +#endif /* IS_ENABLED(CONFIG_IPV6) */ + #endif /* __LINUX_FILTER_H__ */ diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 03942eef8ab6..b63583d2aa76 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -21,6 +21,8 @@ #include <net/ip.h> #include <net/sock_reuseport.h> +extern struct inet_hashinfo tcp_hashinfo; + u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport) @@ -159,6 +161,31 @@ static struct sock *inet6_lhash2_lookup(struct net *net, return result; } +static inline struct sock *inet6_lookup_run_bpf(struct net *net, + struct inet_hashinfo *hashinfo, + struct sk_buff *skb, int doff, + const struct in6_addr *saddr, + const __be16 sport, + const struct in6_addr *daddr, + const u16 hnum) +{ + struct sock *sk, *reuse_sk; + bool do_reuseport; + + if (hashinfo != &tcp_hashinfo) + return NULL; /* only TCP is supported */ + + do_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, + saddr, sport, daddr, hnum, &sk); + if (do_reuseport) { + reuse_sk = lookup_reuseport(net, sk, skb, doff, + saddr, sport, daddr, hnum); + if (reuse_sk) + sk = reuse_sk; + } + return sk; +} + struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, @@ -170,6 +197,14 @@ struct sock *inet6_lookup_listener(struct net *net, struct sock *result = NULL; unsigned int hash2; + /* Lookup redirect from BPF */ + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { + result = inet6_lookup_run_bpf(net, hashinfo, skb, doff, + saddr, sport, daddr, hnum); + if (result) + goto done; + } + hash2 = ipv6_portaddr_hash(net, daddr, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); -- 2.25.4