On Tue, Mar 19, 2019 at 10:20:58AM +0000, Lorenz Bauer wrote: > Allow looking up a sock_common. This gives eBPF programs > access to timewait and request sockets. > > Signed-off-by: Lorenz Bauer <lmb@xxxxxxxxxxxxxx> > --- > include/uapi/linux/bpf.h | 20 ++++++- > kernel/bpf/verifier.c | 3 +- > net/core/filter.c | 113 +++++++++++++++++++++++++++++++++++---- > 3 files changed, 124 insertions(+), 12 deletions(-) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 983b25cb608d..8e4f8276942a 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -2374,6 +2374,23 @@ union bpf_attr { > * Return > * A **struct bpf_sock** pointer on success, or NULL in > * case of failure. > + * > + * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) > + * Description > + * Look for TCP socket matching *tuple*, optionally in a child > + * network namespace *netns*. The return value must be checked, > + * and if non-**NULL**, released via **bpf_sk_release**\ (). > + * > + * This function is identical to bpf_sk_lookup_tcp, except that it > + * also returns timewait or request sockets. Use bpf_sk_fullsock > + * or bpf_tcp_sock to access the full structure. > + * > + * This helper is available only if the kernel was compiled with > + * **CONFIG_NET** configuration option. > + * Return > + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. > + * For sockets with reuseport option, the **struct bpf_sock** > + * result is from **reuse->socks**\ [] using the hash of the tuple. 
> */ > #define __BPF_FUNC_MAPPER(FN) \ > FN(unspec), \ > @@ -2474,7 +2491,8 @@ union bpf_attr { > FN(sk_fullsock), \ > FN(tcp_sock), \ > FN(skb_ecn_set_ce), \ > - FN(get_listener_sock), > + FN(get_listener_sock), \ > + FN(skc_lookup_tcp), > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > * function eBPF program intends to call > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index f60d9df4e00a..94420942af32 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -369,7 +369,8 @@ static bool is_release_function(enum bpf_func_id func_id) > static bool is_acquire_function(enum bpf_func_id func_id) > { > return func_id == BPF_FUNC_sk_lookup_tcp || > - func_id == BPF_FUNC_sk_lookup_udp; > + func_id == BPF_FUNC_sk_lookup_udp || > + func_id == BPF_FUNC_skc_lookup_tcp; > } > > static bool is_ptr_cast_function(enum bpf_func_id func_id) > diff --git a/net/core/filter.c b/net/core/filter.c > index f879791ea53f..f5210773cfd8 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -5156,15 +5156,15 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, > return sk; > } > > -/* bpf_sk_lookup performs the core lookup for different types of sockets, > +/* bpf_skc_lookup performs the core lookup for different types of sockets, > * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE. > * Returns the socket as an 'unsigned long' to simplify the casting in the > * callers to satisfy BPF_CALL declarations. 
> */ > static unsigned long > -__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, > - struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, > - u64 flags) > +__bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, > + struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, > + u64 flags) > { > struct sock *sk = NULL; > u8 family = AF_UNSPEC; > @@ -5192,15 +5192,28 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, > put_net(net); > } > > - if (sk) > - sk = sk_to_full_sk(sk); > out: > return (unsigned long) sk; > } > > static unsigned long > -bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, > - u8 proto, u64 netns_id, u64 flags) > +__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, > + struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, > + u64 flags) > +{ > + struct sock *sk; > + > + sk = (struct sock *)__bpf_skc_lookup(skb, tuple, len, caller_net, > + ifindex, proto, netns_id, flags); This back and forth casting of pointer and long here and in bpf_sk_lookup is not pretty. Before this patch there was one ulong cast, but now we probably need to refactor this a bit. Like changing __bpf_skc_lookup to return 'struct sock *' and do final (ulong) cast only in bpf_[xdp_]sk[c]_lookup_[tcp|udp] that return it to bpf side?