On Fri, Jun 05, 2020 at 02:40 PM CEST, Wang Li wrote: > Sometimes we need net namespace as part of the key for BPF_MAP_TYPE_SOCKHASH to > distinguish the connections with same five-tuples, for example when we do the > sock_map acceleration for the proxy that uses 127.0.0.1 to 127.0.0.1 connections > in different containers on same node. > And we export the netns inum instead of the real pointer of struct net to avoid > the potential security issue. > > Signed-off-by: Wang Li <wangli09@xxxxxxxxxxxx> > Signed-off-by: huangxuesen <huangxuesen@xxxxxxxxxxxx> > Signed-off-by: yangxingwu <yangxingwu@xxxxxxxxxxxx> > --- > include/uapi/linux/bpf.h | 2 ++ > net/core/filter.c | 17 +++++++++++++++++ > tools/include/uapi/linux/bpf.h | 2 ++ > 3 files changed, 21 insertions(+) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index c65b374a5090..0fe7e459f023 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -3947,6 +3947,8 @@ struct bpf_sock_ops { > * there is a full socket. If not, the > * fields read as zero. > */ > + __u32 netns_inum; /* The net namespace this sock belongs to */ > + In uapi/linux/bpf.h we already have a field `netns_ino` for storing the net namespace inode number in a couple of structs (bpf_prog_info, bpf_map_info). It would be nice to keep the naming consistent. 
> __u32 snd_cwnd; > __u32 srtt_us; /* Averaged RTT << 3 in usecs */ > __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */ > diff --git a/net/core/filter.c b/net/core/filter.c > index d01a244b5087..bfe448ace25f 100644 > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -8450,6 +8450,23 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, > is_fullsock)); > break; > > + case offsetof(struct bpf_sock_ops, netns_inum): > +#ifdef CONFIG_NET_NS > + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( > + struct bpf_sock_ops_kern, sk), > + si->dst_reg, si->src_reg, > + offsetof(struct bpf_sock_ops_kern, sk)); > + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( > + struct sock_common, skc_net), > + si->dst_reg, si->dst_reg, > + offsetof(struct sock_common, skc_net)); > + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, > + offsetof(struct net, ns.inum)); > +#else > + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); > +#endif > + break; > + > case offsetof(struct bpf_sock_ops, state): > BUILD_BUG_ON(sizeof_field(struct sock_common, skc_state) != 1); > > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > index c65b374a5090..0fe7e459f023 100644 > --- a/tools/include/uapi/linux/bpf.h > +++ b/tools/include/uapi/linux/bpf.h > @@ -3947,6 +3947,8 @@ struct bpf_sock_ops { > * there is a full socket. If not, the > * fields read as zero. > */ > + __u32 netns_inum; /* The net namespace this sock belongs to */ > + > __u32 snd_cwnd; > __u32 srtt_us; /* Averaged RTT << 3 in usecs */ > __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */