Jakub, thanks for your comments. > 在 2020年6月5日,下午10:53,Jakub Sitnicki <jakub@xxxxxxxxxxxxxx> 写道: > > On Fri, Jun 05, 2020 at 02:40 PM CEST, Wang Li wrote: >> Sometimes we need net namespace as part of the key for BPF_MAP_TYPE_SOCKHASH to >> distinguish the connections with same five-tuples, for example when we do the >> sock_map acceleration for the proxy that uses 127.0.0.1 to 127.0.0.1 connections >> in different containers on same node. >> And we export the netns inum instead of the real pointer of struct net to avoid >> the potential security issue. >> >> Signed-off-by: Wang Li <wangli09@xxxxxxxxxxxx> >> Signed-off-by: huangxuesen <huangxuesen@xxxxxxxxxxxx> >> Signed-off-by: yangxingwu <yangxingwu@xxxxxxxxxxxx> >> --- >> include/uapi/linux/bpf.h | 2 ++ >> net/core/filter.c | 17 +++++++++++++++++ >> tools/include/uapi/linux/bpf.h | 2 ++ >> 3 files changed, 21 insertions(+) >> >> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h >> index c65b374a5090..0fe7e459f023 100644 >> --- a/include/uapi/linux/bpf.h >> +++ b/include/uapi/linux/bpf.h >> @@ -3947,6 +3947,8 @@ struct bpf_sock_ops { >> * there is a full socket. If not, the >> * fields read as zero. >> */ >> + __u32 netns_inum; /* The net namespace this sock belongs to */ >> + > > In uapi/linux/bpf.h we have a field `netns_ino` for storing net > namespace inode number in a couple structs (bpf_prog_info, > bpf_map_info). Would be nice to keep the naming consistent. 
> >> __u32 snd_cwnd; >> __u32 srtt_us; /* Averaged RTT << 3 in usecs */ >> __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */ >> diff --git a/net/core/filter.c b/net/core/filter.c >> index d01a244b5087..bfe448ace25f 100644 >> --- a/net/core/filter.c >> +++ b/net/core/filter.c >> @@ -8450,6 +8450,23 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, >> is_fullsock)); >> break; >> >> + case offsetof(struct bpf_sock_ops, netns_inum): >> +#ifdef CONFIG_NET_NS >> + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( >> + struct bpf_sock_ops_kern, sk), >> + si->dst_reg, si->src_reg, >> + offsetof(struct bpf_sock_ops_kern, sk)); >> + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( >> + struct sock_common, skc_net), >> + si->dst_reg, si->dst_reg, >> + offsetof(struct sock_common, skc_net)); >> + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, >> + offsetof(struct net, ns.inum)); >> +#else >> + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); >> +#endif >> + break; >> + >> case offsetof(struct bpf_sock_ops, state): >> BUILD_BUG_ON(sizeof_field(struct sock_common, skc_state) != 1); >> >> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h >> index c65b374a5090..0fe7e459f023 100644 >> --- a/tools/include/uapi/linux/bpf.h >> +++ b/tools/include/uapi/linux/bpf.h >> @@ -3947,6 +3947,8 @@ struct bpf_sock_ops { >> * there is a full socket. If not, the >> * fields read as zero. >> */ >> + __u32 netns_inum; /* The net namespace this sock belongs to */ >> + >> __u32 snd_cwnd; >> __u32 srtt_us; /* Averaged RTT << 3 in usecs */ >> __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */