[RPC PATCH bpf-next] bpf: implement new BPF_CGROUP_INET_SOCK_POST_CONNECT

Stanislav Fomichev <sdf@xxxxxxxxxx> · Thu, 14 Jan 2021 16:17:24 -0800

We are playing with doing hybrid conntrack where BPF generates
connect/disconnect/etc events and puts them into perfbuf (or, later,
new ringbuf). We can get most of the functionality out of
existing hooks:
- BPF_CGROUP_SOCK_OPS fully covers TCP
- BPF_CGROUP_UDP4_SENDMSG covers unconnected UDP (with sampling, etc)

The only missing bit is connected UDP where we can get some
information from the existing BPF_CGROUP_INET{4,6}_CONNECT if the caller
did explicit bind(); otherwise, in an autobind case, we get
only destination addr/port and no source port because this hook
triggers prior to that.

We'd really like to avoid the cost of BPF_CGROUP_INET_EGRESS
and filtering UDP (which covers both connected and unconnected UDP,
but loses that connect/disconnect pseudo signal).

The proposal is to add a new BPF_CGROUP_INET_SOCK_POST_CONNECT which
triggers right before sys_connect exits in the AF_INET{,6} case.
The context is bpf_sock which lets BPF examine the socket state.
There is really no reason for it to trigger for all inet socks,
I've considered adding BPF_CGROUP_UDP_POST_CONNECT, but decided
that it might be better to have a generic inet case.

New hook triggers right before sys_connect() returns and gives
BPF an opportunity to explore source & destination addresses
as well as ability to return EPERM to the user.

This is somewhat analogous to the existing BPF_CGROUP_INET{4,6}_POST_BIND
hooks with the intention to log the connection addresses (after autobind).

Signed-off-by: Stanislav Fomichev <sdf@xxxxxxxxxx>
Change-Id: I46d0122f93c58b17bfae5ba5040b0b0343908c19
---
 include/linux/bpf-cgroup.h | 17 +++++++++++++++++
 include/uapi/linux/bpf.h   |  1 +
 kernel/bpf/syscall.c       |  3 +++
 net/core/filter.c          |  4 ++++
 net/ipv4/af_inet.c         |  7 ++++++-
 5 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 72e69a0e1e8c..f110935258b9 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -213,12 +213,29 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 	__ret;								       \
 })
 
+#define BPF_CGROUP_RUN_SK_PROG_LOCKED(sk, type)				       \
+({									       \
+	int __ret = 0;							       \
+	if (cgroup_bpf_enabled) {					       \
+		lock_sock(sk);						       \
+		__ret = __cgroup_bpf_run_filter_sk(sk, type);		       \
+		release_sock(sk);					       \
+	}								       \
+	__ret;								       \
+})
+
 #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk)				       \
 	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE)
 
 #define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk)			       \
 	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_RELEASE)
 
+#define BPF_CGROUP_RUN_PROG_INET_SOCK_POST_CONNECT(sk)			       \
+	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_POST_CONNECT)
+
+#define BPF_CGROUP_RUN_PROG_INET_SOCK_POST_CONNECT_LOCKED(sk)		       \
+	BPF_CGROUP_RUN_SK_PROG_LOCKED(sk, BPF_CGROUP_INET_SOCK_POST_CONNECT)
+
 #define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk)				       \
 	BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND)
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a1ad32456f89..3235f7bd131f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -241,6 +241,7 @@ enum bpf_attach_type {
 	BPF_XDP_CPUMAP,
 	BPF_SK_LOOKUP,
 	BPF_XDP,
+	BPF_CGROUP_INET_SOCK_POST_CONNECT,
 	__MAX_BPF_ATTACH_TYPE
 };
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c3bb03c8371f..7d6fd1e32d22 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1958,6 +1958,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
 		switch (expected_attach_type) {
 		case BPF_CGROUP_INET_SOCK_CREATE:
 		case BPF_CGROUP_INET_SOCK_RELEASE:
+		case BPF_CGROUP_INET_SOCK_POST_CONNECT:
 		case BPF_CGROUP_INET4_POST_BIND:
 		case BPF_CGROUP_INET6_POST_BIND:
 			return 0;
@@ -2910,6 +2911,7 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
 		return BPF_PROG_TYPE_CGROUP_SKB;
 	case BPF_CGROUP_INET_SOCK_CREATE:
 	case BPF_CGROUP_INET_SOCK_RELEASE:
+	case BPF_CGROUP_INET_SOCK_POST_CONNECT:
 	case BPF_CGROUP_INET4_POST_BIND:
 	case BPF_CGROUP_INET6_POST_BIND:
 		return BPF_PROG_TYPE_CGROUP_SOCK;
@@ -3063,6 +3065,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
 	case BPF_CGROUP_INET_EGRESS:
 	case BPF_CGROUP_INET_SOCK_CREATE:
 	case BPF_CGROUP_INET_SOCK_RELEASE:
+	case BPF_CGROUP_INET_SOCK_POST_CONNECT:
 	case BPF_CGROUP_INET4_BIND:
 	case BPF_CGROUP_INET6_BIND:
 	case BPF_CGROUP_INET4_POST_BIND:
diff --git a/net/core/filter.c b/net/core/filter.c
index 9ab94e90d660..d955321d3415 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7683,12 +7683,14 @@ static bool __sock_filter_check_attach_type(int off,
 		switch (attach_type) {
 		case BPF_CGROUP_INET_SOCK_CREATE:
 		case BPF_CGROUP_INET_SOCK_RELEASE:
+		case BPF_CGROUP_INET_SOCK_POST_CONNECT:
 			goto full_access;
 		default:
 			return false;
 		}
 	case bpf_ctx_range(struct bpf_sock, src_ip4):
 		switch (attach_type) {
+		case BPF_CGROUP_INET_SOCK_POST_CONNECT:
 		case BPF_CGROUP_INET4_POST_BIND:
 			goto read_only;
 		default:
@@ -7696,6 +7698,7 @@ static bool __sock_filter_check_attach_type(int off,
 		}
 	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
 		switch (attach_type) {
+		case BPF_CGROUP_INET_SOCK_POST_CONNECT:
 		case BPF_CGROUP_INET6_POST_BIND:
 			goto read_only;
 		default:
@@ -7703,6 +7706,7 @@ static bool __sock_filter_check_attach_type(int off,
 		}
 	case bpf_ctx_range(struct bpf_sock, src_port):
 		switch (attach_type) {
+		case BPF_CGROUP_INET_SOCK_POST_CONNECT:
 		case BPF_CGROUP_INET4_POST_BIND:
 		case BPF_CGROUP_INET6_POST_BIND:
 			goto read_only;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b94fa8eb831b..568654cafa48 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -574,7 +574,10 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
 
 	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
 		return -EAGAIN;
-	return sk->sk_prot->connect(sk, uaddr, addr_len);
+	err = sk->sk_prot->connect(sk, uaddr, addr_len);
+	if (!err)
+		err = BPF_CGROUP_RUN_PROG_INET_SOCK_POST_CONNECT_LOCKED(sk);
+	return err;
 }
 EXPORT_SYMBOL(inet_dgram_connect);
 
@@ -723,6 +726,8 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 
 	lock_sock(sock->sk);
 	err = __inet_stream_connect(sock, uaddr, addr_len, flags, 0);
+	if (!err)
+		err = BPF_CGROUP_RUN_PROG_INET_SOCK_POST_CONNECT(sock->sk);
 	release_sock(sock->sk);
 	return err;
 }
-- 
2.30.0.284.gd98b1dd5eaa7-goog