From: Martin KaFai Lau <martin.lau@xxxxxxxxxx>
When a bad bpf prog '.init' calls
bpf_setsockopt(TCP_CONGESTION, "itself"), it will trigger this loop:
.init => bpf_setsockopt(tcp_cc) => .init => bpf_setsockopt(tcp_cc)
... => .init => bpf_setsockopt(tcp_cc).
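For example, a buggy bpf cc prog along these lines would trigger it.
The sketch below is only illustrative and is not part of this patch:
the "bad_cc" name is hypothetical, the SOL_TCP/TCP_CONGESTION defines
are hand-rolled (same values as the uapi headers), and the other
mandatory tcp_congestion_ops callbacks are omitted:

  /* bad_cc.bpf.c: hypothetical reproducer sketch */
  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  #define SOL_TCP         6
  #define TCP_CONGESTION  13

  char _license[] SEC("license") = "GPL";

  static char bad_cc_name[] = "bad_cc";

  SEC("struct_ops/bad_cc_init")
  void BPF_PROG(bad_cc_init, struct sock *sk)
  {
          /* Switching the cc to "bad_cc" itself re-enters this .init,
           * which calls bpf_setsockopt(TCP_CONGESTION) again, and so on.
           */
          bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
                         bad_cc_name, sizeof(bad_cc_name));
  }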
It was prevented by the prog->active counter before, but the
prog->active detection cannot be used in struct_ops, as explained in
the earlier patch of this set.
In this patch, the second bpf_setsockopt(tcp_cc) is rejected in order
to break the loop. This is done by checking whether the previous
bpf_run_ctx has saved the same sk pointer in its bpf_cookie.
Note that this essentially means only the first '.init' can
call bpf_setsockopt(TCP_CONGESTION) to pick a fallback cc (e.g. when
the peer does not support ECN), and the second '.init' cannot fall
back to another cc. This applies even when the second
bpf_setsockopt(TCP_CONGESTION) would not cause a loop.
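A first '.init' can therefore still do the usual fallback, e.g. when
the peer did not negotiate ECN. A hypothetical sketch, continuing the
conventions of the example above ("my_ecn_cc" and the hand-rolled
TCP_ECN_OK define are illustrative only; the value matches
include/net/tcp.h):

  #define TCP_ECN_OK      1

  SEC("struct_ops/my_ecn_cc_init")
  void BPF_PROG(my_ecn_cc_init, struct sock *sk)
  {
          struct tcp_sock *tp = (struct tcp_sock *)sk;
          char fallback[] = "cubic";

          /* This first bpf_setsockopt(TCP_CONGESTION) is still allowed.
           * If the fallback were another bpf cc whose .init called
           * bpf_setsockopt(TCP_CONGESTION) again, that nested call
           * would now get -EBUSY.
           */
          if (!(tp->ecn_flags & TCP_ECN_OK))
                  bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
                                 fallback, sizeof(fallback));
  }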
Signed-off-by: Martin KaFai Lau <martin.lau@xxxxxxxxxx>
---
include/linux/filter.h | 3 +++
net/core/filter.c | 4 ++--
net/ipv4/bpf_tcp_ca.c | 54 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 59 insertions(+), 2 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 98e28126c24b..9942ecc68a45 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -911,6 +911,9 @@ int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len);
bool sk_filter_charge(struct sock *sk, struct sk_filter *fp);
void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
+int _bpf_setsockopt(struct sock *sk, int level, int optname,
+ char *optval, int optlen);
+
u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
#define __bpf_call_base_args \
((u64 (*)(u64, u64, u64, u64, u64, const struct bpf_insn *)) \
diff --git a/net/core/filter.c b/net/core/filter.c
index f4cea3ff994a..e56a1ebcf1bc 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5244,8 +5244,8 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname,
return -EINVAL;
}
-static int _bpf_setsockopt(struct sock *sk, int level, int optname,
- char *optval, int optlen)
+int _bpf_setsockopt(struct sock *sk, int level, int optname,
+ char *optval, int optlen)
{
if (sk_fullsock(sk))
sock_owned_by_me(sk);
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 6da16ae6a962..a9f2cab5ffbc 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -144,6 +144,57 @@ static const struct bpf_func_proto bpf_tcp_send_ack_proto = {
.arg2_type = ARG_ANYTHING,
};
+BPF_CALL_5(bpf_init_ops_setsockopt, struct sock *, sk, int, level,
+ int, optname, char *, optval, int, optlen)
+{
+ struct bpf_tramp_run_ctx *run_ctx, *saved_run_ctx;
+ int ret;
+
+ if (optname != TCP_CONGESTION)
+ return _bpf_setsockopt(sk, level, optname, optval, optlen);
+
+ run_ctx = (struct bpf_tramp_run_ctx *)current->bpf_ctx;
+ if (unlikely(run_ctx->saved_run_ctx &&
+ run_ctx->saved_run_ctx->type == BPF_RUN_CTX_TYPE_STRUCT_OPS)) {
+ saved_run_ctx = (struct bpf_tramp_run_ctx *)run_ctx->saved_run_ctx;
+ /* This stops the following loop:
+ *
+ * .init => bpf_setsockopt(tcp_cc) => .init =>
+ * bpf_setsockopt(tcp_cc) => .init => ....
+ *
+ * The second bpf_setsockopt(tcp_cc) is not allowed
+ * in order to break the loop when both .init
+ * callbacks are the same bpf prog.
+ *
+ * This applies even when the second bpf_setsockopt(tcp_cc)
+ * would not cause a loop. It means only the first
+ * '.init' can call bpf_setsockopt(TCP_CONGESTION) to
+ * pick a fallback cc (e.g. the peer does not support
+ * ECN) and the second '.init' cannot fall back to
+ * another cc.
+ */
+ if (saved_run_ctx->bpf_cookie == (uintptr_t)sk)
+ return -EBUSY;
+ }
+
+ run_ctx->bpf_cookie = (uintptr_t)sk;
+ ret = _bpf_setsockopt(sk, level, optname, optval, optlen);
+ run_ctx->bpf_cookie = 0;