When bpf program calling bpf_setsockopt(SOL_SOCKET), it could be run in softirq and doesn't make sense to do the capable check. There was a similar situation in bpf_setsockopt(TCP_CONGESTION). In commit 8d650cdedaab ("tcp: fix tcp_set_congestion_control() use from bpf hook"), tcp_set_congestion_control(..., cap_net_admin) was added to skip the cap check for bpf prog. This patch adds sockopt_ns_capable() and sockopt_capable() for the sk_setsockopt() to use. They will consider the has_current_bpf_ctx() before doing the ns_capable() and capable() test. They are in EXPORT_SYMBOL for the ipv6 module to use in a latter patch. Suggested-by: Stanislav Fomichev <sdf@xxxxxxxxxx> Reviewed-by: Stanislav Fomichev <sdf@xxxxxxxxxx> Signed-off-by: Martin KaFai Lau <kafai@xxxxxx> --- include/net/sock.h | 2 ++ net/core/sock.c | 38 +++++++++++++++++++++++++------------- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index b2ff230860c6..72b78c2b6f83 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1723,6 +1723,8 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) void sockopt_lock_sock(struct sock *sk); void sockopt_release_sock(struct sock *sk); +bool sockopt_ns_capable(struct user_namespace *ns, int cap); +bool sockopt_capable(int cap); /* Used by processes to "lock" a socket state, so that * interrupts and bottom half handlers won't change it diff --git a/net/core/sock.c b/net/core/sock.c index d3683228376f..7ea46e4700fd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1060,6 +1060,18 @@ void sockopt_release_sock(struct sock *sk) } EXPORT_SYMBOL(sockopt_release_sock); +bool sockopt_ns_capable(struct user_namespace *ns, int cap) +{ + return has_current_bpf_ctx() || ns_capable(ns, cap); +} +EXPORT_SYMBOL(sockopt_ns_capable); + +bool sockopt_capable(int cap) +{ + return has_current_bpf_ctx() || capable(cap); +} +EXPORT_SYMBOL(sockopt_capable); + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. @@ -1095,7 +1107,7 @@ static int sk_setsockopt(struct sock *sk, int level, int optname, switch (optname) { case SO_DEBUG: - if (val && !capable(CAP_NET_ADMIN)) + if (val && !sockopt_capable(CAP_NET_ADMIN)) ret = -EACCES; else sock_valbool_flag(sk, SOCK_DBG, valbool); @@ -1139,7 +1151,7 @@ static int sk_setsockopt(struct sock *sk, int level, int optname, break; case SO_SNDBUFFORCE: - if (!capable(CAP_NET_ADMIN)) { + if (!sockopt_capable(CAP_NET_ADMIN)) { ret = -EPERM; break; } @@ -1161,7 +1173,7 @@ static int sk_setsockopt(struct sock *sk, int level, int optname, break; case SO_RCVBUFFORCE: - if (!capable(CAP_NET_ADMIN)) { + if (!sockopt_capable(CAP_NET_ADMIN)) { ret = -EPERM; break; } @@ -1188,8 +1200,8 @@ static int sk_setsockopt(struct sock *sk, int level, int optname, case SO_PRIORITY: if ((val >= 0 && val <= 6) || - ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || - ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || + sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) sk->sk_priority = val; else ret = -EPERM; @@ -1334,8 +1346,8 @@ static int sk_setsockopt(struct sock *sk, int level, int optname, clear_bit(SOCK_PASSSEC, &sock->flags); break; case SO_MARK: - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && - !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { + if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && + !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { ret = -EPERM; break; } @@ -1343,8 +1355,8 @@ static int sk_setsockopt(struct sock *sk, int level, int optname, __sock_set_mark(sk, val); break; case SO_RCVMARK: - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && - !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { + if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && + !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { ret = -EPERM; break; } @@ -1378,7 +1390,7 @@ static int sk_setsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_NET_RX_BUSY_POLL case SO_BUSY_POLL: /* allow unprivileged users to decrease the value */ - if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN)) + if ((val > sk->sk_ll_usec) && !sockopt_capable(CAP_NET_ADMIN)) ret = -EPERM; else { if (val < 0) @@ -1388,13 +1400,13 @@ static int sk_setsockopt(struct sock *sk, int level, int optname, } break; case SO_PREFER_BUSY_POLL: - if (valbool && !capable(CAP_NET_ADMIN)) + if (valbool && !sockopt_capable(CAP_NET_ADMIN)) ret = -EPERM; else WRITE_ONCE(sk->sk_prefer_busy_poll, valbool); break; case SO_BUSY_POLL_BUDGET: - if (val > READ_ONCE(sk->sk_busy_poll_budget) && !capable(CAP_NET_ADMIN)) { + if (val > READ_ONCE(sk->sk_busy_poll_budget) && !sockopt_capable(CAP_NET_ADMIN)) { ret = -EPERM; } else { if (val < 0 || val > U16_MAX) @@ -1465,7 +1477,7 @@ static int sk_setsockopt(struct sock *sk, int level, int optname, * scheduler has enough safe guards. */ if (sk_txtime.clockid != CLOCK_MONOTONIC && - !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { + !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { ret = -EPERM; break; } -- 2.30.2