On Tue, 16 Aug 2022 at 17:34, David Howells <dhowells@xxxxxxxxxx> wrote: > > Fix this by adding a new helper, __locked_read_sk_user_data_with_flags() > that checks to see if sk->sk_callback_lock() is held and use that here > instead. Hi, I wonder if we could make this more generic, since I think future code that uses __rcu_dereference_sk_user_data_with_flags() may also hit this bug. To be more specific, maybe we can refactor __rcu_dereference_sk_user_data_with_flags() into __rcu_dereference_sk_user_data_with_flags_check(), in the same way that rcu_dereference() has rcu_dereference_check(). Maybe: diff --git a/include/net/sock.h b/include/net/sock.h index 05a1bbdf5805..cf123954eab9 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -578,18 +578,27 @@ static inline bool sk_user_data_is_nocopy(const struct sock *sk) #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) /** - * __rcu_dereference_sk_user_data_with_flags - return the pointer - * only if argument flags all has been set in sk_user_data. Otherwise - * return NULL + * __rcu_dereference_sk_user_data_with_flags_check - return the pointer + * only if argument flags all has been set in sk_user_data, with debug + * checking. Otherwise return NULL * - * @sk: socket - * @flags: flag bits + * Do __rcu_dereference_sk_user_data_with_flags(), but check that the + * conditions under which the rcu dereference will take place are correct, + * which is a bit like rcu_dereference_check() and rcu_dereference(). 
+ * + * @sk : socket + * @flags : flag bits + * @condition : the conditions under which the rcu dereference will + * take place */ static inline void * -__rcu_dereference_sk_user_data_with_flags(const struct sock *sk, - uintptr_t flags) +__rcu_dereference_sk_user_data_with_flags_check(const struct sock *sk, + uintptr_t flags, bool condition) { - uintptr_t sk_user_data = (uintptr_t)rcu_dereference(__sk_user_data(sk)); + uintptr_t sk_user_data; + + sk_user_data = (uintptr_t)rcu_dereference_check(__sk_user_data(sk), + condition); WARN_ON_ONCE(flags & SK_USER_DATA_PTRMASK); @@ -598,6 +607,8 @@ __rcu_dereference_sk_user_data_with_flags(const struct sock *sk, return NULL; } +#define __rcu_dereference_sk_user_data_with_flags(sk, flags) \ + __rcu_dereference_sk_user_data_with_flags_check(sk, flags, 0) #define rcu_dereference_sk_user_data(sk) \ __rcu_dereference_sk_user_data_with_flags(sk, 0) #define __rcu_assign_sk_user_data_with_flags(sk, ptr, flags) \ > +/** > + * __locked_read_sk_user_data_with_flags - return the pointer > + * only if argument flags all has been set in sk_user_data. 
Otherwise > + * return NULL > + * > + (uintptr_t)rcu_dereference_check(__sk_user_data(sk), > + lockdep_is_held(&sk->sk_callback_lock)); > diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c > index 85fa9dbfa8bf..82c61612f382 100644 > --- a/kernel/bpf/reuseport_array.c > +++ b/kernel/bpf/reuseport_array.c > @@ -24,7 +24,7 @@ void bpf_sk_reuseport_detach(struct sock *sk) > struct sock __rcu **socks; > > write_lock_bh(&sk->sk_callback_lock); > - socks = __rcu_dereference_sk_user_data_with_flags(sk, SK_USER_DATA_BPF); > + socks = __locked_read_sk_user_data_with_flags(sk, SK_USER_DATA_BPF); > if (socks) { > WRITE_ONCE(sk->sk_user_data, NULL); > /* Then, as you point out, we can pass the condition lockdep_is_held(&sk->sk_callback_lock) to __rcu_dereference_sk_user_data_with_flags_check() to keep the RCU lockdep check happy, as below: diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c index 85fa9dbfa8bf..a772610987c5 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -24,7 +24,10 @@ void bpf_sk_reuseport_detach(struct sock *sk) struct sock __rcu **socks; write_lock_bh(&sk->sk_callback_lock); - socks = __rcu_dereference_sk_user_data_with_flags(sk, SK_USER_DATA_BPF); + socks = __rcu_dereference_sk_user_data_with_flags_check( + sk, SK_USER_DATA_BPF, + lockdep_is_held(&sk->sk_callback_lock)); + if (socks) { WRITE_ONCE(sk->sk_user_data, NULL); /*