From: Cong Wang <cong.wang@xxxxxxxxxxxxx> Technically we don't need to lock the sock in the psock work, but we need to prevent this work from running in parallel with sock_map_close(). With this, we no longer need to wait for the psock->work synchronously, because when we reach here, either this work is still pending, or is blocked on lock_sock(), or it is completed. We only need to cancel the first case asynchronously, and we need to bail out of the second case quickly by checking the SK_PSOCK_TX_ENABLED bit. Fixes: 799aa7f98d53 ("skmsg: Avoid lock_sock() in sk_psock_backlog()") Reported-by: Stanislav Fomichev <sdf@xxxxxxxxxx> Cc: John Fastabend <john.fastabend@xxxxxxxxx> Cc: Jakub Sitnicki <jakub@xxxxxxxxxxxxxx> Signed-off-by: Cong Wang <cong.wang@xxxxxxxxxxxxx> --- include/linux/skmsg.h | 2 +- net/core/skmsg.c | 19 +++++++++++++------ net/core/sock_map.c | 4 ++-- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 48f4b645193b..70d6cb94e580 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -376,7 +376,7 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err) } struct sk_psock *sk_psock_init(struct sock *sk, int node); -void sk_psock_stop(struct sk_psock *psock, bool wait); +void sk_psock_stop(struct sk_psock *psock); #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock); diff --git a/net/core/skmsg.c b/net/core/skmsg.c index ca70525621c7..c329e71ea924 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -647,6 +647,11 @@ static void sk_psock_backlog(struct work_struct *work) int ret; mutex_lock(&psock->work_mutex); + lock_sock(psock->sk); + + if (!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) + goto end; + if (unlikely(state->skb)) { spin_lock_bh(&psock->ingress_lock); skb = state->skb; @@ -672,9 +677,12 @@ static void sk_psock_backlog(struct work_struct *work) skb_bpf_redirect_clear(skb); do { ret = -EIO; - if
(!sock_flag(psock->sk, SOCK_DEAD)) + if (!sock_flag(psock->sk, SOCK_DEAD)) { + release_sock(psock->sk); ret = sk_psock_handle_skb(psock, skb, off, len, ingress); + lock_sock(psock->sk); + } if (ret <= 0) { if (ret == -EAGAIN) { sk_psock_skb_state(psock, state, skb, @@ -695,6 +703,7 @@ static void sk_psock_backlog(struct work_struct *work) kfree_skb(skb); } end: + release_sock(psock->sk); mutex_unlock(&psock->work_mutex); } @@ -803,16 +812,14 @@ static void sk_psock_link_destroy(struct sk_psock *psock) } } -void sk_psock_stop(struct sk_psock *psock, bool wait) +void sk_psock_stop(struct sk_psock *psock) { spin_lock_bh(&psock->ingress_lock); sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); sk_psock_cork_free(psock); __sk_psock_zap_ingress(psock); spin_unlock_bh(&psock->ingress_lock); - - if (wait) - cancel_work_sync(&psock->work); + cancel_work(&psock->work); } static void sk_psock_done_strp(struct sk_psock *psock); @@ -850,7 +857,7 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock) sk_psock_stop_verdict(sk, psock); write_unlock_bh(&sk->sk_callback_lock); - sk_psock_stop(psock, false); + sk_psock_stop(psock); INIT_RCU_WORK(&psock->rwork, sk_psock_destroy); queue_rcu_work(system_wq, &psock->rwork); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index a660baedd9e7..d4e11d7f459c 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -1596,7 +1596,7 @@ void sock_map_destroy(struct sock *sk) saved_destroy = psock->saved_destroy; sock_map_remove_links(sk, psock); rcu_read_unlock(); - sk_psock_stop(psock, false); + sk_psock_stop(psock); sk_psock_put(sk, psock); saved_destroy(sk); } @@ -1619,7 +1619,7 @@ void sock_map_close(struct sock *sk, long timeout) saved_close = psock->saved_close; sock_map_remove_links(sk, psock); rcu_read_unlock(); - sk_psock_stop(psock, true); + sk_psock_stop(psock); sk_psock_put(sk, psock); release_sock(sk); saved_close(sk, timeout); -- 2.34.1