A potential sleeping-while-atomic issue exists in the following call chain:

smc_switch_conns
  spin_lock_bh(&conn->send_lock)
  smc_switch_link_and_count
    smcr_link_put
      __smcr_link_clear
        smc_lgr_put
          __smc_lgr_free
            smc_lgr_free_bufs
              __smc_lgr_free_bufs
                smc_buf_free
                  smcr_buf_free
                    smcr_buf_unmap_link
                      smc_ib_put_memory_region
                        ib_dereg_mr
                          ib_dereg_mr_user
                            mr->device->ops.dereg_mr

If the IB driver's .dereg_mr hook can sleep, as it does in the cxgb4 and
efa drivers, this chain calls a sleeping function while conn->send_lock
is held as a spinlock with bottom halves disabled, and "scheduling while
atomic" occurs. Use a mutex instead of the spin lock to fix it.

Fixes: 20c9398d3309 ("net/smc: Resolve the race between SMC-R link access and clear")
Signed-off-by: Zhengchao Shao <shaozhengchao@xxxxxxxxxx>
---
 net/smc/af_smc.c   |  2 +-
 net/smc/smc.h      |  2 +-
 net/smc/smc_cdc.c  | 14 +++++++-------
 net/smc/smc_core.c |  8 ++++----
 net/smc/smc_tx.c   |  8 ++++----
 5 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index ad5bab6a44b6..c0a228def6da 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -386,7 +386,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
 	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
 	INIT_LIST_HEAD(&smc->accept_q);
 	spin_lock_init(&smc->accept_q_lock);
-	spin_lock_init(&smc->conn.send_lock);
+	mutex_init(&smc->conn.send_lock);
 	sk->sk_prot->hash(sk);
 	mutex_init(&smc->clcsock_release_lock);
 	smc_init_saved_callbacks(smc);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 18c8b7870198..ba8efed240e3 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -194,7 +194,7 @@ struct smc_connection {
 	atomic_t		sndbuf_space;	/* remaining space in sndbuf */
 	u16			tx_cdc_seq;	/* sequence # for CDC send */
 	u16			tx_cdc_seq_fin;	/* sequence # - tx completed */
-	spinlock_t		send_lock;	/* protect wr_sends */
+	struct mutex		send_lock;	/* protect wr_sends */
 	atomic_t		cdc_pend_tx_wr; /* number of pending tx CDC wqe
 						 * - inc when post wqe,
 						 * - dec on polled tx cqe
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 3c06625ceb20..f8ad0035905a 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -186,10 +186,10 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
 	if (rc)
 		goto put_out;
 
-	spin_lock_bh(&conn->send_lock);
+	mutex_lock(&conn->send_lock);
 	if (link != conn->lnk) {
 		/* link of connection changed, try again one time*/
-		spin_unlock_bh(&conn->send_lock);
+		mutex_unlock(&conn->send_lock);
 		smc_wr_tx_put_slot(link,
 				   (struct smc_wr_tx_pend_priv *)pend);
 		smc_wr_tx_link_put(link);
@@ -199,7 +199,7 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
 		goto again;
 	}
 	rc = smc_cdc_msg_send(conn, wr_buf, pend);
-	spin_unlock_bh(&conn->send_lock);
+	mutex_unlock(&conn->send_lock);
 put_out:
 	smc_wr_tx_link_put(link);
 	return rc;
@@ -214,9 +214,9 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
 		return -EPIPE;
 
 	if (conn->lgr->is_smcd) {
-		spin_lock_bh(&conn->send_lock);
+		mutex_lock(&conn->send_lock);
 		rc = smcd_cdc_msg_send(conn);
-		spin_unlock_bh(&conn->send_lock);
+		mutex_unlock(&conn->send_lock);
 	} else {
 		rc = smcr_cdc_get_slot_and_msg_send(conn);
 	}
@@ -308,10 +308,10 @@ static void smc_cdc_msg_validate(struct smc_sock *smc, struct smc_cdc_msg *cdc,
 	if (diff < 0) {		/* diff larger than 0x7fff */
 		/* drop connection */
 		conn->out_of_sync = 1;	/* prevent any further receives */
-		spin_lock_bh(&conn->send_lock);
+		mutex_lock(&conn->send_lock);
 		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
 		conn->lnk = link;
-		spin_unlock_bh(&conn->send_lock);
+		mutex_unlock(&conn->send_lock);
 		sock_hold(&smc->sk); /* sock_put in abort_work */
 		if (!queue_work(smc_close_wq, &conn->abort_work))
 			sock_put(&smc->sk);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 9b84d5897aa5..21e0d95ab8c8 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1083,9 +1083,9 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
 		    smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
 		    smc->sk.sk_state == SMC_PEERABORTWAIT ||
 		    smc->sk.sk_state == SMC_PROCESSABORT) {
-			spin_lock_bh(&conn->send_lock);
+			mutex_lock(&conn->send_lock);
 			smc_switch_link_and_count(conn, to_lnk);
-			spin_unlock_bh(&conn->send_lock);
+			mutex_unlock(&conn->send_lock);
 			continue;
 		}
 		sock_hold(&smc->sk);
@@ -1095,10 +1095,10 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
 		if (rc)
 			goto err_out;
 		/* avoid race with smcr_tx_sndbuf_nonempty() */
-		spin_lock_bh(&conn->send_lock);
+		mutex_lock(&conn->send_lock);
 		smc_switch_link_and_count(conn, to_lnk);
 		rc = smc_switch_cursor(smc, pend, wr_buf);
-		spin_unlock_bh(&conn->send_lock);
+		mutex_unlock(&conn->send_lock);
 		sock_put(&smc->sk);
 		if (rc)
 			goto err_out;
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 214ac3cbcf9a..b6790bd82b4e 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -573,7 +573,7 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
 		return rc;
 	}
 
-	spin_lock_bh(&conn->send_lock);
+	mutex_lock(&conn->send_lock);
 	if (link != conn->lnk) {
 		/* link of connection changed, tx_work will restart */
 		smc_wr_tx_put_slot(link,
@@ -597,7 +597,7 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
 	}
 
 out_unlock:
-	spin_unlock_bh(&conn->send_lock);
+	mutex_unlock(&conn->send_lock);
 	smc_wr_tx_link_put(link);
 	return rc;
 }
@@ -607,7 +607,7 @@ static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
 	struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
 	int rc = 0;
 
-	spin_lock_bh(&conn->send_lock);
+	mutex_lock(&conn->send_lock);
 	if (!pflags->urg_data_present)
 		rc = smc_tx_rdma_writes(conn, NULL);
 	if (!rc)
@@ -617,7 +617,7 @@ static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
 		pflags->urg_data_pending = 0;
 		pflags->urg_data_present = 0;
 	}
-	spin_unlock_bh(&conn->send_lock);
+	mutex_unlock(&conn->send_lock);
 
 	return rc;
 }
-- 
2.34.1
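
Not part of the patch, only an illustration of the class of bug being fixed:
a minimal kernel-module sketch. All names here (demo_spinlock, demo_mutex,
demo_might_sleep) are made up. The point is that a sleeping call made under
spin_lock_bh() runs in atomic context and triggers "scheduling while atomic",
while the same call under a mutex is allowed, which is the rationale for
switching conn->send_lock from a spinlock to a mutex.

/* Hypothetical demo module - not kernel or SMC code. */
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/delay.h>

static DEFINE_SPINLOCK(demo_spinlock);
static DEFINE_MUTEX(demo_mutex);

/* Stand-in for a driver hook such as .dereg_mr that may sleep. */
static void demo_might_sleep(void)
{
	msleep(10);	/* sleeping call, e.g. waiting for hardware */
}

static int __init demo_init(void)
{
	/*
	 * BAD: spin_lock_bh() disables bottom halves and enters atomic
	 * context; calling a sleeping function here triggers
	 * "BUG: scheduling while atomic" (and, with
	 * CONFIG_DEBUG_ATOMIC_SLEEP, "sleeping function called from
	 * invalid context").
	 */
	spin_lock_bh(&demo_spinlock);
	demo_might_sleep();
	spin_unlock_bh(&demo_spinlock);

	/*
	 * OK: a mutex holder may sleep, so the same call is legal here.
	 */
	mutex_lock(&demo_mutex);
	demo_might_sleep();
	mutex_unlock(&demo_mutex);

	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

The trade-off of such a conversion, in general, is that a mutex may only be
taken from contexts that are themselves allowed to sleep, so every path that
acquires the lock must run in process context for the change to be safe.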