On 3/10/23 4:04 AM, Maurizio Lombardi wrote: > If the initiator suddenly stops sending data during a login while > keeping the TCP connection open, the sk_data_ready callback won't > schedule the login_work and the latter > will never timeout and release the login semaphore. > > All the other login operations will therefore get stuck waiting > for the semaphore to be released. > > Add a timer to check if the initiator became unresponsive, this is how it > works: > > * The timer is started in iscsi_target_start_negotiation(), before > clearing tre LOGIN_FLAGS_INITIAL_PDU flag. > * If iscsi_target_do_login() returned a non-zero value, this means that > the login operation either failed or didn't require additional PDUs; > in this case the timer is immediately stopped. > * If iscsi_target_do_login() returned zero then the control of > the login operation is passed to login_work that will take over the > responsibility of releasing the login semaphore and stopping the timer. > * If login_work gets stuck, the login timer will expire and > will force the login to fail (by sending a SIGINT to the login kthread > and by closing the socket). > > Signed-off-by: Maurizio Lombardi <mlombard@xxxxxxxxxx> > --- > drivers/target/iscsi/iscsi_target_login.c | 1 + > drivers/target/iscsi/iscsi_target_nego.c | 56 ++++++++++------------- > drivers/target/iscsi/iscsi_target_util.c | 26 +++++++++++ > drivers/target/iscsi/iscsi_target_util.h | 3 ++ > include/target/iscsi/iscsi_target_core.h | 1 + > 5 files changed, 56 insertions(+), 31 deletions(-) > > diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c > index 27e448c2d066..bb7d5a596266 100644 > --- a/drivers/target/iscsi/iscsi_target_login.c > +++ b/drivers/target/iscsi/iscsi_target_login.c > @@ -1127,6 +1127,7 @@ static struct iscsit_conn *iscsit_alloc_conn(struct iscsi_np *np) > timer_setup(&conn->nopin_response_timer, > iscsit_handle_nopin_response_timeout, 0); > timer_setup(&conn->nopin_timer, iscsit_handle_nopin_timeout, 0); > + timer_setup(&conn->login_timer, iscsit_login_timeout, 0); > > if (iscsit_conn_set_transport(conn, np->np_transport) < 0) > goto free_conn; > diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c > index 24040c118e49..f901a7231c48 100644 > --- a/drivers/target/iscsi/iscsi_target_nego.c > +++ b/drivers/target/iscsi/iscsi_target_nego.c > @@ -472,12 +472,18 @@ static int iscsi_target_do_login(struct iscsit_conn *, struct iscsi_login *); > > static bool __iscsi_target_sk_check_close(struct sock *sk) > { > - if (sk->sk_state == TCP_CLOSE_WAIT || sk->sk_state == TCP_CLOSE) { > - pr_debug("__iscsi_target_sk_check_close: TCP_CLOSE_WAIT|TCP_CLOSE," > + switch (sk->sk_state) { > + case TCP_FIN_WAIT1: > + case TCP_FIN_WAIT2: > + case TCP_CLOSE_WAIT: > + case TCP_LAST_ACK: > + case TCP_CLOSE: > + pr_debug("__iscsi_target_sk_check_close: socket closing," > "returning TRUE\n"); Don't need to break up a string. We do it a lot in the lio code, but we've been trying not to in new code. > return true; > + default: > + return false; > } > - return false; > } > > static bool iscsi_target_sk_check_close(struct iscsit_conn *conn) > @@ -535,25 +541,6 @@ static void iscsi_target_login_drop(struct iscsit_conn *conn, struct iscsi_login > iscsi_target_login_sess_out(conn, zero_tsih, true); > } > > -struct conn_timeout { > - struct timer_list timer; > - struct iscsit_conn *conn; > -}; > - > -static void iscsi_target_login_timeout(struct timer_list *t) > -{ > - struct conn_timeout *timeout = from_timer(timeout, t, timer); > - struct iscsit_conn *conn = timeout->conn; > - > - pr_debug("Entering iscsi_target_login_timeout >>>>>>>>>>>>>>>>>>>\n"); > - > - if (conn->login_kworker) { > - pr_debug("Sending SIGINT to conn->login_kworker %s/%d\n", > - conn->login_kworker->comm, conn->login_kworker->pid); > - send_sig(SIGINT, conn->login_kworker, 1); > - } > -} > - > static void iscsi_target_do_login_rx(struct work_struct *work) > { > struct iscsit_conn *conn = container_of(work, > @@ -562,7 +549,6 @@ static void iscsi_target_do_login_rx(struct work_struct *work) > struct iscsi_np *np = login->np; > struct iscsi_portal_group *tpg = conn->tpg; > struct iscsi_tpg_np *tpg_np = conn->tpg_np; > - struct conn_timeout timeout; > int rc, zero_tsih = login->zero_tsih; > bool state; > > @@ -597,17 +583,13 @@ static void iscsi_target_do_login_rx(struct work_struct *work) > goto err; > } > > - conn->login_kworker = current; > allow_signal(SIGINT); > - > - timeout.conn = conn; > - timer_setup_on_stack(&timeout.timer, iscsi_target_login_timeout, 0); > - mod_timer(&timeout.timer, jiffies + TA_LOGIN_TIMEOUT * HZ); > - pr_debug("Starting login timer for %s/%d\n", current->comm, current->pid); > + iscsit_start_login_timer(conn); > + conn->login_kworker = current;> > rc = conn->conn_transport->iscsit_get_login_rx(conn, login); > - del_timer_sync(&timeout.timer); > - destroy_timer_on_stack(&timeout.timer); > + > + iscsit_stop_login_timer(conn); > flush_signals(current); > conn->login_kworker = NULL; > > @@ -646,6 +628,13 @@ static void iscsi_target_do_login_rx(struct work_struct *work) > if (iscsi_target_sk_check_and_clear(conn, > LOGIN_FLAGS_WRITE_ACTIVE)) > goto err; > + > + /* > + * Restart the login timer to prevent the > + * login process from getting stuck if the initiator I would fix up the formatting so the first line is longer. > + * stops sending data. > + */ > + iscsit_start_login_timer(conn); > } else if (rc == 1) { > cancel_delayed_work(&conn->login_work); > iscsi_target_nego_release(conn); > @@ -657,6 +646,7 @@ static void iscsi_target_do_login_rx(struct work_struct *work) > err: > iscsi_target_restore_sock_callbacks(conn); > cancel_delayed_work(&conn->login_work); > + iscsit_stop_login_timer(conn);> iscsi_target_login_drop(conn, login); > iscsit_deaccess_np(np, tpg, tpg_np); > } > @@ -1358,6 +1348,9 @@ int iscsi_target_start_negotiation( > set_bit(LOGIN_FLAGS_INITIAL_PDU, &conn->login_flags); > write_unlock_bh(&sk->sk_callback_lock); > } > + > + iscsit_start_login_timer(conn); At this time, we have the np->np_login_timer running right? Don't we only need to start this new timer when we know there are more PDUs and the connection is good (iscsi_target_do_login returns 0 and iscsi_target_sk_check_and_clear returns 0)? I think you can just kill np timer and only use the login_timer for both cases. So I mean set the thread to kill as the login one and start this login_timer in __iscsi_target_login_thread where we used to call iscsi_start_login_thread_timer. You would then mod the timer when we transition from iscsi_target_start_negotiation to waiting for the next PDU. > + > /* > * If iscsi_target_do_login returns zero to signal more PDU > * exchanges are required to complete the login, go ahead and > @@ -1377,6 +1370,7 @@ int iscsi_target_start_negotiation( > } > if (ret != 0) { > cancel_delayed_work_sync(&conn->login_work); > + iscsit_stop_login_timer(conn); > iscsi_target_nego_release(conn); > } > > diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c > index 26dc8ed3045b..414e883c5a0d 100644 > --- a/drivers/target/iscsi/iscsi_target_util.c > +++ b/drivers/target/iscsi/iscsi_target_util.c > @@ -1040,6 +1040,32 @@ void iscsit_stop_nopin_timer(struct iscsit_conn *conn) > spin_unlock_bh(&conn->nopin_timer_lock); > } > > +void iscsit_login_timeout(struct timer_list *t) > +{ > + struct iscsit_conn *conn = from_timer(conn, t, login_timer); > + > + pr_debug("Entering iscsi_target_login_timeout >>>>>>>>>>>>>>>>>>>\n"); > + > + if (conn->login_kworker) { > + pr_debug("Sending SIGINT to conn->login_kworker %s/%d\n", > + conn->login_kworker->comm, conn->login_kworker->pid); > + send_sig(SIGINT, conn->login_kworker, 1); > + } > + kernel_sock_shutdown(conn->sock, SHUT_RDWR); For isert and cxgbit we won't have conn->sock set so I think you need some sort of callout for those drivers, or maybe set LOGIN_FLAGS_CLOSED and queue the login_work. Maybe the latter will work for all drivers as well. You probably need some extra locking and LOGIN_FLAGS checks to handle an issue similar to below. If we do need to do the kernel_sock_shutdown we might need to add some code to not call it twice (I'm not sure it's fully supported). It looks like we can race and do: 1. login_work has just been queued. 2. But the login timer fires. We run kernel_sock_shutdown and iscsi_target_sk_state_change starts to run but has not yet taken the sk_callback_lock because.. 3. iscsit_get_login_rx already started and passed the iscsi_target_sk_check_close check. It re-arms the timer and calls iscsit_get_login_rx. 4. iscsi_target_sk_state_change takes the lock and sets LOGIN_FLAGS_CLOSED. 5. We timeout again from #3 since we called kernel_sock_shutdown before.