> -----Original Message----- > From: Mike Christie <michael.christie@xxxxxxxxxx> > Sent: Sunday, April 11, 2021 12:10 AM > To: lduncan@xxxxxxxx; martin.petersen@xxxxxxxxxx; Manish Rangankar > <mrangankar@xxxxxxxxxxx>; Santosh Vernekar <svernekar@xxxxxxxxxxx>; > linux-scsi@xxxxxxxxxxxxxxx; jejb@xxxxxxxxxxxxx > Cc: Mike Christie <michael.christie@xxxxxxxxxx> > Subject: [EXT] [PATCH 12/13] scsi: qedi: make sure tmf works are done before > disconnect > > External Email > > ---------------------------------------------------------------------- > We need to make sure that abort and reset completion work has completed > before ep_disconnect returns. After ep_disconnect we can't manipulate cmds > because libiscsi will call conn_stop and take onwership. > > We are trying to make sure abort work and reset completion work has > completed before we do the cmd clean up in ep_disconnect. The problem is > that: > > 1. the work function sets the QEDI_CONN_FW_CLEANUP bit, so if the work was > still pending we would not see the bit set. We need to do this before the work is > queued. > > 2. If we had multiple works queued then we could break from the loop in > qedi_ep_disconnect early because when abort work 1 completes it could clear > QEDI_CONN_FW_CLEANUP. qedi_ep_disconnect could then see that before > work 2 has run. > > 3. A tmf reset completion work could run after ep_disconnect starts cleaning up > cmds via qedi_clearsq. ep_disconnect's call to qedi_clearsq > -> qedi_cleanup_all_io would might think it's done cleaning up cmds, > but the reset completion work could still be running. We then return from > ep_disconnect while still doing cleanup. > > This replaces the bit with a counter and adds a bool to prevent new works from > starting from the completion path once a ep_disconnect starts. > > Signed-off-by: Mike Christie <michael.christie@xxxxxxxxxx> > --- > drivers/scsi/qedi/qedi_fw.c | 46 +++++++++++++++++++++------------- > drivers/scsi/qedi/qedi_iscsi.c | 23 +++++++++++------ > drivers/scsi/qedi/qedi_iscsi.h | 4 +-- > 3 files changed, 47 insertions(+), 26 deletions(-) > > diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index > 475cb7823cf1..13dd06915d74 100644 > --- a/drivers/scsi/qedi/qedi_fw.c > +++ b/drivers/scsi/qedi/qedi_fw.c > @@ -156,7 +156,6 @@ static void qedi_tmf_resp_work(struct work_struct > *work) > struct iscsi_tm_rsp *resp_hdr_ptr; > int rval = 0; > > - set_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags); > resp_hdr_ptr = (struct iscsi_tm_rsp *)qedi_cmd->tmf_resp_buf; > > rval = qedi_cleanup_all_io(qedi, qedi_conn, qedi_cmd->task, true); @@ > -169,7 +168,10 @@ static void qedi_tmf_resp_work(struct work_struct *work) > > exit_tmf_resp: > kfree(resp_hdr_ptr); > - clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags); > + > + spin_lock(&qedi_conn->tmf_work_lock); > + qedi_conn->fw_cleanup_works--; > + spin_unlock(&qedi_conn->tmf_work_lock); > } > > static void qedi_process_tmf_resp(struct qedi_ctx *qedi, @@ -225,16 +227,25 > @@ static void qedi_process_tmf_resp(struct qedi_ctx *qedi, > } > spin_unlock(&qedi_conn->list_lock); > > - if (((tmf_hdr->flags & ISCSI_FLAG_TM_FUNC_MASK) == > - ISCSI_TM_FUNC_LOGICAL_UNIT_RESET) || > - ((tmf_hdr->flags & ISCSI_FLAG_TM_FUNC_MASK) == > - ISCSI_TM_FUNC_TARGET_WARM_RESET) || > - ((tmf_hdr->flags & ISCSI_FLAG_TM_FUNC_MASK) == > - ISCSI_TM_FUNC_TARGET_COLD_RESET)) { > + spin_lock(&qedi_conn->tmf_work_lock); > + switch (tmf_hdr->flags & ISCSI_FLAG_TM_FUNC_MASK) { > + case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET: > + case ISCSI_TM_FUNC_TARGET_WARM_RESET: > + case ISCSI_TM_FUNC_TARGET_COLD_RESET: > + if (qedi_conn->ep_disconnect_starting) { > + /* Session is down so ep_disconnect will clean up */ > + spin_unlock(&qedi_conn->tmf_work_lock); > + goto unblock_sess; > + } > + > + qedi_conn->fw_cleanup_works++; > + spin_unlock(&qedi_conn->tmf_work_lock); > + > INIT_WORK(&qedi_cmd->tmf_work, qedi_tmf_resp_work); > queue_work(qedi->tmf_thread, &qedi_cmd->tmf_work); > goto unblock_sess; > } > + spin_unlock(&qedi_conn->tmf_work_lock); > > __iscsi_complete_pdu(conn, (struct iscsi_hdr *)resp_hdr_ptr, NULL, 0); > kfree(resp_hdr_ptr); > @@ -1361,7 +1372,6 @@ static void qedi_abort_work(struct work_struct > *work) > > mtask = qedi_cmd->task; > tmf_hdr = (struct iscsi_tm *)mtask->hdr; > - set_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags); > > spin_lock_bh(&conn->session->back_lock); > ctask = iscsi_itt_to_ctask(conn, tmf_hdr->rtt); @@ -1427,11 +1437,7 > @@ static void qedi_abort_work(struct work_struct *work) > > send_iscsi_tmf(qedi_conn, qedi_cmd->task, ctask); > > -put_task: > - iscsi_put_task(ctask); > -clear_cleanup: > - clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags); > - return; > + goto put_task; > > ldel_exit: > spin_lock_bh(&qedi_conn->tmf_work_lock); > @@ -1449,10 +1455,12 @@ static void qedi_abort_work(struct work_struct > *work) > qedi_conn->active_cmd_count--; > } > spin_unlock(&qedi_conn->list_lock); > - > +put_task: > iscsi_put_task(ctask); > - > - clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags); > +clear_cleanup: > + spin_lock(&qedi_conn->tmf_work_lock); > + qedi_conn->fw_cleanup_works--; > + spin_unlock(&qedi_conn->tmf_work_lock); > } > > static int send_iscsi_tmf(struct qedi_conn *qedi_conn, struct iscsi_task *mtask, > @@ -1547,6 +1555,10 @@ int qedi_send_iscsi_tmf(struct qedi_conn > *qedi_conn, struct iscsi_task *mtask) > > switch (tmf_hdr->flags & ISCSI_FLAG_TM_FUNC_MASK) { > case ISCSI_TM_FUNC_ABORT_TASK: > + spin_lock(&qedi_conn->tmf_work_lock); > + qedi_conn->fw_cleanup_works++; > + spin_unlock(&qedi_conn->tmf_work_lock); > + > INIT_WORK(&qedi_cmd->tmf_work, qedi_abort_work); > queue_work(qedi->tmf_thread, &qedi_cmd->tmf_work); > break; > diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index > 536d6653ef8e..8bbdd45ff2a1 100644 > --- a/drivers/scsi/qedi/qedi_iscsi.c > +++ b/drivers/scsi/qedi/qedi_iscsi.c > @@ -592,7 +592,11 @@ static int qedi_conn_start(struct iscsi_cls_conn > *cls_conn) > goto start_err; > } > > - clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags); > + spin_lock(&qedi_conn->tmf_work_lock); > + qedi_conn->fw_cleanup_works = 0; > + qedi_conn->ep_disconnect_starting = false; > + spin_unlock(&qedi_conn->tmf_work_lock); > + > qedi_conn->abrt_conn = 0; > > rval = iscsi_conn_start(cls_conn); > @@ -1009,7 +1013,6 @@ static void qedi_ep_disconnect(struct iscsi_endpoint > *ep) > int ret = 0; > int wait_delay; > int abrt_conn = 0; > - int count = 10; > > wait_delay = 60 * HZ + DEF_MAX_RT_TIME; > qedi_ep = ep->dd_data; > @@ -1027,13 +1030,19 @@ static void qedi_ep_disconnect(struct > iscsi_endpoint *ep) > iscsi_suspend_queue(conn); > abrt_conn = qedi_conn->abrt_conn; > > - while (count--) { > - if (!test_bit(QEDI_CONN_FW_CLEANUP, > - &qedi_conn->flags)) { > - break; > - } > + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, > + "cid=0x%x qedi_ep=%p waiting for %d tmfs\n", > + qedi_ep->iscsi_cid, qedi_ep, > + qedi_conn->fw_cleanup_works); > + > + spin_lock(&qedi_conn->tmf_work_lock); > + qedi_conn->ep_disconnect_starting = true; > + while (qedi_conn->fw_cleanup_works > 0) { > + spin_unlock(&qedi_conn->tmf_work_lock); > msleep(1000); > + spin_lock(&qedi_conn->tmf_work_lock); > } > + spin_unlock(&qedi_conn->tmf_work_lock); > > if (test_bit(QEDI_IN_RECOVERY, &qedi->flags)) { > if (qedi_do_not_recover) { > diff --git a/drivers/scsi/qedi/qedi_iscsi.h b/drivers/scsi/qedi/qedi_iscsi.h index > 68ef519f5480..758735209e15 100644 > --- a/drivers/scsi/qedi/qedi_iscsi.h > +++ b/drivers/scsi/qedi/qedi_iscsi.h > @@ -169,8 +169,8 @@ struct qedi_conn { > struct list_head tmf_work_list; > wait_queue_head_t wait_queue; > spinlock_t tmf_work_lock; /* tmf work lock */ > - unsigned long flags; > -#define QEDI_CONN_FW_CLEANUP 1 > + bool ep_disconnect_starting; > + int fw_cleanup_works; > }; > > struct qedi_cmd { > -- > 2.25.1 Thanks, Reviewed-by: Manish Rangankar <mrangankar@xxxxxxxxxxx>