This fixes two bugs: 1. The scsi cmd task could be completed and the abort could timeout while we are running qedi_tmf_work so we need to get a ref to the task. 2. If qedi_tmf_work's qedi_wait_for_cleanup_request call times out we will also force the clean up of the qedi_work_map but qedi_process_cmd_cleanup_resp could still be accessing the qedi_cmd for the abort TMF. We can then race where qedi_process_cmd_cleanup_resp is still accessing the mtask's qedi_cmd but libiscsi has escalated to session level cleanup and is cleaning up the mtask while we are still accessing it. To fix this issue we extend where we hold the tmf_work_lock and back_lock so the qedi_process_cmd_cleanup_resp access is serialized with the cleanup done in qedi_tmf_work and any completion handling for the iscsi_task. Signed-off-by: Mike Christie <michael.christie@xxxxxxxxxx> Reviewed-by: Manish Rangankar <mrangankar@xxxxxxxxxxx> --- drivers/scsi/qedi/qedi_fw.c | 110 ++++++++++++++++++--------------- drivers/scsi/qedi/qedi_iscsi.h | 1 + 2 files changed, 61 insertions(+), 50 deletions(-) diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index ad4357e4c15d..c5699421ec37 100644 --- a/drivers/scsi/qedi/qedi_fw.c +++ b/drivers/scsi/qedi/qedi_fw.c @@ -729,7 +729,6 @@ static void qedi_process_nopin_local_cmpl(struct qedi_ctx *qedi, static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, struct iscsi_cqe_solicited *cqe, - struct iscsi_task *task, struct iscsi_conn *conn) { struct qedi_work_map *work, *work_tmp; @@ -742,7 +741,7 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, u32 iscsi_cid; struct qedi_conn *qedi_conn; struct qedi_cmd *dbg_cmd; - struct iscsi_task *mtask; + struct iscsi_task *mtask, *task; struct iscsi_tm *tmf_hdr = NULL; iscsi_cid = cqe->conn_id; @@ -768,6 +767,7 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, } found = 1; mtask = qedi_cmd->task; + task = work->ctask; tmf_hdr = (struct iscsi_tm *)mtask->hdr; rtid = work->rtid; @@ -776,52 +776,47 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, qedi_cmd->list_tmf_work = NULL; } } - spin_unlock_bh(&qedi_conn->tmf_work_lock); - - if (found) { - QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, - "TMF work, cqe->tid=0x%x, tmf flags=0x%x, cid=0x%x\n", - proto_itt, tmf_hdr->flags, qedi_conn->iscsi_conn_id); - - if ((tmf_hdr->flags & ISCSI_FLAG_TM_FUNC_MASK) == - ISCSI_TM_FUNC_ABORT_TASK) { - spin_lock_bh(&conn->session->back_lock); - protoitt = build_itt(get_itt(tmf_hdr->rtt), - conn->session->age); - task = iscsi_itt_to_task(conn, protoitt); - - spin_unlock_bh(&conn->session->back_lock); + if (!found) { + spin_unlock_bh(&qedi_conn->tmf_work_lock); + goto check_cleanup_reqs; + } - if (!task) { - QEDI_NOTICE(&qedi->dbg_ctx, - "IO task completed, tmf rtt=0x%x, cid=0x%x\n", - get_itt(tmf_hdr->rtt), - qedi_conn->iscsi_conn_id); - return; - } + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, + "TMF work, cqe->tid=0x%x, tmf flags=0x%x, cid=0x%x\n", + proto_itt, tmf_hdr->flags, qedi_conn->iscsi_conn_id); + + spin_lock_bh(&conn->session->back_lock); + if (iscsi_task_is_completed(task)) { + QEDI_NOTICE(&qedi->dbg_ctx, + "IO task completed, tmf rtt=0x%x, cid=0x%x\n", + get_itt(tmf_hdr->rtt), qedi_conn->iscsi_conn_id); + goto unlock; + } - dbg_cmd = task->dd_data; + dbg_cmd = task->dd_data; - QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, - "Abort tmf rtt=0x%x, i/o itt=0x%x, i/o tid=0x%x, cid=0x%x\n", - get_itt(tmf_hdr->rtt), get_itt(task->itt), - dbg_cmd->task_id, qedi_conn->iscsi_conn_id); + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, + "Abort tmf rtt=0x%x, i/o itt=0x%x, i/o tid=0x%x, cid=0x%x\n", + get_itt(tmf_hdr->rtt), get_itt(task->itt), dbg_cmd->task_id, + qedi_conn->iscsi_conn_id); - if (qedi_cmd->state == CLEANUP_WAIT_FAILED) - qedi_cmd->state = CLEANUP_RECV; + spin_lock(&qedi_conn->list_lock); + if (likely(dbg_cmd->io_cmd_in_list)) { + dbg_cmd->io_cmd_in_list = false; + list_del_init(&dbg_cmd->io_cmd); + qedi_conn->active_cmd_count--; + } + spin_unlock(&qedi_conn->list_lock); + qedi_cmd->state = CLEANUP_RECV; +unlock: + spin_unlock_bh(&conn->session->back_lock); + spin_unlock_bh(&qedi_conn->tmf_work_lock); + wake_up_interruptible(&qedi_conn->wait_queue); + return; - spin_lock(&qedi_conn->list_lock); - if (likely(dbg_cmd->io_cmd_in_list)) { - dbg_cmd->io_cmd_in_list = false; - list_del_init(&dbg_cmd->io_cmd); - qedi_conn->active_cmd_count--; - } - spin_unlock(&qedi_conn->list_lock); - qedi_cmd->state = CLEANUP_RECV; - wake_up_interruptible(&qedi_conn->wait_queue); - } - } else if (qedi_conn->cmd_cleanup_req > 0) { +check_cleanup_reqs: + if (qedi_conn->cmd_cleanup_req > 0) { spin_lock_bh(&conn->session->back_lock); qedi_get_proto_itt(qedi, cqe->itid, &ptmp_itt); protoitt = build_itt(ptmp_itt, conn->session->age); @@ -844,6 +839,7 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_TID, "Freeing tid=0x%x for cid=0x%x\n", cqe->itid, qedi_conn->iscsi_conn_id); + qedi_clear_task_idx(qedi_conn->qedi, cqe->itid); } else { qedi_get_proto_itt(qedi, cqe->itid, &ptmp_itt); @@ -946,8 +942,7 @@ void qedi_fp_process_cqes(struct qedi_work *work) goto exit_fp_process; case ISCSI_CQE_TYPE_TASK_CLEANUP: QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, "CleanUp CqE\n"); - qedi_process_cmd_cleanup_resp(qedi, &cqe->cqe_solicited, task, - conn); + qedi_process_cmd_cleanup_resp(qedi, &cqe->cqe_solicited, conn); goto exit_fp_process; default: QEDI_ERR(&qedi->dbg_ctx, "Error cqe.\n"); @@ -1374,12 +1369,22 @@ static void qedi_tmf_work(struct work_struct *work) tmf_hdr = (struct iscsi_tm *)mtask->hdr; set_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags); - ctask = iscsi_itt_to_task(conn, tmf_hdr->rtt); - if (!ctask || !ctask->sc) { + spin_lock_bh(&conn->session->back_lock); + ctask = iscsi_itt_to_ctask(conn, tmf_hdr->rtt); + if (!ctask || iscsi_task_is_completed(ctask)) { + spin_unlock_bh(&conn->session->back_lock); QEDI_ERR(&qedi->dbg_ctx, "Task already completed\n"); - goto abort_ret; + goto clear_cleanup; } + /* + * libiscsi gets a ref before sending the abort, but if libiscsi times + * it out then it could release it and the cmd could complete from + * under us. + */ + __iscsi_get_task(ctask); + spin_unlock_bh(&conn->session->back_lock); + cmd = (struct qedi_cmd *)ctask->dd_data; QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, "Abort tmf rtt=0x%x, cmd itt=0x%x, cmd tid=0x%x, cid=0x%x\n", @@ -1389,19 +1394,20 @@ static void qedi_tmf_work(struct work_struct *work) if (qedi_do_not_recover) { QEDI_ERR(&qedi->dbg_ctx, "DONT SEND CLEANUP/ABORT %d\n", qedi_do_not_recover); - goto abort_ret; + goto put_task; } list_work = kzalloc(sizeof(*list_work), GFP_ATOMIC); if (!list_work) { QEDI_ERR(&qedi->dbg_ctx, "Memory allocation failed\n"); - goto abort_ret; + goto put_task; } qedi_cmd->type = TYPEIO; list_work->qedi_cmd = qedi_cmd; list_work->rtid = cmd->task_id; list_work->state = QEDI_WORK_SCHEDULED; + list_work->ctask = ctask; qedi_cmd->list_tmf_work = list_work; QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, @@ -1434,7 +1440,9 @@ static void qedi_tmf_work(struct work_struct *work) qedi_cmd->task_id = tid; qedi_send_iscsi_tmf(qedi_conn, qedi_cmd->task); -abort_ret: +put_task: + iscsi_put_task(ctask); +clear_cleanup: clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags); return; @@ -1455,6 +1463,8 @@ static void qedi_tmf_work(struct work_struct *work) } spin_unlock(&qedi_conn->list_lock); + iscsi_put_task(ctask); + clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags); } diff --git a/drivers/scsi/qedi/qedi_iscsi.h b/drivers/scsi/qedi/qedi_iscsi.h index 39dc27c85e3c..68ef519f5480 100644 --- a/drivers/scsi/qedi/qedi_iscsi.h +++ b/drivers/scsi/qedi/qedi_iscsi.h @@ -212,6 +212,7 @@ struct qedi_cmd { struct qedi_work_map { struct list_head list; struct qedi_cmd *qedi_cmd; + struct iscsi_task *ctask; int rtid; int state; -- 2.25.1