This fixes two bugs: 1. The scsi cmd task could be completed and the abort could timeout while we are running qedi_tmf_work so we need to get a ref to the task. 2. If qedi_tmf_work's qedi_wait_for_cleanup_request call times out then we will also force the clean up of the qedi_work_map but qedi_process_cmd_cleanup_resp could still be accessing the qedi_cmd for the abort tmf. We can then race where qedi_process_cmd_cleanup_resp is still accessing the mtask's qedi_cmd but libiscsi has escalated to session level cleanup and is cleaning up the mtask while we are still accessing it. Signed-off-by: Mike Christie <michael.christie@xxxxxxxxxx> --- drivers/scsi/qedi/qedi_fw.c | 110 ++++++++++++++++++--------------- drivers/scsi/qedi/qedi_iscsi.h | 1 + 2 files changed, 61 insertions(+), 50 deletions(-) diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index ad4357e4c15d..c5699421ec37 100644 --- a/drivers/scsi/qedi/qedi_fw.c +++ b/drivers/scsi/qedi/qedi_fw.c @@ -729,7 +729,6 @@ static void qedi_process_nopin_local_cmpl(struct qedi_ctx *qedi, static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, struct iscsi_cqe_solicited *cqe, - struct iscsi_task *task, struct iscsi_conn *conn) { struct qedi_work_map *work, *work_tmp; @@ -742,7 +741,7 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, u32 iscsi_cid; struct qedi_conn *qedi_conn; struct qedi_cmd *dbg_cmd; - struct iscsi_task *mtask; + struct iscsi_task *mtask, *task; struct iscsi_tm *tmf_hdr = NULL; iscsi_cid = cqe->conn_id; @@ -768,6 +767,7 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, } found = 1; mtask = qedi_cmd->task; + task = work->ctask; tmf_hdr = (struct iscsi_tm *)mtask->hdr; rtid = work->rtid; @@ -776,52 +776,47 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, qedi_cmd->list_tmf_work = NULL; } } - spin_unlock_bh(&qedi_conn->tmf_work_lock); - - if (found) { - QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, - "TMF work, cqe->tid=0x%x, tmf flags=0x%x, cid=0x%x\n", - proto_itt, tmf_hdr->flags, qedi_conn->iscsi_conn_id); - - if ((tmf_hdr->flags & ISCSI_FLAG_TM_FUNC_MASK) == - ISCSI_TM_FUNC_ABORT_TASK) { - spin_lock_bh(&conn->session->back_lock); - protoitt = build_itt(get_itt(tmf_hdr->rtt), - conn->session->age); - task = iscsi_itt_to_task(conn, protoitt); - - spin_unlock_bh(&conn->session->back_lock); + if (!found) { + spin_unlock_bh(&qedi_conn->tmf_work_lock); + goto check_cleanup_reqs; + } - if (!task) { - QEDI_NOTICE(&qedi->dbg_ctx, - "IO task completed, tmf rtt=0x%x, cid=0x%x\n", - get_itt(tmf_hdr->rtt), - qedi_conn->iscsi_conn_id); - return; - } + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, + "TMF work, cqe->tid=0x%x, tmf flags=0x%x, cid=0x%x\n", + proto_itt, tmf_hdr->flags, qedi_conn->iscsi_conn_id); + + spin_lock_bh(&conn->session->back_lock); + if (iscsi_task_is_completed(task)) { + QEDI_NOTICE(&qedi->dbg_ctx, + "IO task completed, tmf rtt=0x%x, cid=0x%x\n", + get_itt(tmf_hdr->rtt), qedi_conn->iscsi_conn_id); + goto unlock; + } - dbg_cmd = task->dd_data; + dbg_cmd = task->dd_data; - QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, - "Abort tmf rtt=0x%x, i/o itt=0x%x, i/o tid=0x%x, cid=0x%x\n", - get_itt(tmf_hdr->rtt), get_itt(task->itt), - dbg_cmd->task_id, qedi_conn->iscsi_conn_id); + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, + "Abort tmf rtt=0x%x, i/o itt=0x%x, i/o tid=0x%x, cid=0x%x\n", + get_itt(tmf_hdr->rtt), get_itt(task->itt), dbg_cmd->task_id, + qedi_conn->iscsi_conn_id); - if (qedi_cmd->state == CLEANUP_WAIT_FAILED) - qedi_cmd->state = CLEANUP_RECV; + spin_lock(&qedi_conn->list_lock); + if (likely(dbg_cmd->io_cmd_in_list)) { + dbg_cmd->io_cmd_in_list = false; + list_del_init(&dbg_cmd->io_cmd); + qedi_conn->active_cmd_count--; + } + spin_unlock(&qedi_conn->list_lock); + qedi_cmd->state = CLEANUP_RECV; +unlock: + spin_unlock_bh(&conn->session->back_lock); + spin_unlock_bh(&qedi_conn->tmf_work_lock); + wake_up_interruptible(&qedi_conn->wait_queue); + return; - spin_lock(&qedi_conn->list_lock); - if (likely(dbg_cmd->io_cmd_in_list)) { - dbg_cmd->io_cmd_in_list = false; - list_del_init(&dbg_cmd->io_cmd); - qedi_conn->active_cmd_count--; - } - spin_unlock(&qedi_conn->list_lock); - qedi_cmd->state = CLEANUP_RECV; - wake_up_interruptible(&qedi_conn->wait_queue); - } - } else if (qedi_conn->cmd_cleanup_req > 0) { +check_cleanup_reqs: + if (qedi_conn->cmd_cleanup_req > 0) { spin_lock_bh(&conn->session->back_lock); qedi_get_proto_itt(qedi, cqe->itid, &ptmp_itt); protoitt = build_itt(ptmp_itt, conn->session->age); @@ -844,6 +839,7 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_TID, "Freeing tid=0x%x for cid=0x%x\n", cqe->itid, qedi_conn->iscsi_conn_id); + qedi_clear_task_idx(qedi_conn->qedi, cqe->itid); } else { qedi_get_proto_itt(qedi, cqe->itid, &ptmp_itt); @@ -946,8 +942,7 @@ void qedi_fp_process_cqes(struct qedi_work *work) goto exit_fp_process; case ISCSI_CQE_TYPE_TASK_CLEANUP: QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, "CleanUp CqE\n"); - qedi_process_cmd_cleanup_resp(qedi, &cqe->cqe_solicited, task, - conn); + qedi_process_cmd_cleanup_resp(qedi, &cqe->cqe_solicited, conn); goto exit_fp_process; default: QEDI_ERR(&qedi->dbg_ctx, "Error cqe.\n"); @@ -1374,12 +1369,22 @@ static void qedi_tmf_work(struct work_struct *work) tmf_hdr = (struct iscsi_tm *)mtask->hdr; set_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags); - ctask = iscsi_itt_to_task(conn, tmf_hdr->rtt); - if (!ctask || !ctask->sc) { + spin_lock_bh(&conn->session->back_lock); + ctask = iscsi_itt_to_ctask(conn, tmf_hdr->rtt); + if (!ctask || iscsi_task_is_completed(ctask)) { + spin_unlock_bh(&conn->session->back_lock); QEDI_ERR(&qedi->dbg_ctx, "Task already completed\n"); - goto abort_ret; + goto clear_cleanup; } + /* + * libiscsi gets a ref before sending the abort, but if libiscsi times + * it out then it could release it and the cmd could complete from + * under us. + */ + __iscsi_get_task(ctask); + spin_unlock_bh(&conn->session->back_lock); + cmd = (struct qedi_cmd *)ctask->dd_data; QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, "Abort tmf rtt=0x%x, cmd itt=0x%x, cmd tid=0x%x, cid=0x%x\n", @@ -1389,19 +1394,20 @@ static void qedi_tmf_work(struct work_struct *work) if (qedi_do_not_recover) { QEDI_ERR(&qedi->dbg_ctx, "DONT SEND CLEANUP/ABORT %d\n", qedi_do_not_recover); - goto abort_ret; + goto put_task; } list_work = kzalloc(sizeof(*list_work), GFP_ATOMIC); if (!list_work) { QEDI_ERR(&qedi->dbg_ctx, "Memory allocation failed\n"); - goto abort_ret; + goto put_task; } qedi_cmd->type = TYPEIO; list_work->qedi_cmd = qedi_cmd; list_work->rtid = cmd->task_id; list_work->state = QEDI_WORK_SCHEDULED; + list_work->ctask = ctask; qedi_cmd->list_tmf_work = list_work; QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, @@ -1434,7 +1440,9 @@ static void qedi_tmf_work(struct work_struct *work) qedi_cmd->task_id = tid; qedi_send_iscsi_tmf(qedi_conn, qedi_cmd->task); -abort_ret: +put_task: + iscsi_put_task(ctask); +clear_cleanup: clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags); return; @@ -1455,6 +1463,8 @@ static void qedi_tmf_work(struct work_struct *work) } spin_unlock(&qedi_conn->list_lock); + iscsi_put_task(ctask); + clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags); } diff --git a/drivers/scsi/qedi/qedi_iscsi.h b/drivers/scsi/qedi/qedi_iscsi.h index 39dc27c85e3c..68ef519f5480 100644 --- a/drivers/scsi/qedi/qedi_iscsi.h +++ b/drivers/scsi/qedi/qedi_iscsi.h @@ -212,6 +212,7 @@ struct qedi_cmd { struct qedi_work_map { struct list_head list; struct qedi_cmd *qedi_cmd; + struct iscsi_task *ctask; int rtid; int state; -- 2.25.1