> -----Original Message----- > From: Mike Christie <michael.christie@xxxxxxxxxx> > Sent: Sunday, April 11, 2021 12:10 AM > To: lduncan@xxxxxxxx; martin.petersen@xxxxxxxxxx; Manish Rangankar > <mrangankar@xxxxxxxxxxx>; Santosh Vernekar <svernekar@xxxxxxxxxxx>; > linux-scsi@xxxxxxxxxxxxxxx; jejb@xxxxxxxxxxxxx > Cc: Mike Christie <michael.christie@xxxxxxxxxx> > Subject: [EXT] [PATCH 05/13] scsi: qedi: fix use after free during abort cleanup > > External Email > > ---------------------------------------------------------------------- > This fixes two bugs: > > 1. The scsi cmd task could be completed and the abort could timeout while we > are running qedi_tmf_work so we need to get a ref to the task. > > 2. If qedi_tmf_work's qedi_wait_for_cleanup_request call times out then we will > also force the clean up of the qedi_work_map but > qedi_process_cmd_cleanup_resp could still be accessing the qedi_cmd for the > abort tmf. We can then race where qedi_process_cmd_cleanup_resp is still > accessing the mtask's qedi_cmd but libiscsi has escalated to session level > cleanup and is cleaning up the mtask while we are still accessing it. > > Signed-off-by: Mike Christie <michael.christie@xxxxxxxxxx> > --- > drivers/scsi/qedi/qedi_fw.c | 110 ++++++++++++++++++--------------- > drivers/scsi/qedi/qedi_iscsi.h | 1 + > 2 files changed, 61 insertions(+), 50 deletions(-) > > diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index > ad4357e4c15d..c5699421ec37 100644 > --- a/drivers/scsi/qedi/qedi_fw.c > +++ b/drivers/scsi/qedi/qedi_fw.c > @@ -729,7 +729,6 @@ static void qedi_process_nopin_local_cmpl(struct > qedi_ctx *qedi, > > static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, > struct iscsi_cqe_solicited *cqe, > - struct iscsi_task *task, > struct iscsi_conn *conn) > { > struct qedi_work_map *work, *work_tmp; @@ -742,7 +741,7 @@ > static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, > u32 iscsi_cid; > struct qedi_conn *qedi_conn; > struct qedi_cmd *dbg_cmd; > - struct iscsi_task *mtask; > + struct iscsi_task *mtask, *task; > struct iscsi_tm *tmf_hdr = NULL; > > iscsi_cid = cqe->conn_id; > @@ -768,6 +767,7 @@ static void qedi_process_cmd_cleanup_resp(struct > qedi_ctx *qedi, > } > found = 1; > mtask = qedi_cmd->task; > + task = work->ctask; > tmf_hdr = (struct iscsi_tm *)mtask->hdr; > rtid = work->rtid; > > @@ -776,52 +776,47 @@ static void qedi_process_cmd_cleanup_resp(struct > qedi_ctx *qedi, > qedi_cmd->list_tmf_work = NULL; > } > } > - spin_unlock_bh(&qedi_conn->tmf_work_lock); > - > - if (found) { > - QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, > - "TMF work, cqe->tid=0x%x, tmf flags=0x%x, > cid=0x%x\n", > - proto_itt, tmf_hdr->flags, qedi_conn->iscsi_conn_id); > - > - if ((tmf_hdr->flags & ISCSI_FLAG_TM_FUNC_MASK) == > - ISCSI_TM_FUNC_ABORT_TASK) { > - spin_lock_bh(&conn->session->back_lock); > > - protoitt = build_itt(get_itt(tmf_hdr->rtt), > - conn->session->age); > - task = iscsi_itt_to_task(conn, protoitt); > - > - spin_unlock_bh(&conn->session->back_lock); > + if (!found) { > + spin_unlock_bh(&qedi_conn->tmf_work_lock); > + goto check_cleanup_reqs; > + } > > - if (!task) { > - QEDI_NOTICE(&qedi->dbg_ctx, > - "IO task completed, tmf rtt=0x%x, > cid=0x%x\n", > - get_itt(tmf_hdr->rtt), > - qedi_conn->iscsi_conn_id); > - return; > - } > + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, > + "TMF work, cqe->tid=0x%x, tmf flags=0x%x, cid=0x%x\n", > + proto_itt, tmf_hdr->flags, qedi_conn->iscsi_conn_id); > + > + spin_lock_bh(&conn->session->back_lock); > + if (iscsi_task_is_completed(task)) { > + QEDI_NOTICE(&qedi->dbg_ctx, > + "IO task completed, tmf rtt=0x%x, cid=0x%x\n", > + get_itt(tmf_hdr->rtt), qedi_conn->iscsi_conn_id); > + goto unlock; > + } > > - dbg_cmd = task->dd_data; > + dbg_cmd = task->dd_data; > > - QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, > - "Abort tmf rtt=0x%x, i/o itt=0x%x, i/o > tid=0x%x, cid=0x%x\n", > - get_itt(tmf_hdr->rtt), get_itt(task->itt), > - dbg_cmd->task_id, qedi_conn- > >iscsi_conn_id); > + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, > + "Abort tmf rtt=0x%x, i/o itt=0x%x, i/o tid=0x%x, cid=0x%x\n", > + get_itt(tmf_hdr->rtt), get_itt(task->itt), dbg_cmd->task_id, > + qedi_conn->iscsi_conn_id); > > - if (qedi_cmd->state == CLEANUP_WAIT_FAILED) > - qedi_cmd->state = CLEANUP_RECV; > + spin_lock(&qedi_conn->list_lock); > + if (likely(dbg_cmd->io_cmd_in_list)) { > + dbg_cmd->io_cmd_in_list = false; > + list_del_init(&dbg_cmd->io_cmd); > + qedi_conn->active_cmd_count--; > + } > + spin_unlock(&qedi_conn->list_lock); > + qedi_cmd->state = CLEANUP_RECV; > +unlock: > + spin_unlock_bh(&conn->session->back_lock); > + spin_unlock_bh(&qedi_conn->tmf_work_lock); > + wake_up_interruptible(&qedi_conn->wait_queue); > + return; > > - spin_lock(&qedi_conn->list_lock); > - if (likely(dbg_cmd->io_cmd_in_list)) { > - dbg_cmd->io_cmd_in_list = false; > - list_del_init(&dbg_cmd->io_cmd); > - qedi_conn->active_cmd_count--; > - } > - spin_unlock(&qedi_conn->list_lock); > - qedi_cmd->state = CLEANUP_RECV; > - wake_up_interruptible(&qedi_conn->wait_queue); > - } > - } else if (qedi_conn->cmd_cleanup_req > 0) { > +check_cleanup_reqs: > + if (qedi_conn->cmd_cleanup_req > 0) { > spin_lock_bh(&conn->session->back_lock); > qedi_get_proto_itt(qedi, cqe->itid, &ptmp_itt); > protoitt = build_itt(ptmp_itt, conn->session->age); @@ -844,6 > +839,7 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, > QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_TID, > "Freeing tid=0x%x for cid=0x%x\n", > cqe->itid, qedi_conn->iscsi_conn_id); > + qedi_clear_task_idx(qedi_conn->qedi, cqe->itid); > > } else { > qedi_get_proto_itt(qedi, cqe->itid, &ptmp_itt); @@ -946,8 > +942,7 @@ void qedi_fp_process_cqes(struct qedi_work *work) > goto exit_fp_process; > case ISCSI_CQE_TYPE_TASK_CLEANUP: > QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, "CleanUp > CqE\n"); > - qedi_process_cmd_cleanup_resp(qedi, &cqe->cqe_solicited, > task, > - conn); > + qedi_process_cmd_cleanup_resp(qedi, &cqe->cqe_solicited, > conn); > goto exit_fp_process; > default: > QEDI_ERR(&qedi->dbg_ctx, "Error cqe.\n"); @@ -1374,12 > +1369,22 @@ static void qedi_tmf_work(struct work_struct *work) > tmf_hdr = (struct iscsi_tm *)mtask->hdr; > set_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags); > > - ctask = iscsi_itt_to_task(conn, tmf_hdr->rtt); > - if (!ctask || !ctask->sc) { > + spin_lock_bh(&conn->session->back_lock); > + ctask = iscsi_itt_to_ctask(conn, tmf_hdr->rtt); > + if (!ctask || iscsi_task_is_completed(ctask)) { > + spin_unlock_bh(&conn->session->back_lock); > QEDI_ERR(&qedi->dbg_ctx, "Task already completed\n"); > - goto abort_ret; > + goto clear_cleanup; > } > > + /* > + * libiscsi gets a ref before sending the abort, but if libiscsi times > + * it out then it could release it and the cmd could complete from > + * under us. > + */ > + __iscsi_get_task(ctask); > + spin_unlock_bh(&conn->session->back_lock); > + > cmd = (struct qedi_cmd *)ctask->dd_data; > QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, > "Abort tmf rtt=0x%x, cmd itt=0x%x, cmd tid=0x%x, > cid=0x%x\n", @@ -1389,19 +1394,20 @@ static void qedi_tmf_work(struct > work_struct *work) > if (qedi_do_not_recover) { > QEDI_ERR(&qedi->dbg_ctx, "DONT SEND CLEANUP/ABORT > %d\n", > qedi_do_not_recover); > - goto abort_ret; > + goto put_task; > } > > list_work = kzalloc(sizeof(*list_work), GFP_ATOMIC); > if (!list_work) { > QEDI_ERR(&qedi->dbg_ctx, "Memory allocation failed\n"); > - goto abort_ret; > + goto put_task; > } > > qedi_cmd->type = TYPEIO; > list_work->qedi_cmd = qedi_cmd; > list_work->rtid = cmd->task_id; > list_work->state = QEDI_WORK_SCHEDULED; > + list_work->ctask = ctask; > qedi_cmd->list_tmf_work = list_work; > > QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, @@ -1434,7 +1440,9 > @@ static void qedi_tmf_work(struct work_struct *work) > qedi_cmd->task_id = tid; > qedi_send_iscsi_tmf(qedi_conn, qedi_cmd->task); > > -abort_ret: > +put_task: > + iscsi_put_task(ctask); > +clear_cleanup: > clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags); > return; > > @@ -1455,6 +1463,8 @@ static void qedi_tmf_work(struct work_struct *work) > } > spin_unlock(&qedi_conn->list_lock); > > + iscsi_put_task(ctask); > + > clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags); } > > diff --git a/drivers/scsi/qedi/qedi_iscsi.h b/drivers/scsi/qedi/qedi_iscsi.h index > 39dc27c85e3c..68ef519f5480 100644 > --- a/drivers/scsi/qedi/qedi_iscsi.h > +++ b/drivers/scsi/qedi/qedi_iscsi.h > @@ -212,6 +212,7 @@ struct qedi_cmd { > struct qedi_work_map { > struct list_head list; > struct qedi_cmd *qedi_cmd; > + struct iscsi_task *ctask; > int rtid; > > int state; > -- > 2.25.1 Thanks, Reviewed-by: Manish Rangankar <mrangankar@xxxxxxxxxxx>