On 12/14/20 2:41 AM, Wu Bo wrote: > When testing kernel 4.18 version, NULL pointer dereference problem occurs > in iscsi_eh_cmd_timed_out function. > > I think this bug in the upstream is still exists. > > The analysis reasons are as follows: > 1) For some reason, I/O command did not complete within > the timeout period. The block layer timer works, > call scsi_times_out() to handle I/O timeout logic. > At the same time the command just completes. > > 2) scsi_times_out() call iscsi_eh_cmd_timed_out() > to processing timeout logic. although there is an NULL judgment > for the task, the task has not been released yet now. > > 3) iscsi_complete_task() call __iscsi_put_task(), > The task reference count reaches zero, the conditions for free task > is met, then iscsi_free_task () free the task, > and let sc->SCp.ptr = NULL. After iscsi_eh_cmd_timed_out passes > the task judgment check, there may be NULL dereference scenarios > later. > I have a patch for this I think. This is broken out of patchset I was trying to fixup the back lock usage for offload drivers, so I have only compile tested it. There is another issue where the for lun reset cleanup we could race. The comments mention suspending the rx side, but we only do that for session level cleaup. The basic idea is we don't want to add more frwd lock uses in the completion patch like in your patch. In these non perf paths, like the tmf/timeout case we can just take a ref to the cmd so it's not freed from under us. diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index f9314f1..f07f8c1 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -573,18 +573,9 @@ void iscsi_complete_scsi_task(struct iscsi_task *task, static void fail_scsi_task(struct iscsi_task *task, int err) { struct iscsi_conn *conn = task->conn; - struct scsi_cmnd *sc; + struct scsi_cmnd *sc = task->sc; int state; - /* - * if a command completes and we get a successful tmf response - * we will hit this because the scsi eh abort code does not take - * a ref to the task. - */ - sc = task->sc; - if (!sc) - return; - if (task->state == ISCSI_TASK_PENDING) { /* * cmd never made it to the xmit thread, so we should not count @@ -1855,26 +1846,34 @@ static int iscsi_exec_task_mgmt_fn(struct iscsi_conn *conn, } /* - * Fail commands. session lock held and recv side suspended and xmit - * thread flushed + * Fail commands. session frwd lock held and and xmit thread flushed. */ static void fail_scsi_tasks(struct iscsi_conn *conn, u64 lun, int error) { + struct iscsi_session *session = conn->session; struct iscsi_task *task; int i; - for (i = 0; i < conn->session->cmds_max; i++) { - task = conn->session->cmds[i]; - if (!task->sc || task->state == ISCSI_TASK_FREE) + for (i = 0; i < session->cmds_max; i++) { + spin_lock_bh(&session->back_lock); + task = session->cmds[i]; + if (!task->sc || task->state == ISCSI_TASK_FREE) { + spin_unlock_bh(&session->back_lock); continue; + } - if (lun != -1 && lun != task->sc->device->lun) + if (lun != -1 && lun != task->sc->device->lun) { + spin_unlock_bh(&session->back_lock); continue; + } + __iscsi_get_task(task); + spin_unlock_bh(&session->back_lock); - ISCSI_DBG_SESSION(conn->session, + ISCSI_DBG_SESSION(session, "failing sc %p itt 0x%x state %d\n", task->sc, task->itt, task->state); fail_scsi_task(task, error); + iscsi_put_task(task); } } @@ -1953,6 +1952,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) ISCSI_DBG_EH(session, "scsi cmd %p timedout\n", sc); spin_lock_bh(&session->frwd_lock); + spin_lock(&session->back_lock); task = (struct iscsi_task *)sc->SCp.ptr; if (!task) { /* @@ -1960,8 +1960,11 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) * so let timeout code complete it now. */ rc = BLK_EH_DONE; + spin_unlock(&session->back_lock); goto done; } + __iscsi_get_task(task); + spin_unlock(&session->back_lock); if (session->state != ISCSI_STATE_LOGGED_IN) { /* @@ -2077,9 +2080,12 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) rc = BLK_EH_RESET_TIMER; done: - if (task) - task->last_timeout = jiffies; spin_unlock_bh(&session->frwd_lock); + + if (task) { + task->last_timeout = jiffies; + iscsi_put_task(task); + } ISCSI_DBG_EH(session, "return %s\n", rc == BLK_EH_RESET_TIMER ? "timer reset" : "shutdown or nh"); return rc; @@ -2187,15 +2193,20 @@ int iscsi_eh_abort(struct scsi_cmnd *sc) conn->eh_abort_cnt++; age = session->age; - task = (struct iscsi_task *)sc->SCp.ptr; - ISCSI_DBG_EH(session, "aborting [sc %p itt 0x%x]\n", - sc, task->itt); - - /* task completed before time out */ - if (!task->sc) { + spin_lock(&session->back_lock); + task = (struct iscsi_task *)sc->SCp.ptr; + if (!task || !task->sc) { + /* task completed before time out */ ISCSI_DBG_EH(session, "sc completed while abort in progress\n"); - goto success; + + spin_unlock(&session->back_lock); + spin_unlock_bh(&session->frwd_lock); + mutex_unlock(&session->eh_mutex); + return SUCCESS; } + ISCSI_DBG_EH(session, "aborting [sc %p itt 0x%x]\n", sc, task->itt); + __iscsi_get_task(task); + spin_unlock(&session->back_lock); if (task->state == ISCSI_TASK_PENDING) { fail_scsi_task(task, DID_ABORT); @@ -2258,6 +2269,8 @@ int iscsi_eh_abort(struct scsi_cmnd *sc) ISCSI_DBG_EH(session, "abort success [sc %p itt 0x%x]\n", sc, task->itt); mutex_unlock(&session->eh_mutex); + + iscsi_put_task(task); return SUCCESS; failed: @@ -2266,6 +2279,8 @@ int iscsi_eh_abort(struct scsi_cmnd *sc) ISCSI_DBG_EH(session, "abort failed [sc %p itt 0x%x]\n", sc, task ? task->itt : 0); mutex_unlock(&session->eh_mutex); + + iscsi_put_task(task); return FAILED; } EXPORT_SYMBOL_GPL(iscsi_eh_abort);