Yikes, my git-send-email settings suppressed the important CCs. Sorry! Guilherme and Ilkka, can you comment about your testing results or review please? - Chris Leech ----- Original Message ----- > There's a rather long standing regression from commit > 659743b [SCSI] libiscsi: Reduce locking contention in fast path > > Depending on iSCSI target behavior, it's possible to hit the case in > iscsi_complete_task where the task is still on a pending list > (!list_empty(&task->running)). When that happens the task is removed > from the list while holding the session back_lock, but other task list > modification occur under the frwd_lock. That leads to linked list > corruption and eventually a panicked system. > > Rather than back out the session lock split entirely, in order to try > and keep some of the performance gains this patch adds another lock to > maintain the task lists integrity. > > Major enterprise supported kernels have been backing out the lock split > for while now, thanks to the efforts at IBM where a lab setup has the > most reliable reproducer I've seen on this issue. This patch has been > tested there successfully. > > Signed-off-by: Chris Leech <cleech@xxxxxxxxxx> > --- > drivers/scsi/libiscsi.c | 26 +++++++++++++++++++++++++- > include/scsi/libiscsi.h | 1 + > 2 files changed, 26 insertions(+), 1 deletion(-) > > diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c > index 834d121..acb5ef3 100644 > --- a/drivers/scsi/libiscsi.c > +++ b/drivers/scsi/libiscsi.c > @@ -560,8 +560,12 @@ static void iscsi_complete_task(struct iscsi_task *task, > int state) > WARN_ON_ONCE(task->state == ISCSI_TASK_FREE); > task->state = state; > > - if (!list_empty(&task->running)) > + spin_lock_bh(&conn->taskqueuelock); > + if (!list_empty(&task->running)) { > + WARN_ONCE(1, "iscsi_complete_task while task on list"); > list_del_init(&task->running); > + } > + spin_unlock_bh(&conn->taskqueuelock); > > if (conn->task == task) > conn->task = NULL; > @@ -783,7 +787,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct > iscsi_hdr *hdr, > if (session->tt->xmit_task(task)) > goto free_task; > } else { > + spin_lock_bh(&conn->taskqueuelock); > list_add_tail(&task->running, &conn->mgmtqueue); > + spin_unlock_bh(&conn->taskqueuelock); > iscsi_conn_queue_work(conn); > } > > @@ -1474,8 +1480,10 @@ void iscsi_requeue_task(struct iscsi_task *task) > * this may be on the requeue list already if the xmit_task callout > * is handling the r2ts while we are adding new ones > */ > + spin_lock_bh(&conn->taskqueuelock); > if (list_empty(&task->running)) > list_add_tail(&task->running, &conn->requeue); > + spin_unlock_bh(&conn->taskqueuelock); > iscsi_conn_queue_work(conn); > } > EXPORT_SYMBOL_GPL(iscsi_requeue_task); > @@ -1512,22 +1520,26 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) > * only have one nop-out as a ping from us and targets should not > * overflow us with nop-ins > */ > + spin_lock_bh(&conn->taskqueuelock); > check_mgmt: > while (!list_empty(&conn->mgmtqueue)) { > conn->task = list_entry(conn->mgmtqueue.next, > struct iscsi_task, running); > list_del_init(&conn->task->running); > + spin_unlock_bh(&conn->taskqueuelock); > if (iscsi_prep_mgmt_task(conn, conn->task)) { > /* regular RX path uses back_lock */ > spin_lock_bh(&conn->session->back_lock); > __iscsi_put_task(conn->task); > spin_unlock_bh(&conn->session->back_lock); > conn->task = NULL; > + spin_lock_bh(&conn->taskqueuelock); > continue; > } > rc = iscsi_xmit_task(conn); > if (rc) > goto done; > + spin_lock_bh(&conn->taskqueuelock); > } > > /* process pending command queue */ > @@ -1535,19 +1547,24 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) > conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task, > running); > list_del_init(&conn->task->running); > + spin_unlock_bh(&conn->taskqueuelock); > if (conn->session->state == ISCSI_STATE_LOGGING_OUT) { > fail_scsi_task(conn->task, DID_IMM_RETRY); > + spin_lock_bh(&conn->taskqueuelock); > continue; > } > rc = iscsi_prep_scsi_cmd_pdu(conn->task); > if (rc) { > if (rc == -ENOMEM || rc == -EACCES) { > + spin_lock_bh(&conn->taskqueuelock); > list_add_tail(&conn->task->running, > &conn->cmdqueue); > conn->task = NULL; > + spin_unlock_bh(&conn->taskqueuelock); > goto done; > } else > fail_scsi_task(conn->task, DID_ABORT); > + spin_lock_bh(&conn->taskqueuelock); > continue; > } > rc = iscsi_xmit_task(conn); > @@ -1558,6 +1575,7 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) > * we need to check the mgmt queue for nops that need to > * be sent to aviod starvation > */ > + spin_lock_bh(&conn->taskqueuelock); > if (!list_empty(&conn->mgmtqueue)) > goto check_mgmt; > } > @@ -1577,12 +1595,15 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) > conn->task = task; > list_del_init(&conn->task->running); > conn->task->state = ISCSI_TASK_RUNNING; > + spin_unlock_bh(&conn->taskqueuelock); > rc = iscsi_xmit_task(conn); > if (rc) > goto done; > + spin_lock_bh(&conn->taskqueuelock); > if (!list_empty(&conn->mgmtqueue)) > goto check_mgmt; > } > + spin_unlock_bh(&conn->taskqueuelock); > spin_unlock_bh(&conn->session->frwd_lock); > return -ENODATA; > > @@ -1738,7 +1759,9 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct > scsi_cmnd *sc) > goto prepd_reject; > } > } else { > + spin_lock_bh(&conn->taskqueuelock); > list_add_tail(&task->running, &conn->cmdqueue); > + spin_unlock_bh(&conn->taskqueuelock); > iscsi_conn_queue_work(conn); > } > > @@ -2896,6 +2919,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, > int dd_size, > INIT_LIST_HEAD(&conn->mgmtqueue); > INIT_LIST_HEAD(&conn->cmdqueue); > INIT_LIST_HEAD(&conn->requeue); > + spin_lock_init(&conn->taskqueuelock); > INIT_WORK(&conn->xmitwork, iscsi_xmitworker); > > /* allocate login_task used for the login/text sequences */ > diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h > index b0e275d..583875e 100644 > --- a/include/scsi/libiscsi.h > +++ b/include/scsi/libiscsi.h > @@ -196,6 +196,7 @@ struct iscsi_conn { > struct iscsi_task *task; /* xmit task in progress */ > > /* xmit */ > + spinlock_t taskqueuelock; /* protects the next three lists */ > struct list_head mgmtqueue; /* mgmt (control) xmit queue */ > struct list_head cmdqueue; /* data-path cmd queue */ > struct list_head requeue; /* tasks needing another run */ > -- > 2.9.3 > >