Re: [PATCH 2/7] libiscsi: drop taskqueuelock

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 1/18/21 12:34 PM, Mike Christie wrote:
> The purpose of the taskqueuelock was to handle the issue where a bad
> target decides to send a R2T and before it's data has been sent
> decides to send a cmd response to complete the cmd. The following
> patches fix up the frwd/back locks so they are taken from the
> queue/xmit (frwd) and completion (back) paths again. To get there
> this patch removes the taskqueuelock which for iscsi xmit wq based
> drivers was taken in the queue, xmit and completion paths.
> 
> Instead of the lock, we just make sure we have a ref to the task
> when we queue a R2T, and then we always remove the task from the
> requeue list in the xmit path or the forced cleanup paths.
> 
> Signed-off-by: Mike Christie <michael.christie@xxxxxxxxxx>
> ---
>  drivers/scsi/libiscsi.c     | 178 +++++++++++++++++++++++-------------
>  drivers/scsi/libiscsi_tcp.c |  84 +++++++++++------
>  include/scsi/libiscsi.h     |   4 +-
>  3 files changed, 171 insertions(+), 95 deletions(-)
> 
> diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
> index cee1dbaa1b93..3d74fdd9f31a 100644
> --- a/drivers/scsi/libiscsi.c
> +++ b/drivers/scsi/libiscsi.c
> @@ -523,16 +523,6 @@ static void iscsi_complete_task(struct iscsi_task *task, int state)
>  	WARN_ON_ONCE(task->state == ISCSI_TASK_FREE);
>  	task->state = state;
>  
> -	spin_lock_bh(&conn->taskqueuelock);
> -	if (!list_empty(&task->running)) {
> -		pr_debug_once("%s while task on list", __func__);
> -		list_del_init(&task->running);
> -	}
> -	spin_unlock_bh(&conn->taskqueuelock);
> -
> -	if (conn->task == task)
> -		conn->task = NULL;
> -
>  	if (READ_ONCE(conn->ping_task) == task)
>  		WRITE_ONCE(conn->ping_task, NULL);
>  
> @@ -564,9 +554,40 @@ void iscsi_complete_scsi_task(struct iscsi_task *task,
>  }
>  EXPORT_SYMBOL_GPL(iscsi_complete_scsi_task);
>  
> +/*
> + * Must be called with back and frwd lock
> + */
> +static bool cleanup_queued_task(struct iscsi_task *task)
> +{
> +	struct iscsi_conn *conn = task->conn;
> +	bool early_complete = false;
> +
> +	/* Bad target might have completed task while it was still running */
> +	if (task->state == ISCSI_TASK_COMPLETED)
> +		early_complete = true;
> +
> +	if (!list_empty(&task->running)) {
> +		list_del_init(&task->running);
> +		/*
> +		 * If it's on a list but still running, this could be from
> +		 * a bad target sending a rsp early, cleanup from a TMF, or
> +		 * session recovery.
> +		 */
> +		if (task->state == ISCSI_TASK_RUNNING ||
> +		    task->state == ISCSI_TASK_COMPLETED)
> +			__iscsi_put_task(task);
> +	}
> +
> +	if (conn->task == task) {
> +		conn->task = NULL;
> +		__iscsi_put_task(task);
> +	}
> +
> +	return early_complete;
> +}
>  
>  /*
> - * session back_lock must be held and if not called for a task that is
> + * session frwd_lock must be held and if not called for a task that is
>   * still pending or from the xmit thread, then xmit thread must
>   * be suspended.
>   */
> @@ -585,6 +606,13 @@ static void fail_scsi_task(struct iscsi_task *task, int err)
>  	if (!sc)
>  		return;
>  
> +	/* regular RX path uses back_lock */
> +	spin_lock_bh(&conn->session->back_lock);
> +	if (cleanup_queued_task(task)) {
> +		spin_unlock_bh(&conn->session->back_lock);
> +		return;
> +	}
> +
>  	if (task->state == ISCSI_TASK_PENDING) {
>  		/*
>  		 * cmd never made it to the xmit thread, so we should not count
> @@ -600,9 +628,6 @@ static void fail_scsi_task(struct iscsi_task *task, int err)
>  
>  	sc->result = err << 16;
>  	scsi_set_resid(sc, scsi_bufflen(sc));
> -
> -	/* regular RX path uses back_lock */
> -	spin_lock_bh(&conn->session->back_lock);
>  	iscsi_complete_task(task, state);
>  	spin_unlock_bh(&conn->session->back_lock);
>  }
> @@ -748,9 +773,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
>  		if (session->tt->xmit_task(task))
>  			goto free_task;
>  	} else {
> -		spin_lock_bh(&conn->taskqueuelock);
>  		list_add_tail(&task->running, &conn->mgmtqueue);
> -		spin_unlock_bh(&conn->taskqueuelock);
>  		iscsi_conn_queue_work(conn);
>  	}
>  
> @@ -1411,31 +1434,61 @@ static int iscsi_check_cmdsn_window_closed(struct iscsi_conn *conn)
>  	return 0;
>  }
>  
> -static int iscsi_xmit_task(struct iscsi_conn *conn)
> +static int iscsi_xmit_task(struct iscsi_conn *conn, struct iscsi_task *task,
> +			   bool was_requeue)
>  {
> -	struct iscsi_task *task = conn->task;
>  	int rc;
>  
> -	if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx))
> -		return -ENODATA;
> -
>  	spin_lock_bh(&conn->session->back_lock);
> -	if (conn->task == NULL) {
> +
> +	if (!conn->task) {
> +		/* Take a ref so we can access it after xmit_task() */
> +		__iscsi_get_task(task);
> +	} else {
> +		/* Already have a ref from when we failed to send it last call */
> +		conn->task = NULL;
> +	}
> +
> +	/*
> +	 * If this was a requeue for a R2T we have an extra ref on the task in
> +	 * case a bad target sends a cmd rsp before we have handled the task.
> +	 */
> +	if (was_requeue)
> +		__iscsi_put_task(task);
> +
> +	/*
> +	 * Do this after dropping the extra ref because if this was a requeue
> +	 * it's removed from that list and cleanup_queued_task would miss it.
> +	 */
> +	if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) {
> +		/*
> +		 * Save the task and ref in case we weren't cleaning up this
> +		 * task and get woken up again.
> +		 */
> +		conn->task = task;
>  		spin_unlock_bh(&conn->session->back_lock);
>  		return -ENODATA;
>  	}
> -	__iscsi_get_task(task);
>  	spin_unlock_bh(&conn->session->back_lock);
> +
>  	spin_unlock_bh(&conn->session->frwd_lock);
>  	rc = conn->session->tt->xmit_task(task);
>  	spin_lock_bh(&conn->session->frwd_lock);
>  	if (!rc) {
>  		/* done with this task */
>  		task->last_xfer = jiffies;
> -		conn->task = NULL;
>  	}
>  	/* regular RX path uses back_lock */
>  	spin_lock(&conn->session->back_lock);
> +	if (rc && task->state == ISCSI_TASK_RUNNING) {
> +		/*
> +		 * get an extra ref that is released next time we access it
> +		 * as conn->task above.
> +		 */
> +		__iscsi_get_task(task);
> +		conn->task = task;
> +	}
> +
>  	__iscsi_put_task(task);
>  	spin_unlock(&conn->session->back_lock);
>  	return rc;
> @@ -1445,9 +1498,7 @@ static int iscsi_xmit_task(struct iscsi_conn *conn)
>   * iscsi_requeue_task - requeue task to run from session workqueue
>   * @task: task to requeue
>   *
> - * LLDs that need to run a task from the session workqueue should call
> - * this. The session frwd_lock must be held. This should only be called
> - * by software drivers.
> + * Callers must have taken a ref to the task that is going to be requeued.
>   */
>  void iscsi_requeue_task(struct iscsi_task *task)
>  {
> @@ -1457,11 +1508,18 @@ void iscsi_requeue_task(struct iscsi_task *task)
>  	 * this may be on the requeue list already if the xmit_task callout
>  	 * is handling the r2ts while we are adding new ones
>  	 */
> -	spin_lock_bh(&conn->taskqueuelock);
> -	if (list_empty(&task->running))
> +	spin_lock_bh(&conn->session->frwd_lock);
> +	if (list_empty(&task->running)) {
>  		list_add_tail(&task->running, &conn->requeue);
> -	spin_unlock_bh(&conn->taskqueuelock);
> +	} else {
> +		/*
> +		 * Don't need the extra ref since it's already requeued and
> +		 * has a ref.
> +		 */
> +		iscsi_put_task(task);
> +	}
>  	iscsi_conn_queue_work(conn);
> +	spin_unlock_bh(&conn->session->frwd_lock);
>  }
>  EXPORT_SYMBOL_GPL(iscsi_requeue_task);
>  
> @@ -1487,7 +1545,7 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
>  	}
>  
>  	if (conn->task) {
> -		rc = iscsi_xmit_task(conn);
> +		rc = iscsi_xmit_task(conn, conn->task, false);
>  	        if (rc)
>  		        goto done;
>  	}
> @@ -1497,49 +1555,41 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
>  	 * only have one nop-out as a ping from us and targets should not
>  	 * overflow us with nop-ins
>  	 */
> -	spin_lock_bh(&conn->taskqueuelock);
>  check_mgmt:
>  	while (!list_empty(&conn->mgmtqueue)) {
> -		conn->task = list_entry(conn->mgmtqueue.next,
> -					 struct iscsi_task, running);
> -		list_del_init(&conn->task->running);
> -		spin_unlock_bh(&conn->taskqueuelock);
> -		if (iscsi_prep_mgmt_task(conn, conn->task)) {
> +		task = list_entry(conn->mgmtqueue.next, struct iscsi_task,
> +				  running);
> +		list_del_init(&task->running);
> +		if (iscsi_prep_mgmt_task(conn, task)) {
>  			/* regular RX path uses back_lock */
>  			spin_lock_bh(&conn->session->back_lock);
> -			__iscsi_put_task(conn->task);
> +			__iscsi_put_task(task);
>  			spin_unlock_bh(&conn->session->back_lock);
> -			conn->task = NULL;
> -			spin_lock_bh(&conn->taskqueuelock);
>  			continue;
>  		}
> -		rc = iscsi_xmit_task(conn);
> +		rc = iscsi_xmit_task(conn, task, false);
>  		if (rc)
>  			goto done;
> -		spin_lock_bh(&conn->taskqueuelock);
>  	}
>  
>  	/* process pending command queue */
>  	while (!list_empty(&conn->cmdqueue)) {
> -		conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task,
> -					running);
> -		list_del_init(&conn->task->running);
> -		spin_unlock_bh(&conn->taskqueuelock);
> +		task = list_entry(conn->cmdqueue.next, struct iscsi_task,
> +				  running);
> +		list_del_init(&task->running);
>  		if (conn->session->state == ISCSI_STATE_LOGGING_OUT) {
> -			fail_scsi_task(conn->task, DID_IMM_RETRY);
> -			spin_lock_bh(&conn->taskqueuelock);
> +			fail_scsi_task(task, DID_IMM_RETRY);
>  			continue;
>  		}
> -		rc = iscsi_prep_scsi_cmd_pdu(conn->task);
> +		rc = iscsi_prep_scsi_cmd_pdu(task);
>  		if (rc) {
>  			if (rc == -ENOMEM || rc == -EACCES)
> -				fail_scsi_task(conn->task, DID_IMM_RETRY);
> +				fail_scsi_task(task, DID_IMM_RETRY);
>  			else
> -				fail_scsi_task(conn->task, DID_ABORT);
> -			spin_lock_bh(&conn->taskqueuelock);
> +				fail_scsi_task(task, DID_ABORT);
>  			continue;
>  		}
> -		rc = iscsi_xmit_task(conn);
> +		rc = iscsi_xmit_task(conn, task, false);
>  		if (rc)
>  			goto done;
>  		/*
> @@ -1547,7 +1597,6 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
>  		 * we need to check the mgmt queue for nops that need to
>  		 * be sent to aviod starvation
>  		 */
> -		spin_lock_bh(&conn->taskqueuelock);
>  		if (!list_empty(&conn->mgmtqueue))
>  			goto check_mgmt;
>  	}
> @@ -1561,21 +1610,17 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
>  
>  		task = list_entry(conn->requeue.next, struct iscsi_task,
>  				  running);
> +
>  		if (iscsi_check_tmf_restrictions(task, ISCSI_OP_SCSI_DATA_OUT))
>  			break;
>  
> -		conn->task = task;
> -		list_del_init(&conn->task->running);
> -		conn->task->state = ISCSI_TASK_RUNNING;
> -		spin_unlock_bh(&conn->taskqueuelock);
> -		rc = iscsi_xmit_task(conn);
> +		list_del_init(&task->running);
> +		rc = iscsi_xmit_task(conn, task, true);
>  		if (rc)
>  			goto done;
> -		spin_lock_bh(&conn->taskqueuelock);
>  		if (!list_empty(&conn->mgmtqueue))
>  			goto check_mgmt;
>  	}
> -	spin_unlock_bh(&conn->taskqueuelock);
>  	spin_unlock_bh(&conn->session->frwd_lock);
>  	return -ENODATA;
>  
> @@ -1741,9 +1786,7 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)
>  			goto prepd_reject;
>  		}
>  	} else {
> -		spin_lock_bh(&conn->taskqueuelock);
>  		list_add_tail(&task->running, &conn->cmdqueue);
> -		spin_unlock_bh(&conn->taskqueuelock);
>  		iscsi_conn_queue_work(conn);
>  	}
>  
> @@ -2914,7 +2957,6 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
>  	INIT_LIST_HEAD(&conn->mgmtqueue);
>  	INIT_LIST_HEAD(&conn->cmdqueue);
>  	INIT_LIST_HEAD(&conn->requeue);
> -	spin_lock_init(&conn->taskqueuelock);
>  	INIT_WORK(&conn->xmitwork, iscsi_xmitworker);
>  
>  	/* allocate login_task used for the login/text sequences */
> @@ -3080,10 +3122,16 @@ fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn)
>  		ISCSI_DBG_SESSION(conn->session,
>  				  "failing mgmt itt 0x%x state %d\n",
>  				  task->itt, task->state);
> +
> +		spin_lock_bh(&session->back_lock);
> +		if (cleanup_queued_task(task)) {
> +			spin_unlock_bh(&session->back_lock);
> +			continue;
> +		}
> +
>  		state = ISCSI_TASK_ABRT_SESS_RECOV;
>  		if (task->state == ISCSI_TASK_PENDING)
>  			state = ISCSI_TASK_COMPLETED;
> -		spin_lock_bh(&session->back_lock);
>  		iscsi_complete_task(task, state);
>  		spin_unlock_bh(&session->back_lock);
>  	}
> diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
> index 83f14b2c8804..14c9169f13ec 100644
> --- a/drivers/scsi/libiscsi_tcp.c
> +++ b/drivers/scsi/libiscsi_tcp.c
> @@ -524,48 +524,79 @@ static int iscsi_tcp_data_in(struct iscsi_conn *conn, struct iscsi_task *task)
>  /**
>   * iscsi_tcp_r2t_rsp - iSCSI R2T Response processing
>   * @conn: iscsi connection
> - * @task: scsi command task
> + * @hdr: PDU header
>   */
> -static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
> +static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
>  {
>  	struct iscsi_session *session = conn->session;
> -	struct iscsi_tcp_task *tcp_task = task->dd_data;
> -	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
> -	struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
> +	struct iscsi_tcp_task *tcp_task;
> +	struct iscsi_tcp_conn *tcp_conn;
> +	struct iscsi_r2t_rsp *rhdr;
>  	struct iscsi_r2t_info *r2t;
> -	int r2tsn = be32_to_cpu(rhdr->r2tsn);
> +	struct iscsi_task *task;
>  	u32 data_length;
>  	u32 data_offset;
> +	int r2tsn;
>  	int rc;
>  
> +	spin_lock(&session->back_lock);
> +	task = iscsi_itt_to_ctask(conn, hdr->itt);
> +	if (!task) {
> +		spin_unlock(&session->back_lock);
> +		return ISCSI_ERR_BAD_ITT;
> +	} else if (task->sc->sc_data_direction != DMA_TO_DEVICE) {
> +		spin_unlock(&session->back_lock);
> +		return ISCSI_ERR_PROTO;
> +	}
> +	/*
> +	 * A bad target might complete the cmd before we have handled R2Ts
> +	 * so get a ref to the task that will be dropped in the xmit path.
> +	 */
> +	if (task->state != ISCSI_TASK_RUNNING) {
> +		spin_unlock(&session->back_lock);
> +		/* Let the path that got the early rsp complete it */
> +		return 0;
> +	}
> +	task->last_xfer = jiffies;
> +	__iscsi_get_task(task);
> +
> +	tcp_conn = conn->dd_data;
> +	rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
> +	/* fill-in new R2T associated with the task */
> +	iscsi_update_cmdsn(session, (struct iscsi_nopin *)rhdr);
> +	spin_unlock(&session->back_lock);
> +
>  	if (tcp_conn->in.datalen) {
>  		iscsi_conn_printk(KERN_ERR, conn,
>  				  "invalid R2t with datalen %d\n",
>  				  tcp_conn->in.datalen);
> -		return ISCSI_ERR_DATALEN;
> +		rc = ISCSI_ERR_DATALEN;
> +		goto put_task;
>  	}
>  
> +	tcp_task = task->dd_data;
> +	r2tsn = be32_to_cpu(rhdr->r2tsn);
>  	if (tcp_task->exp_datasn != r2tsn){
>  		ISCSI_DBG_TCP(conn, "task->exp_datasn(%d) != rhdr->r2tsn(%d)\n",
>  			      tcp_task->exp_datasn, r2tsn);
> -		return ISCSI_ERR_R2TSN;
> +		rc = ISCSI_ERR_R2TSN;
> +		goto put_task;
>  	}
>  
> -	/* fill-in new R2T associated with the task */
> -	iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);
> -
>  	if (!task->sc || session->state != ISCSI_STATE_LOGGED_IN) {
>  		iscsi_conn_printk(KERN_INFO, conn,
>  				  "dropping R2T itt %d in recovery.\n",
>  				  task->itt);
> -		return 0;
> +		rc = 0;
> +		goto put_task;
>  	}
>  
>  	data_length = be32_to_cpu(rhdr->data_length);
>  	if (data_length == 0) {
>  		iscsi_conn_printk(KERN_ERR, conn,
>  				  "invalid R2T with zero data len\n");
> -		return ISCSI_ERR_DATALEN;
> +		rc = ISCSI_ERR_DATALEN;
> +		goto put_task;
>  	}
>  
>  	if (data_length > session->max_burst)
> @@ -579,7 +610,8 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
>  				  "invalid R2T with data len %u at offset %u "
>  				  "and total length %d\n", data_length,
>  				  data_offset, task->sc->sdb.length);
> -		return ISCSI_ERR_DATALEN;
> +		rc = ISCSI_ERR_DATALEN;
> +		goto put_task;
>  	}
>  
>  	spin_lock(&tcp_task->pool2queue);
> @@ -589,7 +621,8 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
>  				  "Target has sent more R2Ts than it "
>  				  "negotiated for or driver has leaked.\n");
>  		spin_unlock(&tcp_task->pool2queue);
> -		return ISCSI_ERR_PROTO;
> +		rc = ISCSI_ERR_PROTO;
> +		goto put_task;
>  	}
>  
>  	r2t->exp_statsn = rhdr->statsn;
> @@ -607,6 +640,10 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
>  
>  	iscsi_requeue_task(task);
>  	return 0;
> +
> +put_task:
> +	iscsi_put_task(task);
> +	return rc;
>  }
>  
>  /*
> @@ -730,20 +767,11 @@ iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
>  		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
>  		break;
>  	case ISCSI_OP_R2T:
> -		spin_lock(&conn->session->back_lock);
> -		task = iscsi_itt_to_ctask(conn, hdr->itt);
> -		spin_unlock(&conn->session->back_lock);
> -		if (!task)
> -			rc = ISCSI_ERR_BAD_ITT;
> -		else if (ahslen)
> +		if (ahslen) {
>  			rc = ISCSI_ERR_AHSLEN;
> -		else if (task->sc->sc_data_direction == DMA_TO_DEVICE) {
> -			task->last_xfer = jiffies;
> -			spin_lock(&conn->session->frwd_lock);
> -			rc = iscsi_tcp_r2t_rsp(conn, task);
> -			spin_unlock(&conn->session->frwd_lock);
> -		} else
> -			rc = ISCSI_ERR_PROTO;
> +			break;
> +		}
> +		rc = iscsi_tcp_r2t_rsp(conn, hdr);
>  		break;
>  	case ISCSI_OP_LOGIN_RSP:
>  	case ISCSI_OP_TEXT_RSP:
> diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
> index b3bbd10eb3f0..44a9554aea62 100644
> --- a/include/scsi/libiscsi.h
> +++ b/include/scsi/libiscsi.h
> @@ -187,7 +187,7 @@ struct iscsi_conn {
>  	struct iscsi_task	*task;		/* xmit task in progress */
>  
>  	/* xmit */
> -	spinlock_t		taskqueuelock;  /* protects the next three lists */
> +	/* items must be added/deleted under frwd lock */
>  	struct list_head	mgmtqueue;	/* mgmt (control) xmit queue */
>  	struct list_head	cmdqueue;	/* data-path cmd queue */
>  	struct list_head	requeue;	/* tasks needing another run */
> @@ -332,7 +332,7 @@ struct iscsi_session {
>  						 * cmdsn, queued_cmdsn     *
>  						 * session resources:      *
>  						 * - cmdpool kfifo_out ,   *
> -						 * - mgmtpool,		   */
> +						 * - mgmtpool, queues	   */
>  	spinlock_t		back_lock;	/* protects cmdsn_exp      *
>  						 * cmdsn_max,              *
>  						 * cmdpool kfifo_in        */
> 

Wow. That's a thick one. :)

Reviewed-by: Lee Duncan <lduncan@xxxxxxxx>




[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]

  Powered by Linux