Re: [PATCH 03/19] target: Avoid that aborting a command sporadically hangs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, 2017-05-05 at 10:53 +0200, Christoph Hellwig wrote:
> On Thu, May 04, 2017 at 03:50:46PM -0700, Bart Van Assche wrote:
> > For several target drivers (e.g. ib_srpt and ib_isert) sleeping inside
> > transport_generic_free_cmd() causes RDMA completion processing to stall.
> > Hence only sleep inside this function if the (iSCSI) target driver
> > requires this.
> 
> This looks reasonable to me:
> 
> Reviewed-by: Christoph Hellwig <hch@xxxxxx>
> 
> But as a further step can we try to move the waiting behavior entirely
> into the caller that actually cares,
> 
> e.g. move the conditional target_wait_free_cmd before the call
> to transport_generic_free_cmd in transport_generic_free_cmd,
> and move this second wait_for_completion after the
> transport_generic_free_cmd call based on an indicator (return value
> or se_cmd flag)?

Hello Christoph,

I have started working on eliminating the waiting code from
transport_generic_free_cmd(). I'm still working on a patch for the iSCSI
target driver. What I came up so far for the loop and Xen scsiback drivers
is attached to this e-mail.

Bart.
From 4a56d0f88e02a26aace617709a35b9ee81856890 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@xxxxxxxxxxx>
Date: Wed, 10 May 2017 15:40:39 -0700
Subject: [PATCH 1/5] xen/scsiback: Replace a waitqueue and a counter by a
 completion

Signed-off-by: Bart Van Assche <bart.vanassche@xxxxxxxxxxx>
Cc: Juergen Gross <jgross@xxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Hannes Reinecke <hare@xxxxxxxx>
Cc: David Disseldorp <ddiss@xxxxxxx>
Cc: xen-devel@xxxxxxxxxxxxxxxxxxxx
---
 drivers/xen/xen-scsiback.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index d6950e0802b7..5f1daf161312 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -137,8 +137,7 @@ struct vscsibk_pend {
 };
 
 struct scsiback_tmr {
-	atomic_t tmr_complete;
-	wait_queue_head_t tmr_wait;
+	struct completion done;
 };
 
 #define VSCSI_DEFAULT_SESSION_TAGS	128
@@ -609,7 +608,7 @@ static void scsiback_device_action(struct vscsibk_pend *pending_req,
 		goto err;
 	}
 
-	init_waitqueue_head(&tmr->tmr_wait);
+	init_completion(&tmr->done);
 
 	rc = target_submit_tmr(&pending_req->se_cmd, nexus->tvn_se_sess,
 			       &pending_req->sense_buffer[0],
@@ -618,7 +617,7 @@ static void scsiback_device_action(struct vscsibk_pend *pending_req,
 	if (rc)
 		goto err;
 
-	wait_event(tmr->tmr_wait, atomic_read(&tmr->tmr_complete));
+	wait_for_completion(&tmr->done);
 
 	err = (se_cmd->se_tmr_req->response == TMR_FUNCTION_COMPLETE) ?
 		SUCCESS : FAILED;
@@ -1458,8 +1457,7 @@ static void scsiback_queue_tm_rsp(struct se_cmd *se_cmd)
 	struct se_tmr_req *se_tmr = se_cmd->se_tmr_req;
 	struct scsiback_tmr *tmr = se_tmr->fabric_tmr_ptr;
 
-	atomic_set(&tmr->tmr_complete, 1);
-	wake_up(&tmr->tmr_wait);
+	complete(&tmr->done);
 }
 
 static void scsiback_aborted_task(struct se_cmd *se_cmd)
-- 
2.12.2

From 043951a899f826a9a20733cd18006dbb7977e6b5 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@xxxxxxxxxxx>
Date: Wed, 10 May 2017 15:13:04 -0700
Subject: [PATCH 2/5] xen/scsiback: Make TMF processing slightly faster

Target drivers must guarantee that struct se_cmd and struct se_tmr_req
exist as long as target_tmr_work() is in progress. Since the last
access by the LIO core is a call to .check_stop_free() and since the
Xen scsiback .check_stop_free() drops a reference to the TMF, it is
already guaranteed that the struct se_cmd that corresponds to the TMF
exists as long as target_tmr_work() is in progress. Hence change the
second argument of transport_generic_free_cmd() from 1 into 0.

Signed-off-by: Bart Van Assche <bart.vanassche@xxxxxxxxxxx>
Cc: Juergen Gross <jgross@xxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Hannes Reinecke <hare@xxxxxxxx>
Cc: David Disseldorp <ddiss@xxxxxxx>
Cc: xen-devel@xxxxxxxxxxxxxxxxxxxx
---
 drivers/xen/xen-scsiback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index 5f1daf161312..3a67acb33dfb 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -623,7 +623,7 @@ static void scsiback_device_action(struct vscsibk_pend *pending_req,
 		SUCCESS : FAILED;
 
 	scsiback_do_resp_with_sense(NULL, err, 0, pending_req);
-	transport_generic_free_cmd(&pending_req->se_cmd, 1);
+	transport_generic_free_cmd(&pending_req->se_cmd, 0);
 	return;
 err:
 	if (tmr)
-- 
2.12.2

From 901c4cd1f1694fbe2edf235734462056ad58a9e8 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@xxxxxxxxxxx>
Date: Wed, 10 May 2017 14:35:55 -0700
Subject: [PATCH 3/5] target/tcm_loop: Replace a waitqueue and a counter by a
 completion

Make the tcm_loop code slightly easier to read by using a struct
completion instead of open-coding the completion concept.

Signed-off-by: Bart Van Assche <bart.vanassche@xxxxxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Hannes Reinecke <hare@xxxxxxxx>
Cc: David Disseldorp <ddiss@xxxxxxx>
---
 drivers/target/loopback/tcm_loop.c | 13 +++++--------
 drivers/target/loopback/tcm_loop.h |  3 +--
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 5091b31b3e56..25b6cab5e158 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -245,7 +245,7 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
 		pr_err("Unable to allocate memory for tl_tmr\n");
 		goto release;
 	}
-	init_waitqueue_head(&tl_tmr->tl_tmr_wait);
+	init_completion(&tl_tmr->done);
 
 	se_cmd = &tl_cmd->tl_se_cmd;
 	se_tpg = &tl_tpg->tl_se_tpg;
@@ -276,7 +276,7 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
 	 * tcm_loop_queue_tm_rsp() to wake us up.
 	 */
 	transport_generic_handle_tmr(se_cmd);
-	wait_event(tl_tmr->tl_tmr_wait, atomic_read(&tl_tmr->tmr_complete));
+	wait_for_completion(&tl_tmr->done);
 	/*
 	 * The TMR LUN_RESET has completed, check the response status and
 	 * then release allocations.
@@ -671,12 +671,9 @@ static void tcm_loop_queue_tm_rsp(struct se_cmd *se_cmd)
 {
 	struct se_tmr_req *se_tmr = se_cmd->se_tmr_req;
 	struct tcm_loop_tmr *tl_tmr = se_tmr->fabric_tmr_ptr;
-	/*
-	 * The SCSI EH thread will be sleeping on se_tmr->tl_tmr_wait, go ahead
-	 * and wake up the wait_queue_head_t in tcm_loop_device_reset()
-	 */
-	atomic_set(&tl_tmr->tmr_complete, 1);
-	wake_up(&tl_tmr->tl_tmr_wait);
+
+	/* Wake up tcm_loop_issue_tmr(). */
+	complete(&tl_tmr->done);
 }
 
 static void tcm_loop_aborted_task(struct se_cmd *se_cmd)
diff --git a/drivers/target/loopback/tcm_loop.h b/drivers/target/loopback/tcm_loop.h
index a8a230b4e6b5..f54b605c33c4 100644
--- a/drivers/target/loopback/tcm_loop.h
+++ b/drivers/target/loopback/tcm_loop.h
@@ -21,8 +21,7 @@ struct tcm_loop_cmd {
 };
 
 struct tcm_loop_tmr {
-	atomic_t tmr_complete;
-	wait_queue_head_t tl_tmr_wait;
+	struct completion done;
 };
 
 struct tcm_loop_nexus {
-- 
2.12.2

From 1b09afac4c85a423f24397de43ee4582d0d1aac7 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@xxxxxxxxxxx>
Date: Wed, 10 May 2017 14:30:11 -0700
Subject: [PATCH 4/5] target/tcm_loop: Simplify the task management function
 implementation

Use target_submit_tmr() instead of open-coding this function. The
only functional change is that TMFs are now added to sess_cmd_list,
something the current code does not do.

Signed-off-by: Bart Van Assche <bart.vanassche@xxxxxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Hannes Reinecke <hare@xxxxxxxx>
Cc: David Disseldorp <ddiss@xxxxxxx>
---
 drivers/target/loopback/tcm_loop.c | 33 ++++-----------------------------
 1 file changed, 4 insertions(+), 29 deletions(-)

diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 25b6cab5e158..48d751c53104 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -218,7 +218,6 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
 {
 	struct se_cmd *se_cmd = NULL;
 	struct se_session *se_sess;
-	struct se_portal_group *se_tpg;
 	struct tcm_loop_nexus *tl_nexus;
 	struct tcm_loop_cmd *tl_cmd = NULL;
 	struct tcm_loop_tmr *tl_tmr = NULL;
@@ -248,40 +247,16 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
 	init_completion(&tl_tmr->done);
 
 	se_cmd = &tl_cmd->tl_se_cmd;
-	se_tpg = &tl_tpg->tl_se_tpg;
 	se_sess = tl_tpg->tl_nexus->se_sess;
-	/*
-	 * Initialize struct se_cmd descriptor from target_core_mod infrastructure
-	 */
-	transport_init_se_cmd(se_cmd, se_tpg->se_tpg_tfo, se_sess, 0,
-				DMA_NONE, TCM_SIMPLE_TAG,
-				&tl_cmd->tl_sense_buf[0]);
 
-	rc = core_tmr_alloc_req(se_cmd, tl_tmr, tmr, GFP_KERNEL);
+	rc = target_submit_tmr(se_cmd, se_sess, NULL, 0 /* unpacked_lun */,
+			       tl_tmr, tmr, GFP_KERNEL, TCM_SIMPLE_TAG,
+			       0 /*flags*/);
 	if (rc < 0)
 		goto release;
-
-	if (tmr == TMR_ABORT_TASK)
-		se_cmd->se_tmr_req->ref_task_tag = task;
-
-	/*
-	 * Locate the underlying TCM struct se_lun
-	 */
-	if (transport_lookup_tmr_lun(se_cmd, lun) < 0) {
-		ret = TMR_LUN_DOES_NOT_EXIST;
-		goto release;
-	}
-	/*
-	 * Queue the TMR to TCM Core and sleep waiting for
-	 * tcm_loop_queue_tm_rsp() to wake us up.
-	 */
-	transport_generic_handle_tmr(se_cmd);
 	wait_for_completion(&tl_tmr->done);
-	/*
-	 * The TMR LUN_RESET has completed, check the response status and
-	 * then release allocations.
-	 */
 	ret = se_cmd->se_tmr_req->response;
+
 release:
 	if (se_cmd)
 		transport_generic_free_cmd(se_cmd, 1);
-- 
2.12.2

From 4c961dc45bb5d35cdefa8f0f94ebd8453a9d6cc4 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@xxxxxxxxxxx>
Date: Wed, 10 May 2017 15:01:38 -0700
Subject: [PATCH 5/5] target/tcm_loop: Make TMF processing slightly faster

Target drivers must guarantee that struct se_cmd and struct se_tmr_req
exist as long as target_tmr_work() is in progress. This is why the
tcm_loop driver today passes 1 as second argument to
transport_generic_free_cmd() from inside the TMF code. Instead of
making the TMF code wait, make the TMF code obtain two references
(SCF_ACK_KREF) and drop one reference from inside the .check_stop_free()
callback.

Signed-off-by: Bart Van Assche <bart.vanassche@xxxxxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Hannes Reinecke <hare@xxxxxxxx>
Cc: David Disseldorp <ddiss@xxxxxxx>
---
 drivers/target/loopback/tcm_loop.c | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 48d751c53104..136d70285f9a 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -51,27 +51,18 @@ static int tcm_loop_queue_status(struct se_cmd *se_cmd);
  */
 static int tcm_loop_check_stop_free(struct se_cmd *se_cmd)
 {
-	/*
-	 * Do not release struct se_cmd's containing a valid TMR
-	 * pointer.  These will be released directly in tcm_loop_device_reset()
-	 * with transport_generic_free_cmd().
-	 */
-	if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)
-		return 0;
-	/*
-	 * Release the struct se_cmd, which will make a callback to release
-	 * struct tcm_loop_cmd * in tcm_loop_deallocate_core_cmd()
-	 */
-	transport_generic_free_cmd(se_cmd, 0);
-	return 1;
+	return transport_generic_free_cmd(se_cmd, 0);
 }
 
 static void tcm_loop_release_cmd(struct se_cmd *se_cmd)
 {
 	struct tcm_loop_cmd *tl_cmd = container_of(se_cmd,
 				struct tcm_loop_cmd, tl_se_cmd);
+	struct se_tmr_req *se_tmr = se_cmd->se_tmr_req;
+	struct tcm_loop_tmr *tl_tmr = se_tmr ? se_tmr->fabric_tmr_ptr : NULL;
 
 	kmem_cache_free(tcm_loop_cmd_cache, tl_cmd);
+	kfree(tl_tmr);
 }
 
 static int tcm_loop_show_info(struct seq_file *m, struct Scsi_Host *host)
@@ -251,19 +242,23 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
 
 	rc = target_submit_tmr(se_cmd, se_sess, NULL, 0 /* unpacked_lun */,
 			       tl_tmr, tmr, GFP_KERNEL, TCM_SIMPLE_TAG,
-			       0 /*flags*/);
+			       TARGET_SCF_ACK_KREF);
 	if (rc < 0)
 		goto release;
 	wait_for_completion(&tl_tmr->done);
 	ret = se_cmd->se_tmr_req->response;
+	target_put_sess_cmd(se_cmd);
+
+out:
+	return ret;
 
 release:
 	if (se_cmd)
-		transport_generic_free_cmd(se_cmd, 1);
+		transport_generic_free_cmd(se_cmd, 0);
 	else
 		kmem_cache_free(tcm_loop_cmd_cache, tl_cmd);
 	kfree(tl_tmr);
-	return ret;
+	goto out;
 }
 
 static int tcm_loop_abort_task(struct scsi_cmnd *sc)
-- 
2.12.2


[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]