Re: [PATCH 0/2] sd: Fix a deadlock between event checking and device removal

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 2017-11-14 at 18:01 +0100, Jack Wang wrote:
> I suspect we have run into the same bug you were trying to fix in this
> patch set. We're running v4.4.50.
> 
> I was trying to reproduce it, but no luck yet. Do you still have your
> reproducer?

Hello Jack,

I can reproduce this in about one out of every five runs of test one of the
srp-test software, using the SRP initiator and target drivers of what will
become kernel v4.15-rc1, and switching the ib_srpt driver from non-SRQ to SRQ
mode while the initiator is logging in. I'm currently analyzing where in the
block layer a queue run is missing. The patch below for the sd driver does
not fix the root cause but seems to help.

Bart.


Subject: [PATCH] Increase SCSI disk probing concurrency

---
 drivers/scsi/scsi.c        |  5 -----
 drivers/scsi/scsi_pm.c     |  6 ++++--
 drivers/scsi/scsi_priv.h   |  1 -
 drivers/scsi/sd.c          | 26 +++++++++++++++++++++-----
 drivers/scsi/sd.h          |  1 +
 include/scsi/scsi_driver.h |  1 +
 6 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index a7e4fba724b7..e6d69e647f6a 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -85,10 +85,6 @@ unsigned int scsi_logging_level;
 EXPORT_SYMBOL(scsi_logging_level);
 #endif
 
-/* sd, scsi core and power management need to coordinate flushing async actions */
-ASYNC_DOMAIN(scsi_sd_probe_domain);
-EXPORT_SYMBOL(scsi_sd_probe_domain);
-
 /*
  * Separate domain (from scsi_sd_probe_domain) to maximize the benefit of
  * asynchronous system resume operations.  It is marked 'exclusive' to avoid
@@ -839,7 +835,6 @@ static void __exit exit_scsi(void)
 	scsi_exit_devinfo();
 	scsi_exit_procfs();
 	scsi_exit_queue();
-	async_unregister_domain(&scsi_sd_probe_domain);
 }
 
 subsys_initcall(init_scsi);
diff --git a/drivers/scsi/scsi_pm.c b/drivers/scsi/scsi_pm.c
index b44c1bb687a2..d8e43c2f4d40 100644
--- a/drivers/scsi/scsi_pm.c
+++ b/drivers/scsi/scsi_pm.c
@@ -171,9 +171,11 @@ static int scsi_bus_resume_common(struct device *dev,
 static int scsi_bus_prepare(struct device *dev)
 {
 	if (scsi_is_sdev_device(dev)) {
-		/* sd probing uses async_schedule.  Wait until it finishes. */
-		async_synchronize_full_domain(&scsi_sd_probe_domain);
+		struct scsi_driver *drv = to_scsi_driver(dev->driver);
 
+		/* sd probing happens asynchronously. Wait until it finishes. */
+		if (drv->sync)
+			drv->sync(dev);
 	} else if (scsi_is_host_device(dev)) {
 		/* Wait until async scanning is finished */
 		scsi_complete_async_scans();
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index dab29f538612..bf0cadf6a321 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -174,7 +174,6 @@ static inline void scsi_autopm_put_host(struct Scsi_Host *h) {}
 #endif /* CONFIG_PM */
 
 extern struct async_domain scsi_sd_pm_domain;
-extern struct async_domain scsi_sd_probe_domain;
 
 /* scsi_dh.c */
 #ifdef CONFIG_SCSI_DH
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 0313486d85c8..c26dbb38b60c 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -112,6 +112,7 @@ static void sd_shutdown(struct device *);
 static int sd_suspend_system(struct device *);
 static int sd_suspend_runtime(struct device *);
 static int sd_resume(struct device *);
+static void sd_sync_probe_domain(struct device *dev);
 static void sd_rescan(struct device *);
 static int sd_init_command(struct scsi_cmnd *SCpnt);
 static void sd_uninit_command(struct scsi_cmnd *SCpnt);
@@ -564,6 +565,7 @@ static struct scsi_driver sd_template = {
 		.shutdown	= sd_shutdown,
 		.pm		= &sd_pm_ops,
 	},
+	.sync			= sd_sync_probe_domain,
 	.rescan			= sd_rescan,
 	.init_command		= sd_init_command,
 	.uninit_command		= sd_uninit_command,
@@ -3221,9 +3223,9 @@ static int sd_format_disk_name(char *prefix, int index, char *buf, int buflen)
 /*
  * The asynchronous part of sd_probe
  */
-static void sd_probe_async(void *data, async_cookie_t cookie)
+static void sd_probe_async(struct work_struct *work)
 {
-	struct scsi_disk *sdkp = data;
+	struct scsi_disk *sdkp = container_of(work, typeof(*sdkp), probe_work);
 	struct scsi_device *sdp;
 	struct gendisk *gd;
 	u32 index;
@@ -3326,6 +3328,8 @@ static int sd_probe(struct device *dev)
 	if (!sdkp)
 		goto out;
 
+	INIT_WORK(&sdkp->probe_work, sd_probe_async);
+
 	gd = alloc_disk(SD_MINORS);
 	if (!gd)
 		goto out_free;
@@ -3377,8 +3381,8 @@ static int sd_probe(struct device *dev)
 	get_device(dev);
 	dev_set_drvdata(dev, sdkp);
 
-	get_device(&sdkp->dev);	/* prevent release before async_schedule */
-	async_schedule_domain(sd_probe_async, sdkp, &scsi_sd_probe_domain);
+	get_device(&sdkp->dev);	/* prevent release before sd_probe_async() */
+	WARN_ON_ONCE(!queue_work(system_unbound_wq, &sdkp->probe_work));
 
 	return 0;
 
@@ -3395,6 +3399,18 @@ static int sd_probe(struct device *dev)
 	return error;
 }
 
+static void sd_wait_for_probing(struct scsi_disk *sdkp)
+{
+	flush_work(&sdkp->probe_work);
+}
+
+static void sd_sync_probe_domain(struct device *dev)
+{
+	struct scsi_disk *sdkp = dev_get_drvdata(dev);
+
+	sd_wait_for_probing(sdkp);
+}
+
 /**
  *	sd_remove - called whenever a scsi disk (previously recognized by
  *	sd_probe) is detached from the system. It is called (potentially
@@ -3416,7 +3432,7 @@ static int sd_remove(struct device *dev)
 	scsi_autopm_get_device(sdkp->device);
 
 	async_synchronize_full_domain(&scsi_sd_pm_domain);
-	async_synchronize_full_domain(&scsi_sd_probe_domain);
+	sd_wait_for_probing(sdkp);
 	device_del(&sdkp->dev);
 	del_gendisk(sdkp->disk);
 	sd_shutdown(dev);
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 7b57dafcd45a..2cc47183c9aa 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -81,6 +81,7 @@ struct scsi_disk {
 	unsigned int	zones_optimal_nonseq;
 	unsigned int	zones_max_open;
 #endif
+	struct work_struct probe_work;
 	atomic_t	openers;
 	sector_t	capacity;	/* size in logical blocks */
 	u32		max_xfer_blocks;
diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h
index a5534ccad859..145d6239eecf 100644
--- a/include/scsi/scsi_driver.h
+++ b/include/scsi/scsi_driver.h
@@ -11,6 +11,7 @@ struct scsi_device;
 struct scsi_driver {
 	struct device_driver	gendrv;
 
+	void (*sync)(struct device *);
 	void (*rescan)(struct device *);
 	int (*init_command)(struct scsi_cmnd *);
 	void (*uninit_command)(struct scsi_cmnd *);
-- 
2.15.0




[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]

  Powered by Linux