Re: [PATCH] scsi: fix race condition when removing target

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 2017/12/1 7:56, James Bottomley wrote:
On Thu, 2017-11-30 at 16:08 +0000, Bart Van Assche wrote:
On Thu, 2017-11-30 at 09:18 +0800, Jason Yan wrote:

Hi Bart, I chose the approach in my patch because it has been used
in scsi_device_get() for years and been proved safe. I think using
kobject_get_unless_zero() is safe here and can fix this issue too.
And this approach is beneficial to all users.

Hello Jason,

A possible approach is that we start with your patch and defer any
get_device() changes until after your patch has been applied.

It's possible, but not quite good enough: the same race can be produced
with any of our sdev lists that are deleted in the release callback,
because there could be a released device on any one of them.  The only
way to mediate it properly is to get a reference in the iterator using
kobject_get_unless_zero().

It's a bit like a huge can of worms, there's another problem every time
I look.  However, this is something like the mechanism that could work
(and if get_device() ever gets fixed, we can put it in place of
kobject_get_unless_zero()).

James

---

diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
index 6be77b3aa8a5..c3246f26c02c 100644
--- a/drivers/scsi/53c700.c
+++ b/drivers/scsi/53c700.c
@@ -1169,6 +1169,7 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp,

  			
  		}
+		put_device(&SDp->sdev_gendev);
  	} else if(dsps == A_RESELECTED_DURING_SELECTION) {

  		/* This section is full of debugging code because I've
diff --git a/drivers/scsi/esp_scsi.c b/drivers/scsi/esp_scsi.c
index c3fc34b9964d..7736f3fb2501 100644
--- a/drivers/scsi/esp_scsi.c
+++ b/drivers/scsi/esp_scsi.c
@@ -1198,6 +1198,7 @@ static int esp_reconnect(struct esp *esp)
  		goto do_reset;
  	}
  	lp = dev->hostdata;
+	put_device(&dev->sdev_gendev);

  	ent = lp->non_tagged_cmd;
  	if (!ent) {
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index a7e4fba724b7..c96c11716152 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -677,11 +677,10 @@ struct scsi_device *__scsi_device_lookup_by_target(struct scsi_target *starget,
  {
  	struct scsi_device *sdev;

-	list_for_each_entry(sdev, &starget->devices, same_target_siblings) {
-		if (sdev->sdev_state == SDEV_DEL)
-			continue;
-		if (sdev->lun ==lun)
+	__sdev_for_each_get(sdev, &starget->devices, same_target_siblings) {
+		if (sdev->sdev_state != SDEV_DEL && sdev->lun ==lun)
  			return sdev;
+		put_device(&sdev->sdev_gendev);
  	}

  	return NULL;
@@ -700,15 +699,16 @@ EXPORT_SYMBOL(__scsi_device_lookup_by_target);
  struct scsi_device *scsi_device_lookup_by_target(struct scsi_target *starget,
  						 u64 lun)
  {
-	struct scsi_device *sdev;
+  struct scsi_device *sdev, *sdev_copy;
  	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
  	unsigned long flags;

  	spin_lock_irqsave(shost->host_lock, flags);
-	sdev = __scsi_device_lookup_by_target(starget, lun);
+	sdev_copy = sdev = __scsi_device_lookup_by_target(starget, lun);
+	spin_unlock_irqrestore(shost->host_lock, flags);
  	if (sdev && scsi_device_get(sdev))
  		sdev = NULL;
-	spin_unlock_irqrestore(shost->host_lock, flags);
+	put_device(&sdev_copy->sdev_gendev);

  	return sdev;
  }
@@ -735,12 +735,12 @@ struct scsi_device *__scsi_device_lookup(struct Scsi_Host *shost,
  {
  	struct scsi_device *sdev;

-	list_for_each_entry(sdev, &shost->__devices, siblings) {
-		if (sdev->sdev_state == SDEV_DEL)
-			continue;
-		if (sdev->channel == channel && sdev->id == id &&
-				sdev->lun ==lun)
+	__sdev_for_each_get(sdev, &shost->__devices, siblings) {
+		if (sdev->sdev_state != SDEV_DEL &&
+		    sdev->channel == channel && sdev->id == id &&
+		    sdev->lun ==lun)
  			return sdev;
+		put_device(&sdev->sdev_gendev);
  	}

  	return NULL;
@@ -761,14 +761,15 @@ EXPORT_SYMBOL(__scsi_device_lookup);
  struct scsi_device *scsi_device_lookup(struct Scsi_Host *shost,
  		uint channel, uint id, u64 lun)
  {
-	struct scsi_device *sdev;
+  struct scsi_device *sdev, *sdev_copy;
  	unsigned long flags;

  	spin_lock_irqsave(shost->host_lock, flags);
-	sdev = __scsi_device_lookup(shost, channel, id, lun);
+	sdev_copy = sdev = __scsi_device_lookup(shost, channel, id, lun);
+	spin_unlock_irqrestore(shost->host_lock, flags);
  	if (sdev && scsi_device_get(sdev))
  		sdev = NULL;
-	spin_unlock_irqrestore(shost->host_lock, flags);
+	put_device(&sdev_copy->sdev_gendev);

  	return sdev;
  }
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 40124648a07b..cddd5a93e962 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1870,11 +1870,14 @@ void scsi_forget_host(struct Scsi_Host *shost)

   restart:
  	spin_lock_irqsave(shost->host_lock, flags);
-	list_for_each_entry(sdev, &shost->__devices, siblings) {
-		if (sdev->sdev_state == SDEV_DEL)
+	__sdev_for_each_get(sdev, &shost->__devices, siblings) {
+		if (sdev->sdev_state == SDEV_DEL) {
+			put_device(&sdev->sdev_gendev);
  			continue;
+		}
  		spin_unlock_irqrestore(shost->host_lock, flags);
  		__scsi_remove_device(sdev);
+		put_device(&sdev->sdev_gendev);
  		goto restart;
  	}
  	spin_unlock_irqrestore(shost->host_lock, flags);
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index f796bd61f3f0..380404ec49cd 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1375,17 +1375,7 @@ static void __scsi_remove_target(struct scsi_target *starget)

  	spin_lock_irqsave(shost->host_lock, flags);
   restart:
-	list_for_each_entry(sdev, &shost->__devices, siblings) {
-		/*
-		 * We cannot call scsi_device_get() here, as
-		 * we might've been called from rmmod() causing
-		 * scsi_device_get() to fail the module_is_live()
-		 * check.
-		 */
-		if (sdev->channel != starget->channel ||
-		    sdev->id != starget->id ||
-		    !get_device(&sdev->sdev_gendev))
-			continue;
+	__sdev_for_each_get(sdev, &starget->devices, same_target_siblings) {
  		spin_unlock_irqrestore(shost->host_lock, flags);
  		scsi_remove_device(sdev);
  		put_device(&sdev->sdev_gendev);
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 571ddb49b926..2e4d48d8cd68 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -380,6 +380,23 @@ extern struct scsi_device *__scsi_iterate_devices(struct Scsi_Host *,
  #define __shost_for_each_device(sdev, shost) \
  	list_for_each_entry((sdev), &((shost)->__devices), siblings)


Seems that __shost_for_each_device() is still not safe. scsi device been deleted stays in the list and put_device() can be called anywhere out of the host lock.

+/**
+ * __sdev_list_for_each_get - get a reference to each element
+ * @sdev: the scsi device to use in the body
+ * @head: the head of the list
+ * @list: the element (sdev->list) containing list members
+ *
+ * Iterator that only executes the body if it can obtain a reference
+ * to the element.  This closes a race where the device release can
+ * have been called, but the element is still on the lists.
+ *
+ * The lock protecting the list (the host lock) must be held before
+ * calling this iterator
+ */
+#define __sdev_for_each_get(sdev, head, list)				\
+	list_for_each_entry(sdev, head, list)				\
+		if (kobject_get_unless_zero(&sdev->sdev_gendev.kobj))
+
  extern int scsi_change_queue_depth(struct scsi_device *, int);
  extern int scsi_track_queue_full(struct scsi_device *, int);



.





[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]

  Powered by Linux