On Thu, 2017-11-30 at 16:08 +0000, Bart Van Assche wrote:
On Thu, 2017-11-30 at 09:18 +0800, Jason Yan wrote:
Hi Bart, I chose the approach in my patch because it has been used
in scsi_device_get() for years and been proved safe. I think using
kobject_get_unless_zero() is safe here and can fix this issue too.
And this approach is beneficial to all users.
Hello Jason,
A possible approach is that we start with your patch and defer any
get_device() changes until after your patch has been applied.
It's possible, but not quite good enough: the same race can be produced
with any of our sdev lists that are deleted in the release callback,
because there could be a released device on any one of them. The only
way to mediate it properly is to get a reference in the iterator using
kobject_get_unless_zero().
It's a bit like a huge can of worms, there's another problem every time
I look. However, this is something like the mechanism that could work
(and if get_device() ever gets fixed, we can put it in place of
kobject_get_unless_zero()).
James
---
diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
index 6be77b3aa8a5..c3246f26c02c 100644
--- a/drivers/scsi/53c700.c
+++ b/drivers/scsi/53c700.c
@@ -1169,6 +1169,7 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp,
}
+ put_device(&SDp->sdev_gendev);
} else if(dsps == A_RESELECTED_DURING_SELECTION) {
/* This section is full of debugging code because I've
diff --git a/drivers/scsi/esp_scsi.c b/drivers/scsi/esp_scsi.c
index c3fc34b9964d..7736f3fb2501 100644
--- a/drivers/scsi/esp_scsi.c
+++ b/drivers/scsi/esp_scsi.c
@@ -1198,6 +1198,7 @@ static int esp_reconnect(struct esp *esp)
goto do_reset;
}
lp = dev->hostdata;
+ put_device(&dev->sdev_gendev);
ent = lp->non_tagged_cmd;
if (!ent) {
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index a7e4fba724b7..c96c11716152 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -677,11 +677,10 @@ struct scsi_device *__scsi_device_lookup_by_target(struct scsi_target *starget,
{
struct scsi_device *sdev;
- list_for_each_entry(sdev, &starget->devices, same_target_siblings) {
- if (sdev->sdev_state == SDEV_DEL)
- continue;
- if (sdev->lun ==lun)
+ __sdev_for_each_get(sdev, &starget->devices, same_target_siblings) {
+ if (sdev->sdev_state != SDEV_DEL && sdev->lun ==lun)
return sdev;
+ put_device(&sdev->sdev_gendev);
}
return NULL;
@@ -700,15 +699,16 @@ EXPORT_SYMBOL(__scsi_device_lookup_by_target);
struct scsi_device *scsi_device_lookup_by_target(struct scsi_target *starget,
u64 lun)
{
- struct scsi_device *sdev;
+ struct scsi_device *sdev, *sdev_copy;
struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
unsigned long flags;
spin_lock_irqsave(shost->host_lock, flags);
- sdev = __scsi_device_lookup_by_target(starget, lun);
+ sdev_copy = sdev = __scsi_device_lookup_by_target(starget, lun);
+ spin_unlock_irqrestore(shost->host_lock, flags);
if (sdev && scsi_device_get(sdev))
sdev = NULL;
- spin_unlock_irqrestore(shost->host_lock, flags);
+ put_device(&sdev_copy->sdev_gendev);
return sdev;
}
@@ -735,12 +735,12 @@ struct scsi_device *__scsi_device_lookup(struct Scsi_Host *shost,
{
struct scsi_device *sdev;
- list_for_each_entry(sdev, &shost->__devices, siblings) {
- if (sdev->sdev_state == SDEV_DEL)
- continue;
- if (sdev->channel == channel && sdev->id == id &&
- sdev->lun ==lun)
+ __sdev_for_each_get(sdev, &shost->__devices, siblings) {
+ if (sdev->sdev_state != SDEV_DEL &&
+ sdev->channel == channel && sdev->id == id &&
+ sdev->lun ==lun)
return sdev;
+ put_device(&sdev->sdev_gendev);
}
return NULL;
@@ -761,14 +761,15 @@ EXPORT_SYMBOL(__scsi_device_lookup);
struct scsi_device *scsi_device_lookup(struct Scsi_Host *shost,
uint channel, uint id, u64 lun)
{
- struct scsi_device *sdev;
+ struct scsi_device *sdev, *sdev_copy;
unsigned long flags;
spin_lock_irqsave(shost->host_lock, flags);
- sdev = __scsi_device_lookup(shost, channel, id, lun);
+ sdev_copy = sdev = __scsi_device_lookup(shost, channel, id, lun);
+ spin_unlock_irqrestore(shost->host_lock, flags);
if (sdev && scsi_device_get(sdev))
sdev = NULL;
- spin_unlock_irqrestore(shost->host_lock, flags);
+ put_device(&sdev_copy->sdev_gendev);
return sdev;
}
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 40124648a07b..cddd5a93e962 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1870,11 +1870,14 @@ void scsi_forget_host(struct Scsi_Host *shost)
restart:
spin_lock_irqsave(shost->host_lock, flags);
- list_for_each_entry(sdev, &shost->__devices, siblings) {
- if (sdev->sdev_state == SDEV_DEL)
+ __sdev_for_each_get(sdev, &shost->__devices, siblings) {
+ if (sdev->sdev_state == SDEV_DEL) {
+ put_device(&sdev->sdev_gendev);
continue;
+ }
spin_unlock_irqrestore(shost->host_lock, flags);
__scsi_remove_device(sdev);
+ put_device(&sdev->sdev_gendev);
goto restart;
}
spin_unlock_irqrestore(shost->host_lock, flags);
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index f796bd61f3f0..380404ec49cd 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1375,17 +1375,7 @@ static void __scsi_remove_target(struct scsi_target *starget)
spin_lock_irqsave(shost->host_lock, flags);
restart:
- list_for_each_entry(sdev, &shost->__devices, siblings) {
- /*
- * We cannot call scsi_device_get() here, as
- * we might've been called from rmmod() causing
- * scsi_device_get() to fail the module_is_live()
- * check.
- */
- if (sdev->channel != starget->channel ||
- sdev->id != starget->id ||
- !get_device(&sdev->sdev_gendev))
- continue;
+ __sdev_for_each_get(sdev, &starget->devices, same_target_siblings) {
spin_unlock_irqrestore(shost->host_lock, flags);
scsi_remove_device(sdev);
put_device(&sdev->sdev_gendev);
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 571ddb49b926..2e4d48d8cd68 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -380,6 +380,23 @@ extern struct scsi_device *__scsi_iterate_devices(struct Scsi_Host *,
#define __shost_for_each_device(sdev, shost) \
list_for_each_entry((sdev), &((shost)->__devices), siblings)