Patch "scsi: sd: Fix TCG OPAL unlock on system resume" has been added to the 6.1-stable tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is a note to let you know that I've just added the patch titled

    scsi: sd: Fix TCG OPAL unlock on system resume

to the 6.1-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     scsi-sd-fix-tcg-opal-unlock-on-system-resume.patch
and it can be found in the queue-6.1 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@xxxxxxxxxxxxxxx> know about it.


>From 0c76106cb97548810214def8ee22700bbbb90543 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@xxxxxxxxxx>
Date: Tue, 19 Mar 2024 16:12:09 +0900
Subject: scsi: sd: Fix TCG OPAL unlock on system resume

From: Damien Le Moal <dlemoal@xxxxxxxxxx>

commit 0c76106cb97548810214def8ee22700bbbb90543 upstream.

Commit 3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop
management") introduced the manage_system_start_stop scsi_device flag to
allow libata to indicate to the SCSI disk driver that nothing should be
done when resuming a disk on system resume. This change turned the
execution of sd_resume() into a no-op for ATA devices on system
resume. While this solved deadlock issues during device resume, this change
also wrongly removed the execution of opal_unlock_from_suspend().  As a
result, devices with TCG OPAL locking enabled remain locked and
inaccessible after a system resume from sleep.

To fix this issue, introduce the SCSI driver resume method and implement it
with the sd_resume() function calling opal_unlock_from_suspend(). The
former sd_resume() function is renamed to sd_resume_common() and modified
to call the new sd_resume() function. For non-ATA devices, this result in
no functional changes.

In order for libata to explicitly execute sd_resume() when a device is
resumed during system restart, the function scsi_resume_device() is
introduced. libata calls this function from the revalidation work executed
on devie resume, a state that is indicated with the new device flag
ATA_DFLAG_RESUMING. Doing so, locked TCG OPAL enabled devices are unlocked
on resume, allowing normal operation.

Fixes: 3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop management")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=218538
Cc: stable@xxxxxxxxxxxxxxx
Signed-off-by: Damien Le Moal <dlemoal@xxxxxxxxxx>
Link: https://lore.kernel.org/r/20240319071209.1179257-1-dlemoal@xxxxxxxxxx
Signed-off-by: Martin K. Petersen <martin.petersen@xxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
 drivers/ata/libata-eh.c    |    5 ++++-
 drivers/ata/libata-scsi.c  |    9 +++++++++
 drivers/scsi/scsi_scan.c   |   34 ++++++++++++++++++++++++++++++++++
 drivers/scsi/sd.c          |   25 +++++++++++++++++++++----
 include/linux/libata.h     |    1 +
 include/scsi/scsi_driver.h |    1 +
 include/scsi/scsi_host.h   |    1 +
 7 files changed, 71 insertions(+), 5 deletions(-)

--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -711,8 +711,10 @@ void ata_scsi_port_error_handler(struct
 					ehc->saved_ncq_enabled |= 1 << devno;
 
 				/* If we are resuming, wake up the device */
-				if (ap->pflags & ATA_PFLAG_RESUMING)
+				if (ap->pflags & ATA_PFLAG_RESUMING) {
+					dev->flags |= ATA_DFLAG_RESUMING;
 					ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE;
+				}
 			}
 		}
 
@@ -3089,6 +3091,7 @@ static int ata_eh_revalidate_and_attach(
 	return 0;
 
  err:
+	dev->flags &= ~ATA_DFLAG_RESUMING;
 	*r_failed_dev = dev;
 	return rc;
 }
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -4652,6 +4652,7 @@ void ata_scsi_dev_rescan(struct work_str
 	struct ata_link *link;
 	struct ata_device *dev;
 	unsigned long flags;
+	bool do_resume;
 	int ret = 0;
 
 	mutex_lock(&ap->scsi_scan_mutex);
@@ -4673,7 +4674,15 @@ void ata_scsi_dev_rescan(struct work_str
 			if (scsi_device_get(sdev))
 				continue;
 
+			do_resume = dev->flags & ATA_DFLAG_RESUMING;
+
 			spin_unlock_irqrestore(ap->lock, flags);
+			if (do_resume) {
+				ret = scsi_resume_device(sdev);
+				if (ret == -EWOULDBLOCK)
+					goto unlock;
+				dev->flags &= ~ATA_DFLAG_RESUMING;
+			}
 			ret = scsi_rescan_device(sdev);
 			scsi_device_put(sdev);
 			spin_lock_irqsave(ap->lock, flags);
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1611,6 +1611,40 @@ int scsi_add_device(struct Scsi_Host *ho
 }
 EXPORT_SYMBOL(scsi_add_device);
 
+int scsi_resume_device(struct scsi_device *sdev)
+{
+	struct device *dev = &sdev->sdev_gendev;
+	int ret = 0;
+
+	device_lock(dev);
+
+	/*
+	 * Bail out if the device or its queue are not running. Otherwise,
+	 * the rescan may block waiting for commands to be executed, with us
+	 * holding the device lock. This can result in a potential deadlock
+	 * in the power management core code when system resume is on-going.
+	 */
+	if (sdev->sdev_state != SDEV_RUNNING ||
+	    blk_queue_pm_only(sdev->request_queue)) {
+		ret = -EWOULDBLOCK;
+		goto unlock;
+	}
+
+	if (dev->driver && try_module_get(dev->driver->owner)) {
+		struct scsi_driver *drv = to_scsi_driver(dev->driver);
+
+		if (drv->resume)
+			ret = drv->resume(dev);
+		module_put(dev->driver->owner);
+	}
+
+unlock:
+	device_unlock(dev);
+
+	return ret;
+}
+EXPORT_SYMBOL(scsi_resume_device);
+
 int scsi_rescan_device(struct scsi_device *sdev)
 {
 	struct device *dev = &sdev->sdev_gendev;
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -110,6 +110,7 @@ static int sd_suspend_system(struct devi
 static int sd_suspend_runtime(struct device *);
 static int sd_resume_system(struct device *);
 static int sd_resume_runtime(struct device *);
+static int sd_resume(struct device *);
 static void sd_rescan(struct device *);
 static blk_status_t sd_init_command(struct scsi_cmnd *SCpnt);
 static void sd_uninit_command(struct scsi_cmnd *SCpnt);
@@ -691,6 +692,7 @@ static struct scsi_driver sd_template =
 		.pm		= &sd_pm_ops,
 	},
 	.rescan			= sd_rescan,
+	.resume			= sd_resume,
 	.init_command		= sd_init_command,
 	.uninit_command		= sd_uninit_command,
 	.done			= sd_done,
@@ -3830,7 +3832,22 @@ static int sd_suspend_runtime(struct dev
 	return sd_suspend_common(dev, true);
 }
 
-static int sd_resume(struct device *dev, bool runtime)
+static int sd_resume(struct device *dev)
+{
+	struct scsi_disk *sdkp = dev_get_drvdata(dev);
+
+	if (sdkp->device->no_start_on_resume)
+		sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
+
+	if (opal_unlock_from_suspend(sdkp->opal_dev)) {
+		sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int sd_resume_common(struct device *dev, bool runtime)
 {
 	struct scsi_disk *sdkp = dev_get_drvdata(dev);
 	int ret = 0;
@@ -3849,7 +3866,7 @@ static int sd_resume(struct device *dev,
 	}
 
 	if (!ret) {
-		opal_unlock_from_suspend(sdkp->opal_dev);
+		sd_resume(dev);
 		sdkp->suspended = false;
 	}
 
@@ -3868,7 +3885,7 @@ static int sd_resume_system(struct devic
 		return 0;
 	}
 
-	return sd_resume(dev, false);
+	return sd_resume_common(dev, false);
 }
 
 static int sd_resume_runtime(struct device *dev)
@@ -3892,7 +3909,7 @@ static int sd_resume_runtime(struct devi
 				  "Failed to clear sense data\n");
 	}
 
-	return sd_resume(dev, true);
+	return sd_resume_common(dev, true);
 }
 
 /**
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -102,6 +102,7 @@ enum {
 	ATA_DFLAG_NCQ_SEND_RECV = (1 << 19), /* device supports NCQ SEND and RECV */
 	ATA_DFLAG_NCQ_PRIO	= (1 << 20), /* device supports NCQ priority */
 	ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 21), /* Priority cmds sent to dev */
+	ATA_DFLAG_RESUMING	= (1 << 22),  /* Device is resuming */
 	ATA_DFLAG_INIT_MASK	= (1 << 24) - 1,
 
 	ATA_DFLAG_DETACH	= (1 << 24),
--- a/include/scsi/scsi_driver.h
+++ b/include/scsi/scsi_driver.h
@@ -12,6 +12,7 @@ struct request;
 struct scsi_driver {
 	struct device_driver	gendrv;
 
+	int (*resume)(struct device *);
 	void (*rescan)(struct device *);
 	blk_status_t (*init_command)(struct scsi_cmnd *);
 	void (*uninit_command)(struct scsi_cmnd *);
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -752,6 +752,7 @@ extern int __must_check scsi_add_host_wi
 					       struct device *,
 					       struct device *);
 extern void scsi_scan_host(struct Scsi_Host *);
+extern int scsi_resume_device(struct scsi_device *sdev);
 extern int scsi_rescan_device(struct scsi_device *sdev);
 extern void scsi_remove_host(struct Scsi_Host *);
 extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *);


Patches currently in stable-queue which might be from dlemoal@xxxxxxxxxx are

queue-6.1/block-do-not-force-full-zone-append-completion-in-req_bio_endio.patch
queue-6.1/scsi-sd-fix-tcg-opal-unlock-on-system-resume.patch
queue-6.1/block-clear-zone-limits-for-a-non-zoned-stacked-queu.patch
queue-6.1/revert-block-mq-deadline-use-correct-way-to-throttli.patch




[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux