Re: [PATCH] ata: libata-scsi: Avoid deadlock on rescan after device resume

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Jun 15, 2023 at 4:33 PM Damien Le Moal <dlemoal@xxxxxxxxxx> wrote:
>
> When an ATA port is resumed from sleep, the port is reset and a power
> management request issued to libata EH to reset the port and rescanning
> the device(s) attached to the port. Device rescanning is done by
> scheduling an ata_scsi_dev_rescan() work, which will execute
> scsi_rescan_device().
>
> However, scsi_rescan_device() takes the generic device lock, which is
> also taken by dpm_resume() when the SCSI device is resumed as well. If
> a device rescan execution starts before the completion of the SCSI
> device resume, the rcu locking used to refresh the cached VPD pages of
> the device, combined with the generic device locking from
> scsi_rescan_device() and from dpm_resume() can cause a deadlock.
>
> Avoid this situation by changing struct ata_port scsi_rescan_task to be
> a delayed work instead of a simple work_struct. ata_scsi_dev_rescan() is
> modified to check if the SCSI device associated with the ATA device that
> must be rescanned is not suspended. If the SCSI device is still
> suspended, ata_scsi_dev_rescan() returns early and reschedule itself for
> execution after an arbitrary delay of 5ms.
>
> Reported-by: Kai-Heng Feng <kai.heng.feng@xxxxxxxxxxxxx>
> Reported-by: Joe Breuer <linux-kernel@xxxxxxxxxxxx>
> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217530
> Fixes: a19a93e4c6a9 ("scsi: core: pm: Rely on the device driver core for async power management")
> Signed-off-by: Damien Le Moal <dlemoal@xxxxxxxxxx>

Tested-by: Kai-Heng Feng <kai.heng.feng@xxxxxxxxxxxxx>

> ---
>  drivers/ata/libata-core.c |  3 ++-
>  drivers/ata/libata-eh.c   |  2 +-
>  drivers/ata/libata-scsi.c | 22 +++++++++++++++++++++-
>  include/linux/libata.h    |  2 +-
>  4 files changed, 25 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
> index 8bf612bdd61a..b4f246f0cac7 100644
> --- a/drivers/ata/libata-core.c
> +++ b/drivers/ata/libata-core.c
> @@ -5348,7 +5348,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
>
>         mutex_init(&ap->scsi_scan_mutex);
>         INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug);
> -       INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
> +       INIT_DELAYED_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
>         INIT_LIST_HEAD(&ap->eh_done_q);
>         init_waitqueue_head(&ap->eh_wait_q);
>         init_completion(&ap->park_req_pending);
> @@ -5954,6 +5954,7 @@ static void ata_port_detach(struct ata_port *ap)
>         WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED));
>
>         cancel_delayed_work_sync(&ap->hotplug_task);
> +       cancel_delayed_work_sync(&ap->scsi_rescan_task);
>
>   skip_eh:
>         /* clean up zpodd on port removal */
> diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
> index a6c901811802..6f8d14191593 100644
> --- a/drivers/ata/libata-eh.c
> +++ b/drivers/ata/libata-eh.c
> @@ -2984,7 +2984,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
>                         ehc->i.flags |= ATA_EHI_SETMODE;
>
>                         /* schedule the scsi_rescan_device() here */
> -                       schedule_work(&(ap->scsi_rescan_task));
> +                       schedule_delayed_work(&ap->scsi_rescan_task, 0);
>                 } else if (dev->class == ATA_DEV_UNKNOWN &&
>                            ehc->tries[dev->devno] &&
>                            ata_class_enabled(ehc->classes[dev->devno])) {
> diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
> index 8ce90284eb34..551077cea4e4 100644
> --- a/drivers/ata/libata-scsi.c
> +++ b/drivers/ata/libata-scsi.c
> @@ -4597,10 +4597,11 @@ int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
>  void ata_scsi_dev_rescan(struct work_struct *work)
>  {
>         struct ata_port *ap =
> -               container_of(work, struct ata_port, scsi_rescan_task);
> +               container_of(work, struct ata_port, scsi_rescan_task.work);
>         struct ata_link *link;
>         struct ata_device *dev;
>         unsigned long flags;
> +       bool delay_rescan = false;
>
>         mutex_lock(&ap->scsi_scan_mutex);
>         spin_lock_irqsave(ap->lock, flags);
> @@ -4614,6 +4615,21 @@ void ata_scsi_dev_rescan(struct work_struct *work)
>                         if (scsi_device_get(sdev))
>                                 continue;
>
> +                       /*
> +                        * If the rescan work was scheduled because of a resume
> +                        * event, the port is already fully resumed, but the
> +                        * SCSI device may not yet be fully resumed. In such
> +                        * case, executing scsi_rescan_device() may cause a
> +                        * deadlock with the PM code on device_lock(). Prevent
> +                        * this by giving up and retrying rescan after a short
> +                        * delay.
> +                        */
> +                       delay_rescan = sdev->sdev_gendev.power.is_suspended;
> +                       if (delay_rescan) {
> +                               scsi_device_put(sdev);
> +                               break;
> +                       }
> +
>                         spin_unlock_irqrestore(ap->lock, flags);
>                         scsi_rescan_device(&(sdev->sdev_gendev));
>                         scsi_device_put(sdev);
> @@ -4623,4 +4639,8 @@ void ata_scsi_dev_rescan(struct work_struct *work)
>
>         spin_unlock_irqrestore(ap->lock, flags);
>         mutex_unlock(&ap->scsi_scan_mutex);
> +
> +       if (delay_rescan)
> +               schedule_delayed_work(&ap->scsi_rescan_task,
> +                                     msecs_to_jiffies(5));
>  }
> diff --git a/include/linux/libata.h b/include/linux/libata.h
> index 311cd93377c7..dd5797fb6305 100644
> --- a/include/linux/libata.h
> +++ b/include/linux/libata.h
> @@ -836,7 +836,7 @@ struct ata_port {
>
>         struct mutex            scsi_scan_mutex;
>         struct delayed_work     hotplug_task;
> -       struct work_struct      scsi_rescan_task;
> +       struct delayed_work     scsi_rescan_task;
>
>         unsigned int            hsm_task_state;
>
> --
> 2.40.1
>




[Index of Archives]     [Linux Filesystems]     [Linux SCSI]     [Linux RAID]     [Git]     [Kernel Newbies]     [Linux Newbie]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Samba]     [Device Mapper]

  Powered by Linux