Re: [PATCH v2] scsi: libsas: defer ata device eh commands to libata

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



2018-03-07 8:47 GMT+01:00 Jason Yan <yanaijie@xxxxxxxxxx>:
> When ata device doing EH, some commands still attached with tasks are not
> passed to libata when abort failed or recover failed, so libata did not
> handle these commands. After these commands done, sas task is freed, but
> ata qc is not freed. This will cause ata qc leak and trigger a warning
> like below:
>
> WARNING: CPU: 0 PID: 28512 at drivers/ata/libata-eh.c:4037
> ata_eh_finish+0xb4/0xcc
> CPU: 0 PID: 28512 Comm: kworker/u32:2 Tainted: G     W  OE 4.14.0#1
> ......
> Call trace:
> [<ffff0000088b7bd0>] ata_eh_finish+0xb4/0xcc
> [<ffff0000088b8420>] ata_do_eh+0xc4/0xd8
> [<ffff0000088b8478>] ata_std_error_handler+0x44/0x8c
> [<ffff0000088b8068>] ata_scsi_port_error_handler+0x480/0x694
> [<ffff000008875fc4>] async_sas_ata_eh+0x4c/0x80
> [<ffff0000080f6be8>] async_run_entry_fn+0x4c/0x170
> [<ffff0000080ebd70>] process_one_work+0x144/0x390
> [<ffff0000080ec100>] worker_thread+0x144/0x418
> [<ffff0000080f2c98>] kthread+0x10c/0x138
> [<ffff0000080855dc>] ret_from_fork+0x10/0x18
>
> If ata qc leaked too many, ata tag allocation will fail and io blocked
> for ever.
>
> As suggested by Dan Williams, defer ata device commands to libata and
> merge sas_eh_finish_cmd() with sas_eh_defer_cmd(). libata will handle
> ata qcs correctly after this.
>
> Signed-off-by: Jason Yan <yanaijie@xxxxxxxxxx>
> CC: Xiaofei Tan <tanxiaofei@xxxxxxxxxx>
> CC: John Garry <john.garry@xxxxxxxxxx>
> CC: Dan Williams <dan.j.williams@xxxxxxxxx>
Hi Jason,

Would be good if you add the Fixes tag, so distribution could pick it up easily.
And wasn't the idea to delete sas_eh_finish_cmd instead of sas_eh_defer_cmd?


Thanks,
Jack
> ---
>  drivers/scsi/libsas/sas_scsi_host.c | 30 ++++++++++--------------------
>  1 file changed, 10 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
> index 6de9681ace82..fd76436b289c 100644
> --- a/drivers/scsi/libsas/sas_scsi_host.c
> +++ b/drivers/scsi/libsas/sas_scsi_host.c
> @@ -223,6 +223,7 @@ int sas_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
>  static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
>  {
>         struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(cmd->device->host);
> +       struct domain_device *dev = cmd_to_domain_dev(cmd);
>         struct sas_task *task = TO_SAS_TASK(cmd);
>
>         /* At this point, we only get called following an actual abort
> @@ -231,6 +232,11 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
>          */
>         sas_end_task(cmd, task);
>
> +       if (dev_is_sata(dev)) {
> +               list_move_tail(&cmd->eh_entry, &sas_ha->eh_ata_q);
> +               return;
> +       }
> +
>         /* now finish the command and move it on to the error
>          * handler done list, this also takes it off the
>          * error handler pending list.
> @@ -238,22 +244,6 @@ static void sas_eh_finish_cmd(struct scsi_cmnd *cmd)
>         scsi_eh_finish_cmd(cmd, &sas_ha->eh_done_q);
>  }
>
> -static void sas_eh_defer_cmd(struct scsi_cmnd *cmd)
> -{
> -       struct domain_device *dev = cmd_to_domain_dev(cmd);
> -       struct sas_ha_struct *ha = dev->port->ha;
> -       struct sas_task *task = TO_SAS_TASK(cmd);
> -
> -       if (!dev_is_sata(dev)) {
> -               sas_eh_finish_cmd(cmd);
> -               return;
> -       }
> -
> -       /* report the timeout to libata */
> -       sas_end_task(cmd, task);
> -       list_move_tail(&cmd->eh_entry, &ha->eh_ata_q);
> -}
> -
>  static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd *my_cmd)
>  {
>         struct scsi_cmnd *cmd, *n;
> @@ -261,7 +251,7 @@ static void sas_scsi_clear_queue_lu(struct list_head *error_q, struct scsi_cmnd
>         list_for_each_entry_safe(cmd, n, error_q, eh_entry) {
>                 if (cmd->device->sdev_target == my_cmd->device->sdev_target &&
>                     cmd->device->lun == my_cmd->device->lun)
> -                       sas_eh_defer_cmd(cmd);
> +                       sas_eh_finish_cmd(cmd);
>         }
>  }
>
> @@ -631,12 +621,12 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
>                 case TASK_IS_DONE:
>                         SAS_DPRINTK("%s: task 0x%p is done\n", __func__,
>                                     task);
> -                       sas_eh_defer_cmd(cmd);
> +                       sas_eh_finish_cmd(cmd);
>                         continue;
>                 case TASK_IS_ABORTED:
>                         SAS_DPRINTK("%s: task 0x%p is aborted\n",
>                                     __func__, task);
> -                       sas_eh_defer_cmd(cmd);
> +                       sas_eh_finish_cmd(cmd);
>                         continue;
>                 case TASK_IS_AT_LU:
>                         SAS_DPRINTK("task 0x%p is at LU: lu recover\n", task);
> @@ -647,7 +637,7 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
>                                             "recovered\n",
>                                             SAS_ADDR(task->dev),
>                                             cmd->device->lun);
> -                               sas_eh_defer_cmd(cmd);
> +                               sas_eh_finish_cmd(cmd);
>                                 sas_scsi_clear_queue_lu(work_q, cmd);
>                                 goto Again;
>                         }
> --
> 2.13.6
>



[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]

  Powered by Linux