Re: [PATCH 2/8] pm80xx: check fatal error

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Dec 30, 2020 at 5:47 AM Viswas G <Viswas.G@xxxxxxxxxxxxxxxxx> wrote:
>
> From: akshatzen <akshatzen@xxxxxxxxxx>
>
> When the controller runs into a fatal error, commands that expect
> a response get stuck because no response ever arrives. If the
> controller is in a fatal error state, an abort request issued to
> the controller hangs too. Hence we should fail the abort without trying.
>
> Signed-off-by: akshatzen <akshatzen@xxxxxxxxxx>
> Signed-off-by: Viswas G <Viswas.G@xxxxxxxxxxxxx>
> Signed-off-by: Ruksar Devadi <Ruksar.devadi@xxxxxxxxxxxxx>
> Signed-off-by: Radha Ramachandran <radha@xxxxxxxxxx>
Acked-by: Jack Wang <jinpu.wang@xxxxxxxxxxxxxxx>
Thx
> ---
>  drivers/scsi/pm8001/pm8001_hwi.c |  1 +
>  drivers/scsi/pm8001/pm8001_sas.c |  9 +++++++++
>  drivers/scsi/pm8001/pm8001_sas.h |  2 ++
>  drivers/scsi/pm8001/pm80xx_hwi.c | 36 ++++++++++++++++++++++++++++++++++++
>  drivers/scsi/pm8001/pm80xx_hwi.h | 13 +++++++++++++
>  5 files changed, 61 insertions(+)
>
> diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c
> index c8d4d87c5473..f147193d67bd 100644
> --- a/drivers/scsi/pm8001/pm8001_hwi.c
> +++ b/drivers/scsi/pm8001/pm8001_hwi.c
> @@ -4998,4 +4998,5 @@ const struct pm8001_dispatch pm8001_8001_dispatch = {
>         .fw_flash_update_req    = pm8001_chip_fw_flash_update_req,
>         .set_dev_state_req      = pm8001_chip_set_dev_state_req,
>         .sas_re_init_req        = pm8001_chip_sas_re_initialization,
> +       .fatal_errors           = pm80xx_fatal_errors,
>  };
> diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
> index d1e9dba2ef19..f8d142f9b9ad 100644
> --- a/drivers/scsi/pm8001/pm8001_sas.c
> +++ b/drivers/scsi/pm8001/pm8001_sas.c
> @@ -1183,12 +1183,21 @@ int pm8001_abort_task(struct sas_task *task)
>         int rc = TMF_RESP_FUNC_FAILED, ret;
>         u32 phy_id;
>         struct sas_task_slow slow_task;
> +
>         if (unlikely(!task || !task->lldd_task || !task->dev))
>                 return TMF_RESP_FUNC_FAILED;
> +
>         dev = task->dev;
>         pm8001_dev = dev->lldd_dev;
>         pm8001_ha = pm8001_find_ha_by_dev(dev);
>         phy_id = pm8001_dev->attached_phy;
> +
> +       if (PM8001_CHIP_DISP->fatal_errors(pm8001_ha)) {
> +               // If the controller is seeing fatal errors
> +               // abort task will not get a response from the controller
> +               return TMF_RESP_FUNC_FAILED;
> +       }
> +
>         ret = pm8001_find_tag(task, &tag);
>         if (ret == 0) {
>                 pm8001_info(pm8001_ha, "no tag for task:%p\n", task);
> diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h
> index f2c8cbad3853..039ed91e9841 100644
> --- a/drivers/scsi/pm8001/pm8001_sas.h
> +++ b/drivers/scsi/pm8001/pm8001_sas.h
> @@ -215,6 +215,7 @@ struct pm8001_dispatch {
>         int (*sas_diag_execute_req)(struct pm8001_hba_info *pm8001_ha,
>                 u32 state);
>         int (*sas_re_init_req)(struct pm8001_hba_info *pm8001_ha);
> +       int (*fatal_errors)(struct pm8001_hba_info *pm8001_ha);
>  };
>
>  struct pm8001_chip_info {
> @@ -725,6 +726,7 @@ ssize_t pm80xx_get_fatal_dump(struct device *cdev,
>  ssize_t pm80xx_get_non_fatal_dump(struct device *cdev,
>                 struct device_attribute *attr, char *buf);
>  ssize_t pm8001_get_gsm_dump(struct device *cdev, u32, char *buf);
> +int pm80xx_fatal_errors(struct pm8001_hba_info *pm8001_ha);
>  /* ctl shared API */
>  extern struct device_attribute *pm8001_host_attrs[];
>
> diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
> index 9c4b8b374ab8..86a3d483749c 100644
> --- a/drivers/scsi/pm8001/pm80xx_hwi.c
> +++ b/drivers/scsi/pm8001/pm80xx_hwi.c
> @@ -1525,6 +1525,41 @@ static int mpi_uninit_check(struct pm8001_hba_info *pm8001_ha)
>         return 0;
>  }
>
> +/**
> + * pm80xx_fatal_errors - returns non zero *ONLY* when fatal errors
> + * @pm8001_ha: our hba card information
> + *
> + * Fatal errors are recoverable only after a host reboot.
> + */
> +int
> +pm80xx_fatal_errors(struct pm8001_hba_info *pm8001_ha)
> +{
> +       int ret = 0;
> +       u32 scratch_pad_rsvd0 = pm8001_cr32(pm8001_ha, 0,
> +                                       MSGU_HOST_SCRATCH_PAD_6);
> +       u32 scratch_pad_rsvd1 = pm8001_cr32(pm8001_ha, 0,
> +                                       MSGU_HOST_SCRATCH_PAD_7);
> +       u32 scratch_pad1 = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1);
> +       u32 scratch_pad2 = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_2);
> +       u32 scratch_pad3 = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_3);
> +
> +       if (pm8001_ha->chip_id != chip_8006 &&
> +                       pm8001_ha->chip_id != chip_8074 &&
> +                       pm8001_ha->chip_id != chip_8076) {
> +               return 0;
> +       }
> +
> +       if (MSGU_SCRATCHPAD1_STATE_FATAL_ERROR(scratch_pad1)) {
> +               pm8001_dbg(pm8001_ha, FAIL,
> +                       "Fatal error SCRATCHPAD1 = 0x%x SCRATCHPAD2 = 0x%x SCRATCHPAD3 = 0x%x SCRATCHPAD_RSVD0 = 0x%x SCRATCHPAD_RSVD1 = 0x%x\n",
> +                               scratch_pad1, scratch_pad2, scratch_pad3,
> +                               scratch_pad_rsvd0, scratch_pad_rsvd1);
> +               ret = 1;
> +       }
> +
> +       return ret;
> +}
> +
>  /**
>   * pm8001_chip_soft_rst - soft reset the PM8001 chip, so that the clear all
>   * the FW register status to the originated status.
> @@ -4959,4 +4994,5 @@ const struct pm8001_dispatch pm8001_80xx_dispatch = {
>         .set_nvmd_req           = pm8001_chip_set_nvmd_req,
>         .fw_flash_update_req    = pm8001_chip_fw_flash_update_req,
>         .set_dev_state_req      = pm8001_chip_set_dev_state_req,
> +       .fatal_errors           = pm80xx_fatal_errors,
>  };
> diff --git a/drivers/scsi/pm8001/pm80xx_hwi.h b/drivers/scsi/pm8001/pm80xx_hwi.h
> index 2b6b52551968..2c8e85cfdbc4 100644
> --- a/drivers/scsi/pm8001/pm80xx_hwi.h
> +++ b/drivers/scsi/pm8001/pm80xx_hwi.h
> @@ -1368,6 +1368,19 @@ typedef struct SASProtocolTimerConfig SASProtocolTimerConfig_t;
>  #define MSGU_HOST_SCRATCH_PAD_6                        0x6C
>  #define MSGU_HOST_SCRATCH_PAD_7                        0x70
>
> +#define MSGU_SCRATCHPAD1_RAAE_STATE_ERR(x) ((x & 0x3) == 0x2)
> +#define MSGU_SCRATCHPAD1_ILA_STATE_ERR(x) (((x >> 2) & 0x3) == 0x2)
> +#define MSGU_SCRATCHPAD1_BOOTLDR_STATE_ERR(x) ((((x >> 4) & 0x7) == 0x7) || \
> +                                               (((x >> 4) & 0x7) == 0x4))
> +#define MSGU_SCRATCHPAD1_IOP0_STATE_ERR(x) (((x >> 10) & 0x3) == 0x2)
> +#define MSGU_SCRATCHPAD1_IOP1_STATE_ERR(x) (((x >> 12) & 0x3) == 0x2)
> +#define MSGU_SCRATCHPAD1_STATE_FATAL_ERROR(x)  \
> +                       (MSGU_SCRATCHPAD1_RAAE_STATE_ERR(x) ||      \
> +                        MSGU_SCRATCHPAD1_ILA_STATE_ERR(x) ||       \
> +                        MSGU_SCRATCHPAD1_BOOTLDR_STATE_ERR(x) ||   \
> +                        MSGU_SCRATCHPAD1_IOP0_STATE_ERR(x) ||      \
> +                        MSGU_SCRATCHPAD1_IOP1_STATE_ERR(x))
> +
>  /* bit definition for ODMR register */
>  #define ODMR_MASK_ALL                  0xFFFFFFFF/* mask all
>                                         interrupt vector */
> --
> 2.16.3
>



[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]

  Powered by Linux