RE: [PATCH] drm/amdgpu: Save PA of bad pages for old asics

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[AMD Official Use Only - AMD Internal Distribution Only]

Reviewed-by: Tao Zhou <tao.zhou1@xxxxxxx>

> -----Original Message-----
> From: Xie, Patrick <Gangliang.Xie@xxxxxxx>
> Sent: Wednesday, March 12, 2025 2:16 PM
> To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Cc: Zhou1, Tao <Tao.Zhou1@xxxxxxx>; Xie, Patrick <Gangliang.Xie@xxxxxxx>
> Subject: [PATCH] drm/amdgpu: Save PA of bad pages for old asics
>
> For old ASICs that do not support MCA address translation, we just save the PA of bad pages.
>
> Signed-off-by: ganglxie <ganglxie@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c       | 24 ++++++++++++++++---
>  .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c    |  9 +++++--
>  2 files changed, 28 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 285e3aa2bb2f..7cf8a3036828 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -2836,6 +2836,13 @@ static int
> __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev,
>
>       save_nps = (bps[0].retired_page >> UMC_NPS_SHIFT) &
> UMC_NPS_MASK;
>
> +     /*old asics just have pa in eeprom*/
> +     if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) {
> +             memcpy(err_data->err_addr, bps,
> +                     sizeof(struct eeprom_table_record) * adev->umc.retire_unit);
> +             goto out;
> +     }
> +
>       for (i = 0; i < adev->umc.retire_unit; i++)
>               bps[i].retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT);
>
> @@ -2858,6 +2865,7 @@ static int
> __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev,
>               }
>       }
>
> +out:
>       return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr, adev->umc.retire_unit);
>  }
>
> @@ -2981,14 +2989,24 @@ int amdgpu_ras_save_bad_pages(struct
> amdgpu_device *adev,
>
>       /* only new entries are saved */
>       if (save_count > 0) {
> -             for (i = 0; i < unit_num; i++) {
> +             /*old asics only save pa to eeprom like before*/
> +             if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) <
> 12) {
>                       if (amdgpu_ras_eeprom_append(control,
> -                                     &data->bps[bad_page_num + i * adev->umc.retire_unit],
> -                                     1)) {
> +                                     &data->bps[bad_page_num], save_count)) {
>                               dev_err(adev->dev, "Failed to save EEPROM table
> data!");
>                               return -EIO;
>                       }
> +             } else {
> +                     for (i = 0; i < unit_num; i++) {
> +                             if (amdgpu_ras_eeprom_append(control,
> +                                             &data->bps[bad_page_num +
> +                                             i * adev->umc.retire_unit], 1)) {
> +                                     dev_err(adev->dev, "Failed to save EEPROM
> table data!");
> +                                     return -EIO;
> +                             }
> +                     }
>               }
> +
>               dev_info(adev->dev, "Saved %d pages to EEPROM table.\n",
> save_count);
>       }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
> index 09a6f8bc1a5a..3597ecd9baca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
> @@ -727,9 +727,14 @@ amdgpu_ras_eeprom_append_table(struct
> amdgpu_ras_eeprom_control *control,
>                                    - control->ras_fri)
>               % control->ras_max_record_count;
>
> -     control->ras_num_mca_recs += num;
> -     control->ras_num_bad_pages += num * adev->umc.retire_unit;
> +     /*old asics only save pa to eeprom like before*/
> +     if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12)
> +             control->ras_num_pa_recs += num;
> +     else
> +             control->ras_num_mca_recs += num;
>
> +     control->ras_num_bad_pages = control->ras_num_pa_recs +
> +                             control->ras_num_mca_recs * adev->umc.retire_unit;
>  Out:
>       kfree(buf);
>       return res;
> --
> 2.34.1





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux