RE: [PATCH V2] drm/amdgpu: support reserve bad page for virt

"Yang, Stanley" <Stanley.Yang@xxxxxxx> · Fri, 5 Jun 2020 03:40:14 +0000

[AMD Public Use]

Hi Tao,

Thanks for your suggestion and reply inline.

Regards,
Stanley
> -----Original Message-----
> From: Zhou1, Tao <Tao.Zhou1@xxxxxxx>
> Sent: Friday, June 5, 2020 11:00 AM
> To: Yang, Stanley <Stanley.Yang@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Cc: Zhang, Hawking <Hawking.Zhang@xxxxxxx>; Chen, Guchun
> <Guchun.Chen@xxxxxxx>; Liu, Monk <Monk.Liu@xxxxxxx>; Clements,
> John <John.Clements@xxxxxxx>; Li, Dennis <Dennis.Li@xxxxxxx>; Yang,
> Stanley <Stanley.Yang@xxxxxxx>
> Subject: RE: [PATCH V2] drm/amdgpu: support reserve bad page for virt
> 
> [AMD Public Use]
> 
> 
> 
> > -----Original Message-----
> > From: Stanley.Yang <Stanley.Yang@xxxxxxx>
> > Sent: 2020年6月4日
 20:36
> > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> > Cc: Zhang, Hawking <Hawking.Zhang@xxxxxxx>; Chen, Guchun
> > <Guchun.Chen@xxxxxxx>; Liu, Monk <Monk.Liu@xxxxxxx>; Clements,
> John
> > <John.Clements@xxxxxxx>; Zhou1, Tao <Tao.Zhou1@xxxxxxx>; Li,
> Dennis
> > <Dennis.Li@xxxxxxx>; Yang, Stanley <Stanley.Yang@xxxxxxx>
> > Subject: [PATCH V2] drm/amdgpu: support reserve bad page for virt
> >
> > Changed from V1:
> >         rename same functions name, only init ras error handler data for
> >         supported asic.
> >
> > Signed-off-by: Stanley.Yang <Stanley.Yang@xxxxxxx>
> > Change-Id: Ia0ad9453ac3ac929f95c73cbee5b7a8fc42a9816
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   3 +
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 172
> > +++++++++++++++++++++
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   |  30 +++-
> >  3 files changed, 201 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > index 1df28b7bf22e..668ad0e35160 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > @@ -2326,6 +2326,9 @@ static int amdgpu_device_ip_fini(struct
> > amdgpu_device *adev)  {
> >         int i, r;
> >
> > +      if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
> > +                    amdgpu_virt_release_ras_err_handler_data(adev);
> > +
> >         amdgpu_ras_pre_fini(adev);
> >
> >         if (adev->gmc.xgmi.num_physical_nodes > 1) diff --git
> > a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> > index bab9286021a7..174fcb8c8b57 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> > @@ -26,6 +26,7 @@
> >  #include <drm/drm_drv.h>
> >
> >  #include "amdgpu.h"
> > +#include "amdgpu_ras.h"
> >
> >  bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)  { @@ -
> > 255,12 +256,169 @@ int amdgpu_virt_fw_reserve_get_checksum(void
> *obj,
> >         return ret;
> >  }
> >
> > +static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device
> > +*adev) {
> > +      struct amdgpu_virt *virt = &adev->virt;
> > +      struct amdgpu_virt_ras_err_handler_data **data = "">
> > >virt_eh_data;
> > +      /* GPU will be marked bad on host if bp count more then 10,
> > +      * so alloc 512 is enough.
> > +      */
> > +      unsigned int align_space = 512;
> > +      void *bps = NULL;
> > +      struct amdgpu_bo **bps_bo = NULL;
> > +
> > +      *data = "" amdgpu_virt_ras_err_handler_data),
> > GFP_KERNEL);
> > +      if (!*data)
> > +                    return -ENOMEM;
> > +
> > +      bps = kmalloc(align_space * sizeof((*data)->bps), GFP_KERNEL);
> > +      bps_bo = kmalloc(align_space * sizeof((*data)->bps_bo),
> > GFP_KERNEL);
> > +
> > +      if (!bps || !bps_bo) {
> > +                    kfree(bps);
> > +                    kfree(bps_bo);
> > +                    return -ENOMEM;
> > +      }
> > +
> > +      (*data)->bps = bps;
> > +      (*data)->bps_bo = bps_bo;
> > +      (*data)->count = 0;
> > +      (*data)->last_reserved = 0;
> > +
> > +      virt->ras_init_done = true;
> > +
> > +      return 0;
> > +}
> > +
> > +static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev) {
> > +      struct amdgpu_virt *virt = &adev->virt;
> > +      struct amdgpu_virt_ras_err_handler_data *data = "">
> > +      struct amdgpu_bo *bo;
> > +      int i;
> > +
> > +      if (!data)
> > +                    return;
> > +
> > +      for (i = data->last_reserved - 1; i >= 0; i--) {
> > +                    bo = data->bps_bo[i];
> > +                    amdgpu_bo_free_kernel(&bo, NULL, NULL);
> > +                    data->bps_bo[i] = bo;
> > +                    data->last_reserved = i;
> > +      }
> > +}
> > +
> > +void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device
> > +*adev) {
> > +      struct amdgpu_virt *virt = &adev->virt;
> > +      struct amdgpu_virt_ras_err_handler_data *data = "">
> > +
> > +      virt->ras_init_done = false;
> > +
> > +      if (!data)
> > +                    return;
> > +
> > +      amdgpu_virt_ras_release_bp(adev);
> > +
> > +      kfree(data->bps);
> > +      kfree(data->bps_bo);
> > +      kfree(data);
> > +      virt->virt_eh_data = NULL;
> > +}
> > +
> > +static void amdgpu_virt_ras_add_bps(struct amdgpu_device *adev,
> > +                    struct eeprom_table_record *bps, int pages) {
> > +      struct amdgpu_virt *virt = &adev->virt;
> > +      struct amdgpu_virt_ras_err_handler_data *data = "">
> > +
> > +      if (!data)
> > +                    return;
> > +
> > +      memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
> > +      data->count += pages;
> > +}
> > +
> > +static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev) {
> > +      struct amdgpu_virt *virt = &adev->virt;
> > +      struct amdgpu_virt_ras_err_handler_data *data = "">
> > +      struct amdgpu_bo *bo = NULL;
> > +      uint64_t bp;
> > +      int i;
> > +
> > +      if (!data)
> > +                    return;
> > +
> > +      for (i = data->last_reserved; i < data->count; i++) {
> > +                    bp = data->bps[i].retired_page;
> > +
> > +                    /* There are two cases of reserve error should be ignored:
> > +                    * 1) a ras bad page has been allocated (used by someone);
> > +                    * 2) a ras bad page has been reserved (duplicate error
> > injection
> > +                    *    for one page);
> > +                    */
> > +                    if (amdgpu_bo_create_kernel_at(adev, bp <<
> > AMDGPU_GPU_PAGE_SHIFT,
> > +                                                                      AMDGPU_GPU_PAGE_SIZE,
> > +                                                                      AMDGPU_GEM_DOMAIN_VRAM,
> > +                                                                      &bo, NULL))
> > +                                   DRM_DEBUG("RAS WARN: [SRV-IO] reserve vram for
> > retired page %llx
> > +fail\n", bp);
> 
> [Tao] SRIOV? Typo?
[Yang, Stanley] :
Will keep the same message with bare-metal according to 
@Zhang, Hawking's suggestion.
> 
> > +
> > +                    data->bps_bo[i] = bo;
> > +                    data->last_reserved = i + 1;
> > +                    bo = NULL;
> > +      }
> > +}
> > +
> > +static bool amdgpu_virt_ras_check_bad_page(struct amdgpu_device
> *adev,
> > +                    uint64_t retired_page)
> > +{
> > +      struct amdgpu_virt *virt = &adev->virt;
> > +      struct amdgpu_virt_ras_err_handler_data *data = "">
> > +      int i;
> > +
> > +      if (!data)
> > +                    return true;
> > +
> > +      for (i = 0; i < data->count; i++)
> > +                    if (retired_page == data->bps[i].retired_page)
> > +                                   return true;
> > +
> > +      return false;
> > +}
> > +
> > +static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev,
> > +                    uint64_t bp_block_offset, uint32_t bp_block_size) {
> > +      struct eeprom_table_record bp;
> > +      uint64_t retired_page;
> > +      uint32_t bp_idx, bp_cnt;
> > +
> > +      if (bp_block_size) {
> > +                    bp_cnt = bp_block_size / sizeof(uint64_t);
> > +                    for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) {
> > +                                   retired_page = *(uint64_t *)(adev-
> > >fw_vram_usage.va +
> > +                                                               bp_block_offset + bp_idx *
> > sizeof(uint64_t));
> > +                                   bp.retired_page = retired_page;
> > +
> > +                                   if (amdgpu_virt_ras_check_bad_page(adev,
> > retired_page))
> > +                                                 continue;
> > +
> > +                                   amdgpu_virt_ras_add_bps(adev, &bp, 1);
> > +
> > +                                   amdgpu_virt_ras_reserve_bps(adev);
> > +                    }
> > +      }
> > +}
> > +
> >  void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)  {
> >         uint32_t pf2vf_size = 0;
> >         uint32_t checksum = 0;
> >         uint32_t checkval;
> >         char *str;
> > +      uint64_t bp_block_offset = 0;
> > +      uint32_t bp_block_size = 0;
> >
> >         adev->virt.fw_reserve.p_pf2vf = NULL;
> >         adev->virt.fw_reserve.p_vf2pf = NULL; @@ -275,6 +433,20 @@ void
> > amdgpu_virt_init_data_exchange(struct
> > amdgpu_device *adev)
> >
> >                       /* pf2vf message must be in 4K */
> >                       if (pf2vf_size > 0 && pf2vf_size < 4096) {
> > +                                   if (adev->virt.fw_reserve.p_pf2vf->version == 2) {
> > +                                                 struct amdgim_pf2vf_info_v2 *pf2vf_v2 =
> > NULL;
> > +
> > +                                                 pf2vf_v2 = (struct amdgim_pf2vf_info_v2
> > *)adev->virt.fw_reserve.p_pf2vf;
> > +                                                 bp_block_offset = ((uint64_t)pf2vf_v2-
> > >bp_block_offset_L & 0xFFFFFFFF) |
> > +                                                                              ((((uint64_t)pf2vf_v2-
> > >bp_block_offset_H) << 32) & 0xFFFFFFFF00000000);
> > +                                                 bp_block_size = pf2vf_v2->bp_block_size;
> > +
> > +                                                 if (bp_block_size && !adev-
> > >virt.ras_init_done)
> > +
> >         amdgpu_virt_init_ras_err_handler_data(adev);
> > +
> > +                                                 amdgpu_virt_add_bad_page(adev,
> > bp_block_offset, bp_block_size);
> 
> [Tao] We can add "if (adev->virt.ras_init_done)" before it to skip add bad
> page handle quickly in init failed case.
> 
[Yang, Stanley] :
Yes, add this judgement is better.
> 
> > +                                   }
> > +
> >                                      checkval = amdgpu_virt_fw_reserve_get_checksum(
> >                                                    adev->virt.fw_reserve.p_pf2vf, pf2vf_size,
> >                                                    adev->virt.fw_reserve.checksum_key,
> > checksum); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> > index b90e822cebd7..f826945989c7 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> > @@ -143,19 +143,27 @@ struct  amdgim_pf2vf_info_v2 {
> >         uint32_t vce_enc_max_pixels_count;
> >         /* 16x16 pixels/sec, codec independent */
> >         uint32_t vce_enc_max_bandwidth;
> > +      /* Bad pages block position in BYTE */
> > +      uint32_t bp_block_offset_L;
> > +      uint32_t bp_block_offset_H;
> > +      /* Bad pages block size in BYTE */
> > +      uint32_t bp_block_size;
> >         /* MEC FW position in kb from the start of VF visible frame buffer */
> > -       uint64_t mecfw_kboffset;
> > +      uint32_t mecfw_kboffset_L;
> > +      uint32_t mecfw_kboffset_H;
> >         /* MEC FW size in KB */
> >         uint32_t mecfw_ksize;
> >         /* UVD FW position in kb from the start of VF visible frame buffer */
> > -       uint64_t uvdfw_kboffset;
> > +      uint32_t uvdfw_kboffset_L;
> > +      uint32_t uvdfw_kboffset_H;
> >         /* UVD FW size in KB */
> >         uint32_t uvdfw_ksize;
> >         /* VCE FW position in kb from the start of VF visible frame buffer */
> > -       uint64_t vcefw_kboffset;
> > +      uint32_t vcefw_kboffset_L;
> > +      uint32_t vcefw_kboffset_H;
> >         /* VCE FW size in KB */
> >         uint32_t vcefw_ksize;
> > -       uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256,
> 0,
> > 0, (9 + sizeof(struct
> > amd_sriov_msg_pf2vf_info_header)/sizeof(uint32_t)), 3)];
> > +      uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256,
> 0,
> > 0, (18 +
> > +sizeof(struct amd_sriov_msg_pf2vf_info_header)/sizeof(uint32_t)),
> > +0)];
> >  } __aligned(4);
> >
> >
> > @@ -254,6 +262,17 @@ typedef struct amdgim_vf2pf_info_v2
> > amdgim_vf2pf_info ;
> >                       } \
> >         } while (0)
> >
> > +struct amdgpu_virt_ras_err_handler_data {
> > +      /* point to bad page records array */
> > +      struct eeprom_table_record *bps;
> > +      /* point to reserved bo array */
> > +      struct amdgpu_bo **bps_bo;
> > +      /* the count of entries */
> > +      int count;
> > +      /* last reserved entry's index + 1 */
> > +      int last_reserved;
> > +};
> > +
> >  /* GPU virtualization */
> >  struct amdgpu_virt {
> >         uint32_t                                          caps;
> > @@ -272,6 +291,8 @@ struct amdgpu_virt {
> >         uint32_t reg_access_mode;
> >         int req_init_data_ver;
> >         bool tdr_debug;
> > +      struct amdgpu_virt_ras_err_handler_data *virt_eh_data;
> > +      bool ras_init_done;
> >  };
> >
> >  #define amdgpu_sriov_enabled(adev) \
> > @@ -323,6 +344,7 @@ void amdgpu_virt_free_mm_table(struct
> > amdgpu_device *adev);  int amdgpu_virt_fw_reserve_get_checksum(void
> > *obj, unsigned long obj_size,
> >                                                                   unsigned int key,
> >                                                                   unsigned int chksum);
> > +void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device
> > +*adev);
> >  void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
> > void amdgpu_detect_virtualization(struct amdgpu_device *adev);
> >
> > --
> > 2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx