[AMD Official Use Only - AMD Internal Distribution Only] Please split the patch into at least three parts, one for bad page/record number calculation based on nps, one for nps saving and the third one for code refine of bad page add. Please check my inline comments for other suggestions. > -----Original Message----- > From: Xie, Patrick <Gangliang.Xie@xxxxxxx> > Sent: Thursday, February 20, 2025 2:17 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Zhang, Hawking <Hawking.Zhang@xxxxxxx>; Zhou1, Tao > <Tao.Zhou1@xxxxxxx>; Xie, Patrick <Gangliang.Xie@xxxxxxx> > Subject: [PATCH] drm/amdgpu: Save nps to eeprom and refine code > > add nps info into eeprom records, and refine adding bad page logic based on nps > info. > > Signed-off-by: ganglxie <ganglxie@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 244 +++++++++--------- > .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 25 +- > .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h | 20 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 7 + > 4 files changed, 153 insertions(+), 143 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > index 5420e2d6d244..3945dda54b3f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > @@ -2801,20 +2801,101 @@ static int amdgpu_ras_mca2pa(struct amdgpu_device > *adev, > return -EINVAL; > } > > +static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev, > + struct eeprom_table_record *bps, int count) { > + int j; > + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); > + struct ras_err_handler_data *data = con->eh_data; > + > + for (j = 0; j < count; j++) { > + if (amdgpu_ras_check_bad_page_unlock(con, > + bps[j].retired_page << AMDGPU_GPU_PAGE_SHIFT)) > + continue; > + > + if (!data->space_left && > + amdgpu_ras_realloc_eh_data_space(adev, data, 256)) { > + return -ENOMEM; > + } > + > + amdgpu_ras_reserve_page(adev, bps[j].retired_page); > + > + memcpy(&data->bps[data->count], &(bps[j]), > + sizeof(struct eeprom_table_record)); > + data->count++; > + data->space_left--; > + } > + > + return 0; > +} > + > +static int __amdgpu_ras_convert_unit_rec_from_rom(struct amdgpu_device *adev, [Tao] for the name of unit_rec, how about rec_array? > + struct eeprom_table_record *bps, struct ras_err_data > *err_data, > + enum amdgpu_memory_partition nps) > +{ > + int i = 0; > + int ret = 0; > + enum amdgpu_memory_partition save_nps; > + > + save_nps = (bps[0].retired_page >> UMC_NPS_SHIFT) & > UMC_NPS_MASK; > + > + for (i = 0; i < adev->umc.retire_unit; i++) > + bps[i].retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT); > + > + if (save_nps) { > + if (save_nps == nps) { > + if (amdgpu_umc_pages_in_a_row(adev, err_data, > + bps[0].retired_page << > AMDGPU_GPU_PAGE_SHIFT)) > + return -EINVAL; > + } else { > + if (amdgpu_ras_mca2pa_by_idx(adev, &bps[0], err_data)) > + return -EINVAL; > + } > + } else { > + if (amdgpu_ras_mca2pa(adev, &bps[0], err_data)) { > + if (nps == AMDGPU_NPS1_PARTITION_MODE) > + memcpy(err_data->err_addr, bps, > + sizeof(struct eeprom_table_record) * adev- > >umc.retire_unit); > + else > + return -EOPNOTSUPP; > + } > + } > + > + return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr, > +adev->umc.retire_unit); } > + > +static int __amdgpu_ras_convert_single_rec_from_rom(struct amdgpu_device [Tao] we can name it as __amdgpu_ras_convert _rec_from_rom for short > *adev, > + struct eeprom_table_record *bps, struct ras_err_data > *err_data, > + enum amdgpu_memory_partition nps) > +{ > + enum amdgpu_memory_partition save_nps; > + > + save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK; > + bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT); > + > + if (save_nps == nps) { > + if (amdgpu_umc_pages_in_a_row(adev, err_data, > + bps->retired_page << > AMDGPU_GPU_PAGE_SHIFT)) > + return -EINVAL; > + } else { > + if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data)) > + return -EINVAL; > + } > + return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr, > + adev- > >umc.retire_unit); > +} > + > /* it deal with vram only. */ > int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, > struct eeprom_table_record *bps, int pages, bool from_rom) { > struct amdgpu_ras *con = amdgpu_ras_get_context(adev); > - struct ras_err_handler_data *data; > struct ras_err_data err_data; > - struct eeprom_table_record *err_rec; > struct amdgpu_ras_eeprom_control *control = > &adev->psp.ras_context.ras->eeprom_control; > enum amdgpu_memory_partition nps = > AMDGPU_NPS1_PARTITION_MODE; > int ret = 0; > - uint32_t i, j, loop_cnt = 1; > - bool find_pages_per_pa = false; > + uint32_t i; > > if (!con || !con->eh_data || !bps || pages <= 0) > return 0; > @@ -2825,108 +2906,41 @@ int amdgpu_ras_add_bad_pages(struct > amdgpu_device *adev, > sizeof(struct eeprom_table_record), GFP_KERNEL); > if (!err_data.err_addr) { > dev_warn(adev->dev, "Failed to alloc UMC error address > record in mca2pa conversion!\n"); > - ret = -ENOMEM; > - goto out; > + return -ENOMEM; > } > > - err_rec = err_data.err_addr; > - loop_cnt = adev->umc.retire_unit; > if (adev->gmc.gmc_funcs->query_mem_partition_mode) > nps = adev->gmc.gmc_funcs- > >query_mem_partition_mode(adev); > } > > mutex_lock(&con->recovery_lock); > - data = con->eh_data; > - if (!data) { > - /* Returning 0 as the absence of eh_data is acceptable */ > - goto free; > - } > - > - for (i = 0; i < pages; i++) { > - if (from_rom && > - control->rec_type == AMDGPU_RAS_EEPROM_REC_MCA) { > - if (!find_pages_per_pa) { > - if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], > &err_data)) { > - if (!i && nps == > AMDGPU_NPS1_PARTITION_MODE) { > - /* may use old RAS TA, use PA to find > pages in > - * one row > - */ > - if > (amdgpu_umc_pages_in_a_row(adev, &err_data, > - > bps[i].retired_page << > - > AMDGPU_GPU_PAGE_SHIFT)) { > - ret = -EINVAL; > - goto free; > - } else { > - find_pages_per_pa = true; > - } > - } else { > - /* unsupported cases */ > - ret = -EOPNOTSUPP; > - goto free; > - } > - } > - } else { > - if (amdgpu_umc_pages_in_a_row(adev, &err_data, > - bps[i].retired_page << > AMDGPU_GPU_PAGE_SHIFT)) { > - ret = -EINVAL; > - goto free; > - } > - } > - } else { > - if (from_rom && !find_pages_per_pa) { > - if (bps[i].retired_page & UMC_CHANNEL_IDX_V2) { > - /* bad page in any NPS mode in eeprom */ > - if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], > &err_data)) { > - ret = -EINVAL; > + > + if (from_rom) { > + for (i = 0; i < pages; i++) { > + if (control->ras_num_recs - i >= adev->umc.retire_unit) { > + if ((bps[i].address == bps[i + 1].address) && > + (bps[i].mem_channel == bps[i + [Tao] here should use space instead of tab > 1].mem_channel)) { > + //deal retire_unit records a time [Tao] deal with > + ret = > __amdgpu_ras_convert_unit_rec_from_rom(adev, > + &bps[i], > &err_data, nps); > + if (ret) > goto free; > - } > + i += (adev->umc.retire_unit - 1); > } else { > - /* legacy bad page in eeprom, generated only > in > - * NPS1 mode > - */ > - if (amdgpu_ras_mca2pa(adev, &bps[i], > &err_data)) { > - /* old RAS TA or ASICs which don't > support to > - * convert addrss via mca address > - */ > - if (!i && nps == > AMDGPU_NPS1_PARTITION_MODE) { > - find_pages_per_pa = true; > - err_rec = &bps[i]; > - loop_cnt = 1; > - } else { > - /* non-nps1 mode, old RAS TA > - * can't support it > - */ > - ret = -EOPNOTSUPP; > - goto free; > - } > - } > + break; > } > - > - if (!find_pages_per_pa) > - i += (adev->umc.retire_unit - 1); > } else { > - err_rec = &bps[i]; > + break; > } > } > - > - for (j = 0; j < loop_cnt; j++) { > - if (amdgpu_ras_check_bad_page_unlock(con, > - err_rec[j].retired_page << > AMDGPU_GPU_PAGE_SHIFT)) > - continue; > - > - if (!data->space_left && > - amdgpu_ras_realloc_eh_data_space(adev, data, 256)) { > - ret = -ENOMEM; > + for (; i < pages; i++) { > + ret = __amdgpu_ras_convert_single_rec_from_rom(adev, > + &bps[i], &err_data, nps); > + if (ret) > goto free; > - } > - > - amdgpu_ras_reserve_page(adev, err_rec[j].retired_page); > - > - memcpy(&data->bps[data->count], &(err_rec[j]), > - sizeof(struct eeprom_table_record)); > - data->count++; > - data->space_left--; > } > + } else { > + ret = __amdgpu_ras_restore_bad_pages(adev, bps, pages); > } > > free: > @@ -2971,24 +2985,14 @@ int amdgpu_ras_save_bad_pages(struct > amdgpu_device *adev, > > /* only new entries are saved */ > if (save_count > 0) { > - if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA) { > + for (i = 0; i < unit_num; i++) { > if (amdgpu_ras_eeprom_append(control, > - &data->bps[control->ras_num_recs], > - save_count)) { > + &data->bps[bad_page_num + i * adev- > >umc.retire_unit], > + 1)) { > dev_err(adev->dev, "Failed to save EEPROM table > data!"); > return -EIO; > } > - } else { > - for (i = 0; i < unit_num; i++) { > - if (amdgpu_ras_eeprom_append(control, > - &data->bps[bad_page_num + i * adev- > >umc.retire_unit], > - 1)) { > - dev_err(adev->dev, "Failed to save EEPROM > table data!"); > - return -EIO; > - } > - } > } > - > dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", > save_count); > } > > @@ -3005,6 +3009,7 @@ static int amdgpu_ras_load_bad_pages(struct > amdgpu_device *adev) > &adev->psp.ras_context.ras->eeprom_control; > struct eeprom_table_record *bps; > int ret; > + int i = 0; [Tao] I prefer to "int ret, i = 0;" > > /* no bad page record, skip eeprom access */ > if (control->ras_num_recs == 0 || amdgpu_bad_page_threshold == 0) @@ - > 3018,13 +3023,23 @@ static int amdgpu_ras_load_bad_pages(struct > amdgpu_device *adev) > if (ret) { > dev_err(adev->dev, "Failed to load EEPROM table records!"); > } else { > - if (control->ras_num_recs > 1 && > - adev->umc.ras && adev->umc.ras->convert_ras_err_addr) { > - if ((bps[0].address == bps[1].address) && > - (bps[0].mem_channel == bps[1].mem_channel)) > - control->rec_type = > AMDGPU_RAS_EEPROM_REC_PA; > - else > - control->rec_type = > AMDGPU_RAS_EEPROM_REC_MCA; > + if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) { > + for (i = 0; i < control->ras_num_recs; i++) { > + if ((control->ras_num_recs - i) >= adev- > >umc.retire_unit) { > + if ((bps[i].address == bps[i + 1].address) && > + (bps[i].mem_channel == bps[i + > 1].mem_channel)) { > + control->ras_num_pa_recs += adev- > >umc.retire_unit; > + i += (adev->umc.retire_unit - 1); > + } else { > + control->ras_num_mca_recs += > + (control- > >ras_num_recs - i); > + break; > + } > + } else { > + control->ras_num_mca_recs += (control- > >ras_num_recs - i); > + break; > + } > + } > } > > ret = amdgpu_ras_eeprom_check(control); @@ -3438,12 +3453,7 > @@ int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev) > return ret; > > if (!adev->umc.ras || !adev->umc.ras->convert_ras_err_addr) > - control->rec_type = AMDGPU_RAS_EEPROM_REC_PA; > - > - /* default status is MCA storage */ > - if (control->ras_num_recs <= 1 && > - adev->umc.ras && adev->umc.ras->convert_ras_err_addr) > - control->rec_type = AMDGPU_RAS_EEPROM_REC_MCA; > + control->ras_num_pa_recs = control->ras_num_recs; > > if (control->ras_num_recs) { > ret = amdgpu_ras_load_bad_pages(adev); diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c > index 83b54efcaa87..ab27cecb5519 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c > @@ -727,11 +727,9 @@ amdgpu_ras_eeprom_append_table(struct > amdgpu_ras_eeprom_control *control, > - control->ras_fri) > % control->ras_max_record_count; > > - if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA) > - control->ras_num_bad_pages = control->ras_num_recs; > - else > - control->ras_num_bad_pages = > - control->ras_num_recs * adev->umc.retire_unit; > + control->ras_num_mca_recs += num; > + control->ras_num_bad_pages += num * adev->umc.retire_unit; > + > Out: > kfree(buf); > return res; > @@ -852,6 +850,7 @@ int amdgpu_ras_eeprom_append(struct > amdgpu_ras_eeprom_control *control, { > struct amdgpu_device *adev = to_amdgpu_device(control); > int res, i; > + uint64_t nps = AMDGPU_NPS1_PARTITION_MODE; > > if (!__is_ras_eeprom_supported(adev)) > return 0; > @@ -865,9 +864,12 @@ int amdgpu_ras_eeprom_append(struct > amdgpu_ras_eeprom_control *control, > return -EINVAL; > } > > + if (adev->gmc.gmc_funcs->query_mem_partition_mode) > + nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); > + > /* set the new channel index flag */ > for (i = 0; i < num; i++) > - record[i].retired_page |= UMC_CHANNEL_IDX_V2; > + record[i].retired_page |= (nps << UMC_NPS_SHIFT); > > mutex_lock(&control->ras_tbl_mutex); > > @@ -881,7 +883,7 @@ int amdgpu_ras_eeprom_append(struct > amdgpu_ras_eeprom_control *control, > > /* clear channel index flag, the flag is only saved on eeprom */ > for (i = 0; i < num; i++) > - record[i].retired_page &= ~UMC_CHANNEL_IDX_V2; > + record[i].retired_page &= ~(nps << UMC_NPS_SHIFT); > > return res; > } > @@ -1392,6 +1394,8 @@ int amdgpu_ras_eeprom_init(struct > amdgpu_ras_eeprom_control *control) > } > control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset); > > + control->ras_num_mca_recs = 0; > + control->ras_num_pa_recs = 0; > return 0; > } > > @@ -1412,11 +1416,8 @@ int amdgpu_ras_eeprom_check(struct > amdgpu_ras_eeprom_control *control) > if (!__get_eeprom_i2c_addr(adev, control)) > return -EINVAL; > > - if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA) > - control->ras_num_bad_pages = control->ras_num_recs; > - else > - control->ras_num_bad_pages = > - control->ras_num_recs * adev->umc.retire_unit; > + control->ras_num_bad_pages = control->ras_num_pa_recs + > + control->ras_num_mca_recs * adev->umc.retire_unit; > > if (hdr->header == RAS_TABLE_HDR_VAL) { > DRM_DEBUG_DRIVER("Found existing EEPROM table with %d > records", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h > index 81d55cb7b397..13f7eda9a696 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h > @@ -43,19 +43,6 @@ enum amdgpu_ras_eeprom_err_type { > AMDGPU_RAS_EEPROM_ERR_COUNT, > }; > > -/* > - * one UMC MCA address could map to multiply physical address (PA), > - * such as 1:16, we use eeprom_table_record.address to store MCA > - * address and use eeprom_table_record.retired_page to save PA. > - * > - * AMDGPU_RAS_EEPROM_REC_PA: one record store one PA > - * AMDGPU_RAS_EEPROM_REC_MCA: one record store one MCA address > - */ > -enum amdgpu_ras_eeprom_rec_type { > - AMDGPU_RAS_EEPROM_REC_PA, > - AMDGPU_RAS_EEPROM_REC_MCA, > -}; > - > struct amdgpu_ras_eeprom_table_header { > uint32_t header; > uint32_t version; > @@ -100,6 +87,12 @@ struct amdgpu_ras_eeprom_control { > */ > u32 ras_num_bad_pages; > > + /* Number of records store mca address */ > + u32 ras_num_mca_recs; > + > + /* Number of records store physical address */ > + u32 ras_num_pa_recs; > + > /* First record index to read, 0-based. > * Range is [0, num_recs-1]. This is > * an absolute index, starting right after @@ -120,7 +113,6 @@ struct > amdgpu_ras_eeprom_control { > /* Record channel info which occurred bad pages > */ > u32 bad_channel_bitmap; > - enum amdgpu_ras_eeprom_rec_type rec_type; > }; > > /* > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h > index a4a7e61817aa..857693bcd8d4 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h > @@ -71,6 +71,13 @@ > */ > #define UMC_CHANNEL_IDX_V2 BIT_ULL(47) > > +/* > + * save nps value to eeprom_table_record.retired_page[47:40], > + * the channel index flag above will be retired. > + */ > +#define UMC_NPS_SHIFT 40 > +#define UMC_NPS_MASK 0xffULL > + > typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst, > uint32_t umc_inst, uint32_t ch_inst, void *data); > > -- > 2.34.1