Reset the error data info stored in vram when the user clears the eeprom table.

Signed-off-by: Stanley.Yang <Stanley.Yang@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c       | 97 ++++++++++++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h       |  2 +
 .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c    |  4 +
 3 files changed, 77 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 753260745554..9c1072ea5760 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2336,6 +2336,77 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
 	return ret;
 }
 
+int amdgpu_ras_error_data_init(struct ras_err_data *err_data)
+{
+	memset(err_data, 0, sizeof(*err_data));
+
+	INIT_LIST_HEAD(&err_data->err_node_list);
+
+	return 0;
+}
+
+static void amdgpu_ras_error_node_release(struct ras_err_node *err_node)
+{
+	if (!err_node)
+		return;
+
+	list_del(&err_node->node);
+	kvfree(err_node);
+}
+
+void amdgpu_ras_error_data_fini(struct ras_err_data *err_data)
+{
+	struct ras_err_node *err_node, *tmp;
+
+	list_for_each_entry_safe(err_node, tmp, &err_data->err_node_list, node)
+		amdgpu_ras_error_node_release(err_node);
+}
+
+static void amdgpu_ras_reset_error_info(struct ras_manager *obj)
+{
+	struct ras_err_data *err_data;
+
+	if (!obj)
+		return;
+
+	err_data = &obj->err_data;
+
+	/* release all error nodes */
+	amdgpu_ras_error_data_fini(err_data);
+
+	/* reset error data and init */
+	amdgpu_ras_error_data_init(err_data);
+}
+
+/* clear the in-memory bad page records and the umc ras manager error data */
+int amdgpu_ras_reset_vram_bad_pages(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct ras_err_handler_data *data;
+	struct ras_manager *obj;
+
+	if (!con || !con->eh_data)
+		return 0;
+
+	mutex_lock(&con->recovery_lock);
+
+	data = con->eh_data;
+	data->space_left += data->count;
+	data->count = 0;
+	memset(data->bps, 0, data->space_left * sizeof(*data->bps));
+
+	mutex_unlock(&con->recovery_lock);
+
+	list_for_each_entry(obj, &con->head, node) {
+		if (obj->head.block == AMDGPU_RAS_BLOCK__UMC) {
+			amdgpu_ras_reset_error_info(obj);
+			break;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * write error record array to eeprom, the function should be
  * protected by recovery_lock
@@ -3556,32 +3627,6 @@ void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
 	}
 }
 
-int amdgpu_ras_error_data_init(struct ras_err_data *err_data)
-{
-	memset(err_data, 0, sizeof(*err_data));
-
-	INIT_LIST_HEAD(&err_data->err_node_list);
-
-	return 0;
-}
-
-static void amdgpu_ras_error_node_release(struct ras_err_node *err_node)
-{
-	if (!err_node)
-		return;
-
-	list_del(&err_node->node);
-	kvfree(err_node);
-}
-
-void amdgpu_ras_error_data_fini(struct ras_err_data *err_data)
-{
-	struct ras_err_node *err_node, *tmp;
-
-	list_for_each_entry_safe(err_node, tmp, &err_data->err_node_list, node)
-		amdgpu_ras_error_node_release(err_node);
-}
-
 static struct ras_err_node *amdgpu_ras_error_find_node_by_id(struct ras_err_data *err_data,
 		struct amdgpu_smuio_mcm_config_info *mcm_info)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 665414c22ca9..64710517b9fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -622,6 +622,8 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
 int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev,
 		unsigned long *new_cnt);
 
+int amdgpu_ras_reset_vram_bad_pages(struct amdgpu_device *adev);
+
 static inline enum ta_ras_block amdgpu_ras_block_to_ta(enum amdgpu_ras_block block)
 {
 	switch (block) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 65aa218380be..40060f1b8ad6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -462,6 +462,10 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
 
 	mutex_unlock(&control->ras_tbl_mutex);
 
+	/* reset bad pages in vram structure */
+	if (amdgpu_ras_reset_vram_bad_pages(adev))
+		dev_warn(adev->dev, "reset vram bad pages structure failed, need reboot system\n");
+
 	return res;
 }
 
-- 
2.25.1