Almost all error count registers are automatically cleared after being read once, so both the CE and UE counts need to be read in one loop. Signed-off-by: Guchun Chen <guchun.chen@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 16 +++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 14 +++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 4 ++-- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index c06cb06398b1..29fa6b6b9d3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -335,7 +335,7 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev, { struct amdgpu_ctx *ctx; struct amdgpu_ctx_mgr *mgr; - unsigned long ras_counter; + unsigned long ras_counter_ue, ras_counter_ce; if (!fpriv) return -EINVAL; @@ -360,19 +360,17 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev, if (atomic_read(&ctx->guilty)) out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY; - /*query ue count*/ - ras_counter = amdgpu_ras_query_error_count(adev, false); + /*query both ue and ce count*/ + amdgpu_ras_query_error_count(adev, &ras_counter_ue, &ras_counter_ce); /*ras counter is monotonic increasing*/ - if (ras_counter != ctx->ras_counter_ue) { + if (ras_counter_ue != ctx->ras_counter_ue) { out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE; - ctx->ras_counter_ue = ras_counter; + ctx->ras_counter_ue = ras_counter_ue; } - /*query ce count*/ - ras_counter = amdgpu_ras_query_error_count(adev, true); - if (ras_counter != ctx->ras_counter_ce) { + if (ras_counter_ce != ctx->ras_counter_ce) { out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE; - ctx->ras_counter_ce = ras_counter; + ctx->ras_counter_ce = ras_counter_ce; } mutex_unlock(&mgr->lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 337bf2da7bdc..109eff2869b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -861,15 +861,18 @@ int amdgpu_ras_error_cure(struct amdgpu_device *adev, } /* get the total error counts on all IPs */ -unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, - bool is_ce) +void amdgpu_ras_query_error_count(struct amdgpu_device *adev, + unsigned long *ue_cnt, unsigned long *ce_cnt) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj; struct ras_err_data data = {0, 0}; + *ue_cnt = 0; + *ce_cnt = 0; + if (!con) - return 0; + return; list_for_each_entry(obj, &con->head, node) { struct ras_query_if info = { @@ -877,13 +880,14 @@ unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, }; if (amdgpu_ras_error_query(adev, &info)) - return 0; + continue; data.ce_count += info.ce_count; data.ue_count += info.ue_count; } - return is_ce ? data.ce_count : data.ue_count; + *ue_cnt = data.ue_count; + *ce_cnt = data.ce_count; } /* query/inject/cure end */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index e7df5d8429f8..733eab5bc512 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -487,8 +487,8 @@ int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev, void amdgpu_ras_resume(struct amdgpu_device *adev); void amdgpu_ras_suspend(struct amdgpu_device *adev); -unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, - bool is_ce); +void amdgpu_ras_query_error_count(struct amdgpu_device *adev, + unsigned long *ue_cnt, unsigned long *ce_cnt); /* error handling functions */ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx