[AMD Official Use Only - AMD Internal Distribution Only] Series is Reviewed-by: Hawking Zhang <Hawking.Zhang@xxxxxxx> Regards, Hawking -----Original Message----- From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Tao Zhou Sent: Tuesday, November 19, 2024 14:36 To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Zhou1, Tao <Tao.Zhou1@xxxxxxx> Subject: [PATCH 01/21] drm/amdgpu: simplify RAS page retirement in one memory row Take R13 and column bits as a whole for UMC v12. Signed-off-by: Tao Zhou <tao.zhou1@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 57 +++++++++++--------------- drivers/gpu/drm/amd/amdgpu/umc_v12_0.h | 1 + 2 files changed, 24 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 1a8ea834efa6..8939b4f1fb49 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -177,7 +177,7 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, struct ta_ras_query_address_input *addr_in) { - uint32_t col, row, row_xor, bank, channel_index; + uint32_t col, row, bank, channel_index; uint64_t soc_pa, retired_page, column, err_addr; struct ta_ras_query_address_output addr_out; @@ -195,31 +195,27 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, channel_index = addr_out.pa.channel_idx; col = (err_addr >> 1) & 0x1fULL; - row = (err_addr >> 10) & 0x3fffULL; - row_xor = row ^ (0x1ULL << 13); /* clear [C3 C2] in soc physical address */ soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); /* clear [C4] in soc physical address */ soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); + /* clear [R13] in soc physical address */ + soc_pa &= ~(0x1ULL << UMC_V12_0_PA_R13_BIT); - /* loop for all possibilities of [C4 C3 C2] */ - for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { + /* loop for all possibilities of [R13 C4 C3 C2] */ + for (column = 0; column < UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; +column++) { retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); + retired_page |= (((column & 0x8) >> 3) << UMC_V12_0_PA_R13_BIT); + /* include column bit 0 and 1 */ col &= 0x3; col |= (column << 2); - dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row, col, bank, channel_index); - amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, addr_in->ma.umc_inst); + row = (retired_page >> UMC_V12_0_PA_R0_BIT) & 0x3fffULL; - /* shift R13 bit */ - retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); dev_info(adev->dev, "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row_xor, col, bank, channel_index); + retired_page, row, col, bank, channel_index); amdgpu_umc_fill_error_record(err_data, err_addr, retired_page, channel_index, addr_in->ma.umc_inst); } @@ -229,7 +225,7 @@ static void umc_v12_0_dump_addr_info(struct amdgpu_device *adev, struct ta_ras_query_address_output *addr_out, uint64_t err_addr) { - uint32_t col, row, row_xor, bank, channel_index; + uint32_t col, row, bank, channel_index; uint64_t soc_pa, retired_page, column; soc_pa = addr_out->pa.pa; @@ -237,29 +233,27 @@ static void umc_v12_0_dump_addr_info(struct amdgpu_device *adev, channel_index = addr_out->pa.channel_idx; col = (err_addr >> 1) & 0x1fULL; - row = (err_addr >> 10) & 0x3fffULL; - row_xor = row ^ (0x1ULL << 13); /* clear [C3 C2] in soc physical address */ soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); /* clear [C4] in soc physical address */ soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); + /* clear [R13] in soc physical address */ + soc_pa &= ~(0x1ULL << UMC_V12_0_PA_R13_BIT); - /* loop for all possibilities of [C4 C3 C2] */ - for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { + /* loop for all possibilities of [R13 C4 C3 C2] */ + for (column = 0; column < UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; +column++) { retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); + retired_page |= (((column & 0x8) >> 3) << UMC_V12_0_PA_R13_BIT); + /* include column bit 0 and 1 */ col &= 0x3; - col |= (column << 2); - dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row, col, bank, channel_index); + col |= ((column & 0x7) << 2); + row = (retired_page >> UMC_V12_0_PA_R0_BIT) & 0x3fffULL; - /* shift R13 bit */ - retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); dev_info(adev->dev, "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row_xor, col, bank, channel_index); + retired_page, row, col, bank, channel_index); } } @@ -274,23 +268,18 @@ static int umc_v12_0_lookup_bad_pages_in_a_row(struct amdgpu_device *adev, soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); /* clear [C4] in soc physical address */ soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); + /* clear [R13] in soc physical address */ + soc_pa &= ~(0x1ULL << UMC_V12_0_PA_R13_BIT); /* loop for all possibilities of [C4 C3 C2] */ - for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { + for (column = 0; column < UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; +column++) { retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); + retired_page |= (((column & 0x8) >> 3) << UMC_V12_0_PA_R13_BIT); if (pos >= len) return 0; pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - - /* shift R13 bit */ - retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); - - if (pos >= len) - return 0; - pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - } return pos; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index be5598d76c1d..dea42810fc53 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -60,6 +60,7 @@ #define UMC_V12_0_PA_C2_BIT 15 #define UMC_V12_0_PA_C4_BIT 21 /* row bits in SOC physical address */ +#define UMC_V12_0_PA_R0_BIT 22 #define UMC_V12_0_PA_R13_BIT 35 #define MCA_UMC_HWID_V12_0 0x96 -- 2.34.1