Can we delete userqueue_start/end? I think the original purpose for those two fields are to specify a doorbell range for user queues, but were never used. Now we clearly want user queue to use all the range, minus range of IH/MM/SDMA. So those two fields will not be used. Thanks, Oak -----Original Message----- From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Zhao, Yong Sent: Thursday, January 31, 2019 5:27 PM To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Zhao, Yong <Yong.Zhao@xxxxxxx> Subject: [PATCH 2/3] drm/amdkfd: Fix bugs regarding CP user queue doorbells mask on SOC15 Reserved doorbells for SDMA IH and VCN were not properly masked out when allocating doorbells for CP user queues. This patch fixed that. Change-Id: I670adfc3fd7725d2ed0bd9665cb7f69f8b9023c2 Signed-off-by: Yong Zhao <Yong.Zhao@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 17 ++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 4 +++ drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c | 3 +++ drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 3 +++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 9 +++++++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 25 +++++++++++++------ .../gpu/drm/amd/include/kgd_kfd_interface.h | 19 +++++--------- 7 files changed, 56 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index e957e42c539a..13710f34191a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -196,11 +196,20 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) gpu_resources.sdma_doorbell[1][i+1] = adev->doorbell_index.sdma_engine[1] + 0x200 + (i >> 1); } - /* Doorbells 0x0e0-0ff and 0x2e0-2ff are reserved for - * SDMA, IH and VCN. So don't use them for the CP. + + /* Because of the setting in registers like + * SDMA0_DOORBELL_RANGE etc., BIF statically uses the + * lower 12 bits of doorbell address for routing, in + * order to route the CP queue doorbells to CP engine, + * the doorbells allocated to CP queues have to be + * outside the range set for SDMA, VCN, and IH blocks + * Prior to SOC15, all queues use queue ID to + * determine doorbells. */ - gpu_resources.reserved_doorbell_mask = 0x1e0; - gpu_resources.reserved_doorbell_val = 0x0e0; + gpu_resources.reserved_doorbells_start = + adev->doorbell_index.sdma_engine[0]; + gpu_resources.reserved_doorbells_end = + adev->doorbell_index.last_non_cp; kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 1cfec06f81d4..4de431f7f380 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -71,6 +71,7 @@ struct amdgpu_doorbell_index { uint32_t vce_ring6_7; } uvd_vce; }; + uint32_t last_non_cp; uint32_t max_assignment; /* Per engine SDMA doorbell size in dword */ uint32_t sdma_doorbell_range; @@ -143,6 +144,7 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3 = 0x18D, AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5 = 0x18E, AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7 = 0x18F, + AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7, AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x18F, AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF } AMDGPU_VEGA20_DOORBELL_ASSIGNMENT; @@ -222,6 +224,8 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT AMDGPU_DOORBELL64_VCE_RING4_5 = 0xFE, AMDGPU_DOORBELL64_VCE_RING6_7 = 0xFF, + AMDGPU_DOORBELL64_LAST_NON_CP = AMDGPU_DOORBELL64_VCE_RING6_7, + AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF, AMDGPU_DOORBELL64_INVALID = 0xFFFF } AMDGPU_DOORBELL64_ASSIGNMENT; diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c index 4b5d60ea3e78..fa0433199215 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -81,6 +81,9 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_DOORBELL64_VCE_RING2_3; adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_DOORBELL64_VCE_RING4_5; adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_DOORBELL64_VCE_RING6_7; + + adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL64_LAST_NON_CP; + /* In unit of dword doorbell */ adev->doorbell_index.max_assignment = AMDGPU_DOORBELL64_MAX_ASSIGNMENT << 1; adev->doorbell_index.sdma_doorbell_range = 4; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index 53716c593b2b..b1052caaff5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -85,6 +85,9 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3; adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5; adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7; + + adev->doorbell_index.last_non_cp = +AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP; + adev->doorbell_index.max_assignment = AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT << 1; adev->doorbell_index.sdma_doorbell_range = 20; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index e5ebcca7f031..6b8459f852cc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -103,6 +103,15 @@ #define KFD_KERNEL_QUEUE_SIZE 2048 +/* 512 = 0x200 + * On SOC15, the doorbell index distance for SDMA RLC i and (i + 1) in +the + * same SDMA engine, where i is a even number. + * For 8-bytes doorbells, it ensures that the mirror doorbell range (in +terms + * of low 12 bit address for each HW engine) on the second doorbell +page is + * the same as the range of the first doorbell page.*/ #define +KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512 + + /* * Kernel module parameter to specify maximum number of supported queues per * device diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 80b36e860a0a..c7b67c65d62d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -607,13 +607,24 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd, if (!qpd->doorbell_bitmap) return -ENOMEM; - /* Mask out any reserved doorbells */ - for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++) - if ((dev->shared_resources.reserved_doorbell_mask & i) == - dev->shared_resources.reserved_doorbell_val) { - set_bit(i, qpd->doorbell_bitmap); - pr_debug("reserved doorbell 0x%03x\n", i); - } + /* Mask out all reserved doorbells for SDMA, IH, and VCN on SOC15. + * Because of the setting in registers like SDMA0_DOORBELL_RANGE etc., + * BIF statically uses the lower 12 bits of doorbell address for + * routing. In order to route the CP queue doorbells to CP engine, + * the doorbells allocated to CP queues have to be outside the range + * set for SDMA, VCN, and IH blocks. + * Prior to SOC15, all queues use queue ID to + * determine doorbells. */ + i = dev->shared_resources.reserved_doorbells_start; + while (i <= dev->shared_resources.reserved_doorbells_end) { + set_bit(i, qpd->doorbell_bitmap); + set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET, + qpd->doorbell_bitmap); + pr_debug("reserved doorbell 0x%03x and 0x%03x\n", i, + i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET); + + i++; + } return 0; } diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 83d960110d23..b1bf45419d93 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -137,20 +137,13 @@ struct kgd2kfd_shared_resources { /* Bit n == 1 means Queue n is available for KFD */ DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES); - /* Doorbell assignments (SOC15 and later chips only). Only - * specific doorbells are routed to each SDMA engine. Others - * are routed to IH and VCN. They are not usable by the CP. - * - * Any doorbell number D that satisfies the following condition - * is reserved: (D & reserved_doorbell_mask) == reserved_doorbell_val - * - * KFD currently uses 1024 (= 0x3ff) doorbells per process. If - * doorbells 0x0e0-0x0ff and 0x2e0-0x2ff are reserved, that means - * mask would be set to 0x1e0 and val set to 0x0e0. - */ unsigned int sdma_doorbell[2][8]; - unsigned int reserved_doorbell_mask; - unsigned int reserved_doorbell_val; + + /* From SOC15 onwards, the doorbell indexes reserved for SDMA, IH, + * and VCN + */ + unsigned int reserved_doorbells_start; + unsigned int reserved_doorbells_end; /* Base address of doorbell aperture. */ phys_addr_t doorbell_physical_address; -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx