GFXv7 and v8 dGPUs use a different addressing mode for KFD compared to APUs (GPUVM64 vs HSA64). And dGPUs don't support MTYPE_CC. They use MTYPE_UC instead for memory that requires coherency. Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com> --- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 11 +++ .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 4 + .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 56 +++++++++++++ .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 93 ++++++++++++++++++++++ 4 files changed, 164 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 3e2f53b..092653f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1308,6 +1308,17 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_KAVERI: device_queue_manager_init_cik(&dqm->asic_ops); break; + + case CHIP_HAWAII: + device_queue_manager_init_cik_hawaii(&dqm->asic_ops); + break; + + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + device_queue_manager_init_vi_tonga(&dqm->asic_ops); + break; default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 9fdc9c2..68be0aa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -185,8 +185,12 @@ struct device_queue_manager { void device_queue_manager_init_cik( struct device_queue_manager_asic_ops *asic_ops); +void device_queue_manager_init_cik_hawaii( + struct device_queue_manager_asic_ops *asic_ops); void device_queue_manager_init_vi( struct device_queue_manager_asic_ops *asic_ops); +void device_queue_manager_init_vi_tonga( + struct device_queue_manager_asic_ops *asic_ops); void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd); unsigned int get_queues_num(struct device_queue_manager *dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index 28e48c9..aed4c21 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -34,8 +34,13 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, uint64_t alternate_aperture_size); static int update_qpd_cik(struct device_queue_manager *dqm, struct qcm_process_device *qpd); +static int update_qpd_cik_hawaii(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); +static void init_sdma_vm_hawaii(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd); void device_queue_manager_init_cik( struct device_queue_manager_asic_ops *asic_ops) @@ -45,6 +50,14 @@ void device_queue_manager_init_cik( asic_ops->init_sdma_vm = init_sdma_vm; } +void device_queue_manager_init_cik_hawaii( + struct device_queue_manager_asic_ops *asic_ops) +{ + asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik; + asic_ops->update_qpd = update_qpd_cik_hawaii; + asic_ops->init_sdma_vm = init_sdma_vm_hawaii; +} + static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) { /* In 64-bit mode, we can only control the top 3 bits of the LDS, @@ -132,6 +145,36 @@ static int update_qpd_cik(struct device_queue_manager *dqm, return 0; } +static int update_qpd_cik_hawaii(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct kfd_process_device *pdd; + unsigned int temp; + + pdd = qpd_to_pdd(qpd); + + /* check if sh_mem_config register already configured */ + if (qpd->sh_mem_config == 0) { + qpd->sh_mem_config = + ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) | + DEFAULT_MTYPE(MTYPE_NONCACHED) | + APE1_MTYPE(MTYPE_NONCACHED); + qpd->sh_mem_ape1_limit = 0; + qpd->sh_mem_ape1_base = 0; + } + + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit + * aperture addresses. + */ + temp = get_sh_mem_bases_nybble_64(pdd); + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); + + pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", + qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); + + return 0; +} + static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd) { @@ -147,3 +190,16 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, q->properties.sdma_vm_addr = value; } + +static void init_sdma_vm_hawaii(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd) +{ + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit + * aperture addresses. + */ + q->properties.sdma_vm_addr = + ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) << + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) & + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index 2fbce57..fd60a11 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -33,10 +33,21 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, enum cache_policy alternate_policy, void __user *alternate_aperture_base, uint64_t alternate_aperture_size); +static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size); static int update_qpd_vi(struct device_queue_manager *dqm, struct qcm_process_device *qpd); +static int update_qpd_vi_tonga(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); +static void init_sdma_vm_tonga(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd); void device_queue_manager_init_vi( struct device_queue_manager_asic_ops *asic_ops) @@ -46,6 +57,14 @@ void device_queue_manager_init_vi( asic_ops->init_sdma_vm = init_sdma_vm; } +void device_queue_manager_init_vi_tonga( + struct device_queue_manager_asic_ops *asic_ops) +{ + asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga; + asic_ops->update_qpd = update_qpd_vi_tonga; + asic_ops->init_sdma_vm = init_sdma_vm_tonga; +} + static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) { /* In 64-bit mode, we can only control the top 3 bits of the LDS, @@ -103,6 +122,33 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, return true; } +static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size) +{ + uint32_t default_mtype; + uint32_t ape1_mtype; + + default_mtype = (default_policy == cache_policy_coherent) ? + MTYPE_UC : + MTYPE_NC; + + ape1_mtype = (alternate_policy == cache_policy_coherent) ? + MTYPE_UC : + MTYPE_NC; + + qpd->sh_mem_config = + SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | + default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | + ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT; + + return true; +} + static int update_qpd_vi(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -144,6 +190,40 @@ static int update_qpd_vi(struct device_queue_manager *dqm, return 0; } +static int update_qpd_vi_tonga(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct kfd_process_device *pdd; + unsigned int temp; + + pdd = qpd_to_pdd(qpd); + + /* check if sh_mem_config register already configured */ + if (qpd->sh_mem_config == 0) { + qpd->sh_mem_config = + SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | + MTYPE_UC << + SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | + MTYPE_UC << + SH_MEM_CONFIG__APE1_MTYPE__SHIFT; + + qpd->sh_mem_ape1_limit = 0; + qpd->sh_mem_ape1_base = 0; + } + + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit + * aperture addresses. + */ + temp = get_sh_mem_bases_nybble_64(pdd); + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); + + pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n", + temp, qpd->sh_mem_bases); + + return 0; +} + static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd) { @@ -159,3 +239,16 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, q->properties.sdma_vm_addr = value; } + +static void init_sdma_vm_tonga(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd) +{ + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit + * aperture addresses. + */ + q->properties.sdma_vm_addr = + ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) << + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) & + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK; +} -- 2.7.4