On Fri, Jan 5, 2018 at 12:17 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote: > GFXv7 and v8 dGPUs use a different addressing mode for KFD compared > to APUs (GPUVM64 vs HSA64). And dGPUs don't support MTYPE_CC. They > use MTYPE_UC instead for memory that requires coherency. > > Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com> > --- > .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 11 +++ > .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 4 + > .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 56 +++++++++++++ > .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 93 ++++++++++++++++++++++ > 4 files changed, 164 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > index 3e2f53b..092653f 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > @@ -1308,6 +1308,17 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) > case CHIP_KAVERI: > device_queue_manager_init_cik(&dqm->asic_ops); > break; > + > + case CHIP_HAWAII: > + device_queue_manager_init_cik_hawaii(&dqm->asic_ops); > + break; > + > + case CHIP_TONGA: > + case CHIP_FIJI: > + case CHIP_POLARIS10: > + case CHIP_POLARIS11: > + device_queue_manager_init_vi_tonga(&dqm->asic_ops); > + break; > default: > WARN(1, "Unexpected ASIC family %u", > dev->device_info->asic_family); > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > index 9fdc9c2..68be0aa 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > @@ -185,8 +185,12 @@ struct device_queue_manager { > > void device_queue_manager_init_cik( > struct device_queue_manager_asic_ops *asic_ops); > +void device_queue_manager_init_cik_hawaii( > + struct device_queue_manager_asic_ops *asic_ops); > void device_queue_manager_init_vi( > struct device_queue_manager_asic_ops *asic_ops); > +void device_queue_manager_init_vi_tonga( > + struct device_queue_manager_asic_ops *asic_ops); > void program_sh_mem_settings(struct device_queue_manager *dqm, > struct qcm_process_device *qpd); > unsigned int get_queues_num(struct device_queue_manager *dqm); > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c > index 28e48c9..aed4c21 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c > @@ -34,8 +34,13 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, > uint64_t alternate_aperture_size); > static int update_qpd_cik(struct device_queue_manager *dqm, > struct qcm_process_device *qpd); > +static int update_qpd_cik_hawaii(struct device_queue_manager *dqm, > + struct qcm_process_device *qpd); > static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, > struct qcm_process_device *qpd); > +static void init_sdma_vm_hawaii(struct device_queue_manager *dqm, > + struct queue *q, > + struct qcm_process_device *qpd); > > void device_queue_manager_init_cik( > struct device_queue_manager_asic_ops *asic_ops) > @@ -45,6 +50,14 @@ void device_queue_manager_init_cik( > asic_ops->init_sdma_vm = init_sdma_vm; > } > > +void device_queue_manager_init_cik_hawaii( > + struct device_queue_manager_asic_ops *asic_ops) > +{ > + asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik; > + asic_ops->update_qpd = update_qpd_cik_hawaii; > + asic_ops->init_sdma_vm = init_sdma_vm_hawaii; > +} > + > static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) > { > /* In 64-bit mode, we can only control the top 3 bits of the LDS, > @@ -132,6 +145,36 @@ static int update_qpd_cik(struct device_queue_manager *dqm, > return 0; > } > > +static int update_qpd_cik_hawaii(struct device_queue_manager *dqm, > + struct qcm_process_device *qpd) > +{ > + struct kfd_process_device *pdd; > + unsigned int temp; > + > + pdd = qpd_to_pdd(qpd); > + > + /* check if sh_mem_config register already configured */ > + if (qpd->sh_mem_config == 0) { > + qpd->sh_mem_config = > + ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) | > + DEFAULT_MTYPE(MTYPE_NONCACHED) | > + APE1_MTYPE(MTYPE_NONCACHED); > + qpd->sh_mem_ape1_limit = 0; > + qpd->sh_mem_ape1_base = 0; > + } > + > + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit > + * aperture addresses. > + */ > + temp = get_sh_mem_bases_nybble_64(pdd); > + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); > + > + pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", > + qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); > + > + return 0; > +} > + > static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, > struct qcm_process_device *qpd) > { > @@ -147,3 +190,16 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, > > q->properties.sdma_vm_addr = value; > } > + > +static void init_sdma_vm_hawaii(struct device_queue_manager *dqm, > + struct queue *q, > + struct qcm_process_device *qpd) > +{ > + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit > + * aperture addresses. > + */ > + q->properties.sdma_vm_addr = > + ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) << > + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) & > + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK; > +} > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c > index 2fbce57..fd60a11 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c > @@ -33,10 +33,21 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, > enum cache_policy alternate_policy, > void __user *alternate_aperture_base, > uint64_t alternate_aperture_size); > +static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm, > + struct qcm_process_device *qpd, > + enum cache_policy default_policy, > + enum cache_policy alternate_policy, > + void __user *alternate_aperture_base, > + uint64_t alternate_aperture_size); > static int update_qpd_vi(struct device_queue_manager *dqm, > struct qcm_process_device *qpd); > +static int update_qpd_vi_tonga(struct device_queue_manager *dqm, > + struct qcm_process_device *qpd); > static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, > struct qcm_process_device *qpd); > +static void init_sdma_vm_tonga(struct device_queue_manager *dqm, > + struct queue *q, > + struct qcm_process_device *qpd); > > void device_queue_manager_init_vi( > struct device_queue_manager_asic_ops *asic_ops) > @@ -46,6 +57,14 @@ void device_queue_manager_init_vi( > asic_ops->init_sdma_vm = init_sdma_vm; > } > > +void device_queue_manager_init_vi_tonga( > + struct device_queue_manager_asic_ops *asic_ops) > +{ > + asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga; > + asic_ops->update_qpd = update_qpd_vi_tonga; > + asic_ops->init_sdma_vm = init_sdma_vm_tonga; > +} > + > static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) > { > /* In 64-bit mode, we can only control the top 3 bits of the LDS, > @@ -103,6 +122,33 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, > return true; > } > > +static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm, > + struct qcm_process_device *qpd, > + enum cache_policy default_policy, > + enum cache_policy alternate_policy, > + void __user *alternate_aperture_base, > + uint64_t alternate_aperture_size) > +{ > + uint32_t default_mtype; > + uint32_t ape1_mtype; > + > + default_mtype = (default_policy == cache_policy_coherent) ? > + MTYPE_UC : > + MTYPE_NC; > + > + ape1_mtype = (alternate_policy == cache_policy_coherent) ? > + MTYPE_UC : > + MTYPE_NC; > + > + qpd->sh_mem_config = > + SH_MEM_ALIGNMENT_MODE_UNALIGNED << > + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | > + default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | > + ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT; > + > + return true; > +} > + > static int update_qpd_vi(struct device_queue_manager *dqm, > struct qcm_process_device *qpd) > { > @@ -144,6 +190,40 @@ static int update_qpd_vi(struct device_queue_manager *dqm, > return 0; > } > > +static int update_qpd_vi_tonga(struct device_queue_manager *dqm, > + struct qcm_process_device *qpd) > +{ > + struct kfd_process_device *pdd; > + unsigned int temp; > + > + pdd = qpd_to_pdd(qpd); > + > + /* check if sh_mem_config register already configured */ > + if (qpd->sh_mem_config == 0) { > + qpd->sh_mem_config = > + SH_MEM_ALIGNMENT_MODE_UNALIGNED << > + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | > + MTYPE_UC << > + SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | > + MTYPE_UC << > + SH_MEM_CONFIG__APE1_MTYPE__SHIFT; > + > + qpd->sh_mem_ape1_limit = 0; > + qpd->sh_mem_ape1_base = 0; > + } > + > + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit > + * aperture addresses. > + */ > + temp = get_sh_mem_bases_nybble_64(pdd); > + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); > + > + pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n", > + temp, qpd->sh_mem_bases); > + > + return 0; > +} > + > static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, > struct qcm_process_device *qpd) > { > @@ -159,3 +239,16 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, > > q->properties.sdma_vm_addr = value; > } > + > +static void init_sdma_vm_tonga(struct device_queue_manager *dqm, > + struct queue *q, > + struct qcm_process_device *qpd) > +{ > + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit > + * aperture addresses. > + */ > + q->properties.sdma_vm_addr = > + ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) << > + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) & > + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK; > +} > -- > 2.7.4 > This patch is: Acked-by: Oded Gabbay <oded.gabbay at gmail.com>