On Wed, Nov 6, 2024 at 1:49 AM Victor Zhao <Victor.Zhao@xxxxxxx> wrote: > > From: Gang Ba <gaba@xxxxxxx> > > amdgpu_amdkfd_alloc_gtt_mem currently allocates USWC memory. > It uses write-combining for CPU access, which is slow for reading. > Add a new parameter to amdgpu_amdkfd_alloc_gtt_mem to allocate > normal GTT memory. > > Signed-off-by: Gang Ba <gaba@xxxxxxx> > Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> > Signed-off-by: Victor Zhao <Victor.Zhao@xxxxxxx> Reviewed-by: Alex Deucher <alexander.deucher@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 7 +++++-- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +- > drivers/gpu/drm/amd/amdkfd/kfd_device.c | 2 +- > drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- > drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 2 +- > drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 +- > drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 2 +- > 7 files changed, 11 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > index 713be49540d4..acb762855c24 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c > @@ -294,7 +294,7 @@ void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev) > > int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, > void **mem_obj, uint64_t *gpu_addr, > - void **cpu_ptr, bool cp_mqd_gfx9) > + void **cpu_ptr, bool cp_mqd_gfx9, bool is_uswc_mode) > { > struct amdgpu_bo *bo = NULL; > struct amdgpu_bo_param bp; > @@ -305,7 +305,10 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, > bp.size = size; > bp.byte_align = PAGE_SIZE; > bp.domain = AMDGPU_GEM_DOMAIN_GTT; > - bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; > + if (is_uswc_mode) > + bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; > + else > + bp.flags = 0; > bp.type = ttm_bo_type_kernel; > bp.resv = NULL; > bp.bo_ptr_size = sizeof(struct amdgpu_bo); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index 4b80ad860639..745121e0dd8e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -234,7 +234,7 @@ int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, > /* Shared API */ > int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, > void **mem_obj, uint64_t *gpu_addr, > - void **cpu_ptr, bool mqd_gfx9); > + void **cpu_ptr, bool mqd_gfx9, bool is_uswc_mode); > void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj); > int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size, > void **mem_obj); > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c > index 956198da7859..1f1d79ac5e6c 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c > @@ -779,7 +779,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, > if (amdgpu_amdkfd_alloc_gtt_mem( > kfd->adev, size, &kfd->gtt_mem, > &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr, > - false)) { > + false, true)) { > dev_err(kfd_device, "Could not allocate %d bytes\n", size); > goto alloc_gtt_mem_failure; > } > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > index 38c19dc8311d..9672542578d4 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > @@ -2783,7 +2783,7 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) > > retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size, > &(mem_obj->gtt_mem), &(mem_obj->gpu_addr), > - (void *)&(mem_obj->cpu_ptr), false); > + (void *)&(mem_obj->cpu_ptr), false, true); > > return retval; > } > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c > index 84e8ea3a8a0c..c9882f1d4419 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c > @@ -140,7 +140,7 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node, > NUM_XCC(node->xcc_mask), > &(mqd_mem_obj->gtt_mem), > &(mqd_mem_obj->gpu_addr), > - (void *)&(mqd_mem_obj->cpu_ptr), true); > + (void *)&(mqd_mem_obj->cpu_ptr), true, true); > > if (retval) { > kfree(mqd_mem_obj); > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > index 6bab6fc6a35d..e1d8fd11d2b1 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > @@ -1639,7 +1639,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, > &pdd->proc_ctx_bo, > &pdd->proc_ctx_gpu_addr, > &pdd->proc_ctx_cpu_ptr, > - false); > + false, true); > if (retval) { > dev_err(dev->adev->dev, > "failed to allocate process context bo\n"); > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c > index c76db22a1000..3a3be0e19fb9 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c > @@ -260,7 +260,7 @@ static int init_user_queue(struct process_queue_manager *pqm, > &(*q)->gang_ctx_bo, > &(*q)->gang_ctx_gpu_addr, > &(*q)->gang_ctx_cpu_ptr, > - false); > + false, true); > if (retval) { > pr_err("failed to allocate gang context bo\n"); > goto cleanup; > -- > 2.34.1 >