We hit soft hang while doing memory pressure test on one numa system. After a qucik look, this is because kfd invalid/valid userptr memory frequently with process_info lock hold. perf top says below, 75.81% [kernel] [k] __srcu_read_unlock 6.19% [amdgpu] [k] amdgpu_gmc_set_pte_pde 3.56% [kernel] [k] __srcu_read_lock 2.20% [amdgpu] [k] amdgpu_vm_cpu_update 2.20% [kernel] [k] __sg_page_iter_dma_next 2.15% [drm] [k] drm_dev_enter 1.70% [drm] [k] drm_prime_sg_to_dma_addr_array 1.18% [kernel] [k] __sg_alloc_table_from_pages 1.09% [drm] [k] drm_dev_exit So move drm_dev_enter/exit outside gmc code, instead let caller do it. They are gart_unbind, gart_map, vm_cpu_update(already hold in its caller) and gmc_init_pdb0(no need) Signed-off-by: xinhui pan <xinhui.pan@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 11 +++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 7 ------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 76efd5f8950f..d7e4f4660acf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -34,6 +34,7 @@ #include <asm/set_memory.h> #endif #include "amdgpu.h" +#include <drm/drm_drv.h> /* * GART @@ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, u64 page_base; /* Starting from VEGA10, system bit must be 0 to mean invalid. */ uint64_t flags = 0; + int idx; if (!adev->gart.ready) { WARN(1, "trying to unbind memory from uninitialized GART !\n"); return -EINVAL; } + if (!drm_dev_enter(&adev->ddev, &idx)) + return 0; + t = offset / AMDGPU_GPU_PAGE_SIZE; p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE; for (i = 0; i < pages; i++, p++) { @@ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, for (i = 0; i < adev->num_vmhubs; i++) amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); + drm_dev_exit(idx); return 0; } @@ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, { uint64_t page_base; unsigned i, j, t; + int idx; if (!adev->gart.ready) { WARN(1, "trying to bind memory to uninitialized GART !\n"); return -EINVAL; } + if (!drm_dev_enter(&adev->ddev, &idx)) + return 0; + t = offset / AMDGPU_GPU_PAGE_SIZE; for (i = 0; i < pages; i++) { @@ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, page_base += AMDGPU_GPU_PAGE_SIZE; } } + drm_dev_exit(idx); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 54f059501a33..e973488250e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -31,7 +31,6 @@ #include "amdgpu_ras.h" #include "amdgpu_xgmi.h" -#include <drm/drm_drv.h> /** * amdgpu_gmc_pdb0_alloc - allocate vram for pdb0 @@ -153,10 +152,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, { void __iomem *ptr = (void *)cpu_pt_addr; uint64_t value; - int idx; - - if (!drm_dev_enter(&adev->ddev, &idx)) - return 0; /* * The following is for PTE only. GART does not have PDEs. @@ -165,8 +160,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, value |= flags; writeq(value, ptr + (gpu_page_idx * 8)); - drm_dev_exit(idx); - return 0; } -- 2.25.1