Re: 回复: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 2021-09-15 9:57 a.m., Christian König wrote:
Am 15.09.21 um 15:52 schrieb Andrey Grodzovsky:
On 2021-09-15 2:42 a.m., Pan, Xinhui wrote:
[AMD Official Use Only]

Andrey
I hit panic with this plug/unplug test without this patch.


Can you please tell which ASIC you are using and which kernel branch and what is the tip commit ?


But as we add enter/exit in all its callers. maybe it would not impact plug/unplug.


If you add enter/exit in all callers then why this solves the problem ? Is it because in one or more callers amdgpu_gmc_set_pte_pde is called many times and so calling enter/exit many times creates the problematic
contention from multiple threads ?

The most likely cause of this is cache line bouncing I think and yes moving the enter/exit a level up should fix this.

But I strongly suggest to test this with lockdep enabled and hotplug/GPU reset a couple of times.


Xinhui already tried testing with Hotplug and there is a regression crash as you can see bellow. That why I wanted to know which kernel and which ASIC to see if I can reproduce and fix the regression quickly.

I will try on my side with latest kernel and vega 10 anyway since i see vega 10 functions from the trace.

Andrey



Christian.


Andrey


[ 1109.041095] BUG: unable to handle page fault for address: 00000000000010e1
[ 1109.086353] RIP: 0010:vega10_power_gate_vce+0x15/0x40 [amdgpu]
[ 1109.196706] Call Trace:
[ 1109.199374]  ? pp_set_powergating_by_smu+0x1f9/0x4a0 [amdgpu]
[ 1109.205843]  amdgpu_dpm_set_powergating_by_smu+0xa6/0x150 [amdgpu]
[ 1109.212776]  amdgpu_dpm_enable_vce+0x36/0x100 [amdgpu]
[ 1109.218563]  vce_v4_0_hw_fini+0xe1/0xf0 [amdgpu]
[ 1109.223747]  amdgpu_device_fini_hw+0x333/0x483 [amdgpu]
[ 1109.229650]  amdgpu_driver_unload_kms+0x80/0xe0 [amdgpu]
[ 1109.235577]  amdgpu_pci_remove+0x37/0x70 [amdgpu]
[ 1109.240853]  pci_device_remove+0x3b/0xb0
[ 1109.245127]  device_release_driver_internal+0x100/0x1d0
[ 1109.250857]  device_release_driver+0x12/0x20
[ 1109.255535]  pci_stop_bus_device+0x79/0xa0
[ 1109.260016] pci_stop_and_remove_bus_device_locked+0x1b/0x30
[ 1109.266197]  remove_store+0x7b/0x90
[ 1109.269990]  dev_attr_store+0x14/0x30
[ 1109.274002]  sysfs_kf_write+0x48/0x60
[ 1109.277998]  kernfs_fop_write_iter+0x14e/0x1e0

________________________________________
发件人: Pan, Xinhui <Xinhui.Pan@xxxxxxx>
发送时间: 2021年9月15日 14:37
收件人: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
抄送: Deucher, Alexander; Koenig, Christian; Grodzovsky, Andrey; Pan, Xinhui 主题: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath

We hit soft hang while doing memory pressure test on one numa system.
After a qucik look, this is because kfd invalid/valid userptr memory
frequently with process_info lock hold.
Looks like update page table mapping use too much cpu time.

perf top says below,
75.81%  [kernel]       [k] __srcu_read_unlock
  6.19%  [amdgpu]       [k] amdgpu_gmc_set_pte_pde
  3.56%  [kernel]       [k] __srcu_read_lock
  2.20%  [amdgpu]       [k] amdgpu_vm_cpu_update
  2.20%  [kernel]       [k] __sg_page_iter_dma_next
  2.15%  [drm]          [k] drm_dev_enter
  1.70%  [drm]          [k] drm_prime_sg_to_dma_addr_array
  1.18%  [kernel]       [k] __sg_alloc_table_from_pages
  1.09%  [drm]          [k] drm_dev_exit

So move drm_dev_enter/exit outside gmc code, instead let caller do it.
They are gart_unbind, gart_map, vm_clear_bo, vm_update_pdes and
gmc_init_pdb0. vm_bo_update_mapping already calls it.

Signed-off-by: xinhui pan <xinhui.pan@xxxxxxx>
---
change from v1:
add enter/exit in more gmc_set_pte_pde callers
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 11 ++++++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  | 11 +++++-----
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 28 +++++++++++++++++-------
  3 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 76efd5f8950f..d7e4f4660acf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -34,6 +34,7 @@
  #include <asm/set_memory.h>
  #endif
  #include "amdgpu.h"
+#include <drm/drm_drv.h>

  /*
   * GART
@@ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
         u64 page_base;
         /* Starting from VEGA10, system bit must be 0 to mean invalid. */
         uint64_t flags = 0;
+       int idx;

         if (!adev->gart.ready) {
                 WARN(1, "trying to unbind memory from uninitialized GART !\n");
                 return -EINVAL;
         }

+       if (!drm_dev_enter(&adev->ddev, &idx))
+               return 0;
+
         t = offset / AMDGPU_GPU_PAGE_SIZE;
         p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
         for (i = 0; i < pages; i++, p++) {
@@ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
         for (i = 0; i < adev->num_vmhubs; i++)
                 amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);

+       drm_dev_exit(idx);
         return 0;
  }

@@ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
  {
         uint64_t page_base;
         unsigned i, j, t;
+       int idx;

         if (!adev->gart.ready) {
                 WARN(1, "trying to bind memory to uninitialized GART !\n");
                 return -EINVAL;
         }

+       if (!drm_dev_enter(&adev->ddev, &idx))
+               return 0;
+
         t = offset / AMDGPU_GPU_PAGE_SIZE;

         for (i = 0; i < pages; i++) {
@@ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
                         page_base += AMDGPU_GPU_PAGE_SIZE;
                 }
         }
+       drm_dev_exit(idx);
         return 0;
  }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 54f059501a33..1427fd70310c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -153,10 +153,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
  {
         void __iomem *ptr = (void *)cpu_pt_addr;
         uint64_t value;
-       int idx;
-
-       if (!drm_dev_enter(&adev->ddev, &idx))
-               return 0;

         /*
          * The following is for PTE only. GART does not have PDEs.
@@ -165,8 +161,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
         value |= flags;
         writeq(value, ptr + (gpu_page_idx * 8));

-       drm_dev_exit(idx);
-
         return 0;
  }

@@ -752,6 +746,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)                  adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
         u64 vram_end = vram_addr + vram_size;
         u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
+       int idx;
+
+       if (!drm_dev_enter(&adev->ddev, &idx))
+               return;

         flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
         flags |= AMDGPU_PTE_WRITEABLE;
@@ -773,6 +771,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
         flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
         /* Requires gart_ptb_gpu_pa to be 4K aligned */
         amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
+       drm_dev_exit(idx);
  }

  /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0126ece898da..daa16d2f89da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
         struct amdgpu_bo *bo = &vmbo->bo;
         unsigned entries, ats_entries;
         uint64_t addr;
-       int r;
+       int r, idx;

         /* Figure out our place in the hierarchy */
         if (ancestor->parent) {
@@ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
                         return r;
         }

+       if (!drm_dev_enter(&adev->ddev, &idx))
+               return -ENODEV;
+
         r = vm->update_funcs->map_table(vmbo);
         if (r)
-               return r;
+               goto exit;

         memset(&params, 0, sizeof(params));
         params.adev = adev;
@@ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,

         r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
         if (r)
-               return r;
+               goto exit;

         addr = 0;
         if (ats_entries) {
@@ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,                  r = vm->update_funcs->update(&params, vmbo, addr, 0, ats_entries,
                                              value, flags);
                 if (r)
-                       return r;
+                       goto exit;

                 addr += ats_entries * 8;
         }
@@ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,                  r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
                                              value, flags);
                 if (r)
-                       return r;
+                       goto exit;
         }

-       return vm->update_funcs->commit(&params, NULL);
+       r = vm->update_funcs->commit(&params, NULL);
+exit:
+       drm_dev_exit(idx);
+       return r;
  }

  /**
@@ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
                           struct amdgpu_vm *vm, bool immediate)
  {
         struct amdgpu_vm_update_params params;
-       int r;
+       int r, idx;

         if (list_empty(&vm->relocated))
                 return 0;

+       if (!drm_dev_enter(&adev->ddev, &idx))
+               return -ENODEV;
+
         memset(&params, 0, sizeof(params));
         params.adev = adev;
         params.vm = vm;
@@ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,

         r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
         if (r)
-               return r;
+               goto exit;

         while (!list_empty(&vm->relocated)) {
                 struct amdgpu_vm_bo_base *entry;
@@ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
         r = vm->update_funcs->commit(&params, &vm->last_update);
         if (r)
                 goto error;
+       drm_dev_exit(idx);
         return 0;

  error:
         amdgpu_vm_invalidate_pds(adev, vm);
+exit:
+       drm_dev_exit(idx);
         return r;
  }

--
2.25.1





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux