Re: [PATCH] drm/amdgpu: further lower VRAM allocation overhead

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Christian,

I have pushed it into amd-staging-dkms-5.11, but it causes a regression with the TransferBench test on MI200. The Jira ticket is here:
https://ontrack-internal.amd.com/browse/SWDEV-295245

Can you please take a look? Thanks!

Regards,
Eric

On 2021-07-14 4:33 a.m., Christian König wrote:
Hi Eric,

feel free to push into amd-staging-dkms-5.11, but please don't push it into amd-staging-drm-next.

The latter will just cause a merge failure which Alex needs to resolve manually.

I can take care of pushing to amd-staging-drm-next as soon as that is rebased on latest upstream.

Regards,
Christian.

Am 13.07.21 um 21:19 schrieb Eric Huang:
Hi Christian/Felix,

If you have no objections, it will be pushed into amd-staging-dkms-5.11 and amd-staging-drm-next.

Thanks,
Eric

On 2021-07-13 3:17 p.m., Eric Huang wrote:
For allocations larger than 48MiB we need more than a page for the
housekeeping in the worst case, resulting in the usual vmalloc overhead.

Try to avoid this by assuming the good case and only falling back to the
worst case if this didn't work.

Signed-off-by: Christian König <christian.koenig@xxxxxxx>
Signed-off-by: Eric Huang <jinhuieric.huang@xxxxxxx>
Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 71 +++++++++++++++-----
  1 file changed, 53 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index be4261c4512e..ecbe05e1db66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -361,9 +361,11 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
  static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
                     struct ttm_buffer_object *tbo,
                     const struct ttm_place *place,
+                   unsigned long num_nodes,
+                   unsigned long pages_per_node,
                     struct ttm_resource *mem)
  {
-    unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
+    unsigned long lpfn, pages_left, pages;
      struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
      struct amdgpu_device *adev = to_amdgpu_device(mgr);
      uint64_t vis_usage = 0, mem_bytes, max_bytes;
@@ -393,21 +395,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
          return -ENOSPC;
      }
  -    if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
-        pages_per_node = ~0ul;
-        num_nodes = 1;
-    } else {
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-        pages_per_node = HPAGE_PMD_NR;
-#else
-        /* default to 2MB */
-        pages_per_node = 2UL << (20UL - PAGE_SHIFT);
-#endif
-        pages_per_node = max_t(uint32_t, pages_per_node,
-                       mem->page_alignment);
-        num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
-    }
-
      nodes = kvmalloc_array((uint32_t)num_nodes, sizeof(*nodes),
                     GFP_KERNEL | __GFP_ZERO);
      if (!nodes) {
@@ -435,7 +422,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
      i = 0;
      spin_lock(&mgr->lock);
      while (pages_left) {
-        uint32_t alignment = mem->page_alignment;
+        unsigned long alignment = mem->page_alignment;
+
+        if (i >= num_nodes) {
+            r = -E2BIG;
+            goto error;
+        }
            if (pages >= pages_per_node)
              alignment = pages_per_node;
@@ -492,6 +484,49 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
      return r;
  }
  +/**
+ * amdgpu_vram_mgr_alloc - allocate new range
+ *
+ * @man: TTM memory type manager
+ * @tbo: TTM BO we need this range for
+ * @place: placement flags and restrictions
+ * @mem: the resulting mem object
+ *
+ * Allocate VRAM for the given BO.
+ */
+static int amdgpu_vram_mgr_alloc(struct ttm_resource_manager *man,
+                 struct ttm_buffer_object *tbo,
+                 const struct ttm_place *place,
+                 struct ttm_resource *mem)
+{
+    unsigned long num_nodes, pages_per_node;
+    int r;
+
+    if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
+        return amdgpu_vram_mgr_new(man, tbo, place, 1, ~0ul, mem);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+    pages_per_node = HPAGE_PMD_NR;
+#else
+    /* default to 2MB */
+    pages_per_node = 2UL << (20UL - PAGE_SHIFT);
+#endif
+    pages_per_node = max_t(uint32_t, pages_per_node,
+                   mem->page_alignment);
+    num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
+
+    if (sizeof(struct drm_mm_node) * num_nodes > PAGE_SIZE) {
+        r = amdgpu_vram_mgr_new(man, tbo, place,
+                PAGE_SIZE / sizeof(struct drm_mm_node),
+                pages_per_node,    mem);
+        if (r != -E2BIG)
+            return r;
+    }
+
+    return amdgpu_vram_mgr_new(man, tbo, place, num_nodes, pages_per_node,
+                   mem);
+}
+
  /**
   * amdgpu_vram_mgr_del - free ranges
   *
@@ -693,7 +728,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
  }
    static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
-    .alloc    = amdgpu_vram_mgr_new,
+    .alloc    = amdgpu_vram_mgr_alloc,
      .free    = amdgpu_vram_mgr_del,
      .debug    = amdgpu_vram_mgr_debug
  };

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=04%7C01%7Cjinhuieric.huang%40amd.com%7C1f368defa88042677f4008d946a217c8%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637618484270197021%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=wvuHNuK1gQBYbErvhBO%2FambKpNzjHL2A9ea22fvQMkY%3D&amp;reserved=0


_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux