[Public]
Ping
发件人: Yu, Lang <Lang.Yu@xxxxxxx>
发送时间: 星期二, 八月 1, 2023 15:16
收件人: amd-gfx@xxxxxxxxxxxxxxxxxxxxx <amd-gfx@xxxxxxxxxxxxxxxxxxxxx>
抄送: Koenig, Christian <Christian.Koenig@xxxxxxx>; Paneer Selvam, Arunpravin <Arunpravin.PaneerSelvam@xxxxxxx>; Zhang, Yifan <Yifan1.Zhang@xxxxxxx>; Yu, Lang <Lang.Yu@xxxxxxx>
主题: [PATCH v3] drm/amdgpu: refine amdgpu_bo_create_kernel_at()
发送时间: 星期二, 八月 1, 2023 15:16
收件人: amd-gfx@xxxxxxxxxxxxxxxxxxxxx <amd-gfx@xxxxxxxxxxxxxxxxxxxxx>
抄送: Koenig, Christian <Christian.Koenig@xxxxxxx>; Paneer Selvam, Arunpravin <Arunpravin.PaneerSelvam@xxxxxxx>; Zhang, Yifan <Yifan1.Zhang@xxxxxxx>; Yu, Lang <Lang.Yu@xxxxxxx>
主题: [PATCH v3] drm/amdgpu: refine amdgpu_bo_create_kernel_at()
Using amdgpu_bo_create_reserved() to create a BO in the VRAM
domain fails if the requested VRAM size is large (>128MB)
on an APU, which usually has a default of 512MB VRAM.
That's because VRAM is fragmented after several allocations.
The approach is to use amdgpu_bo_create_reserved() to
create a BO in the CPU domain first, which will always succeed.
v2: Don't overwrite the contents at specific offset.
v3: Don't return GPU addr.
Signed-off-by: Lang Yu <Lang.Yu@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 19 ++++++++++++++-----
1 file changed, 14 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index ff73cc11d47e..df5ba9509a41 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -377,27 +377,36 @@ int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
size = ALIGN(size, PAGE_SIZE);
r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL,
- cpu_addr);
+ AMDGPU_GEM_DOMAIN_CPU,
+ bo_ptr, NULL, NULL);
if (r)
return r;
if ((*bo_ptr) == NULL)
return 0;
+ (*bo_ptr)->preferred_domains = AMDGPU_GEM_DOMAIN_VRAM;
+ (*bo_ptr)->allowed_domains = (*bo_ptr)->preferred_domains;
+ (*bo_ptr)->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ (*bo_ptr)->flags |= cpu_addr ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED
+ : AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+
/*
* Remove the original mem node and create a new one at the request
* position.
*/
- if (cpu_addr)
- amdgpu_bo_kunmap(*bo_ptr);
-
ttm_resource_free(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.resource);
for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) {
(*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT;
(*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
+ (*bo_ptr)->placements[i].mem_type = TTM_PL_VRAM;
+ (*bo_ptr)->placements[i].flags = TTM_PL_FLAG_CONTIGUOUS;
+
+ if (!((*bo_ptr)->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED))
+ (*bo_ptr)->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
}
+
r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement,
&(*bo_ptr)->tbo.resource, &ctx);
if (r)
--
2.25.1
domain would fail if requested VRAM size is large(>128MB)
on APU which usually has a default 512MB VRAM.
That's because VRAM is fragmented after several allocations.
The approach is to use amdgpu_bo_create_reserved() to
create a BO in the CPU domain first, which will always succeed.
v2: Don't overwrite the contents at specific offset.
v3: Don't return GPU addr.
Signed-off-by: Lang Yu <Lang.Yu@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 19 ++++++++++++++-----
1 file changed, 14 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index ff73cc11d47e..df5ba9509a41 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -377,27 +377,36 @@ int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
size = ALIGN(size, PAGE_SIZE);
r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL,
- cpu_addr);
+ AMDGPU_GEM_DOMAIN_CPU,
+ bo_ptr, NULL, NULL);
if (r)
return r;
if ((*bo_ptr) == NULL)
return 0;
+ (*bo_ptr)->preferred_domains = AMDGPU_GEM_DOMAIN_VRAM;
+ (*bo_ptr)->allowed_domains = (*bo_ptr)->preferred_domains;
+ (*bo_ptr)->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ (*bo_ptr)->flags |= cpu_addr ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED
+ : AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+
/*
* Remove the original mem node and create a new one at the request
* position.
*/
- if (cpu_addr)
- amdgpu_bo_kunmap(*bo_ptr);
-
ttm_resource_free(&(*bo_ptr)->tbo, &(*bo_ptr)->tbo.resource);
for (i = 0; i < (*bo_ptr)->placement.num_placement; ++i) {
(*bo_ptr)->placements[i].fpfn = offset >> PAGE_SHIFT;
(*bo_ptr)->placements[i].lpfn = (offset + size) >> PAGE_SHIFT;
+ (*bo_ptr)->placements[i].mem_type = TTM_PL_VRAM;
+ (*bo_ptr)->placements[i].flags = TTM_PL_FLAG_CONTIGUOUS;
+
+ if (!((*bo_ptr)->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED))
+ (*bo_ptr)->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
}
+
r = ttm_bo_mem_space(&(*bo_ptr)->tbo, &(*bo_ptr)->placement,
&(*bo_ptr)->tbo.resource, &ctx);
if (r)
--
2.25.1