Merge the two loops, loosen the restriction for big allocations.
This reduces the CPU overhead in the good case, but increases
it a bit under memory pressure.
Signed-off-by: Christian König <christian.koenig@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 58 +++++++++-----------
1 file changed, 27 insertions(+), 31 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 529c5c32a205..e2cbe19404c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -358,13 +358,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
const struct ttm_place *place,
struct ttm_resource *mem)
{
+ unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
+ uint64_t vis_usage = 0, mem_bytes, max_bytes;
struct drm_mm *mm = &mgr->mm;
- struct drm_mm_node *nodes;
enum drm_mm_insert_mode mode;
- unsigned long lpfn, num_nodes, pages_per_node, pages_left;
- uint64_t vis_usage = 0, mem_bytes, max_bytes;
+ struct drm_mm_node *nodes;
unsigned i;
int r;
@@ -391,9 +391,10 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
pages_per_node = HPAGE_PMD_NR;
#else
/* default to 2MB */
- pages_per_node = (2UL << (20UL - PAGE_SHIFT));
+ pages_per_node = 2UL << (20UL - PAGE_SHIFT);
#endif
- pages_per_node = max((uint32_t)pages_per_node, mem->page_alignment);
+ pages_per_node = max_t(uint32_t, pages_per_node,
+ mem->page_alignment);
num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
}
@@ -411,42 +412,37 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
mem->start = 0;
pages_left = mem->num_pages;
- spin_lock(&mgr->lock);
- for (i = 0; pages_left >= pages_per_node; ++i) {
- unsigned long pages = rounddown_pow_of_two(pages_left);
-
- /* Limit maximum size to 2GB due to SG table limitations */
- pages = min(pages, (2UL << (30 - PAGE_SHIFT)));
+ /* Limit maximum size to 2GB due to SG table limitations */
+ pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
- r = drm_mm_insert_node_in_range(mm, &nodes[i], pages,
- pages_per_node, 0,
- place->fpfn, lpfn,
- mode);
- if (unlikely(r))
- break;
-
- vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
- amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
- pages_left -= pages;
- }
-
- for (; pages_left; ++i) {
- unsigned long pages = min(pages_left, pages_per_node);
+ i = 0;
+ spin_lock(&mgr->lock);
+ while (pages_left) {
uint32_t alignment = mem->page_alignment;
- if (pages == pages_per_node)
+ if (pages >= pages_per_node)
alignment = pages_per_node;
- r = drm_mm_insert_node_in_range(mm, &nodes[i],
- pages, alignment, 0,
- place->fpfn, lpfn,
- mode);
- if (unlikely(r))
+ r = drm_mm_insert_node_in_range(mm, &nodes[i], pages, alignment,
+ 0, place->fpfn, lpfn, mode);
+ if (unlikely(r)) {
+ if (pages > pages_per_node) {
+ if (is_power_of_2(pages))
+ pages = pages / 2;
+ else
+ pages = rounddown_pow_of_two(pages);
+ continue;
+ }
goto error;
+ }
vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
pages_left -= pages;
+ ++i;
+
+ if (pages > pages_left)
+ pages = pages_left;
}
spin_unlock(&mgr->lock);