On 2021-07-13 9:32 a.m., Christian König wrote:
> For allocations larger than 48MiB we need more than a page for the
> housekeeping in the worst case, resulting in the usual vmalloc overhead.
>
> Try to avoid this by assuming the good case and only falling back to the
> worst case if this didn't work.
>
> Signed-off-by: Christian König <christian.koenig@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 80 +++++++++++++++-----
>  1 file changed, 60 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> index 2fd77c36a1ff..ab8c5e28df7b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
> @@ -361,19 +361,23 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
>   * @man: TTM memory type manager
>   * @tbo: TTM BO we need this range for
>   * @place: placement flags and restrictions
> - * @mem: the resulting mem object
> + * @num_nodes: number of page nodes to use.
> + * @pages_per_node: number of pages per node to use.
> + * @res: the resulting mem object
>   *
>   * Allocate VRAM for the given BO.
>   */
>  static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>                                 struct ttm_buffer_object *tbo,
>                                 const struct ttm_place *place,
> +                               unsigned long num_nodes,
> +                               unsigned long pages_per_node,
>                                 struct ttm_resource **res)
>  {
> -        unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
>          struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
>          struct amdgpu_device *adev = to_amdgpu_device(mgr);
>          uint64_t vis_usage = 0, mem_bytes, max_bytes;
> +        unsigned long lpfn, pages_left, pages;
>          struct ttm_range_mgr_node *node;
>          struct drm_mm *mm = &mgr->mm;
>          enum drm_mm_insert_mode mode;
> @@ -395,21 +399,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>                  goto error_sub;
>          }
>
> -        if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
> -                pages_per_node = ~0ul;
> -                num_nodes = 1;
> -        } else {
> -#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> -                pages_per_node = HPAGE_PMD_NR;
> -#else
> -                /* default to 2MB */
> -                pages_per_node = 2UL << (20UL - PAGE_SHIFT);
> -#endif
> -                pages_per_node = max_t(uint32_t, pages_per_node,
> -                                       tbo->page_alignment);
> -                num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
> -        }
> -
>          node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
>                          GFP_KERNEL | __GFP_ZERO);
>          if (!node) {
> @@ -431,10 +420,15 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>          i = 0;
>          spin_lock(&mgr->lock);
>          while (pages_left) {
> -                uint32_t alignment = tbo->page_alignment;
> +                unsigned long alignment = tbo->page_alignment;
> +
> +                if (i >= num_nodes) {
> +                        r = -E2BIG;
> +                        goto error_free;
> +                }
>
>                  if (pages >= pages_per_node)
> -                        alignment = pages_per_node;
> +                        alignment = max(alignment, pages_per_node);

I don't understand this change. Is this an unrelated fix? pages_per_node
is already bumped up to tbo->page_alignment in amdgpu_vram_mgr_alloc, so
this max() operation here seems redundant.
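To spell out what I mean, here is the loop head reduced to the part in
question (a simplified sketch, not the actual driver code):

        /* amdgpu_vram_mgr_alloc() establishes for the non-contiguous
         * path:
         *
         *     pages_per_node = max_t(uint32_t, pages_per_node,
         *                            tbo->page_alignment);
         *
         * so pages_per_node >= tbo->page_alignment already holds when
         * we get here.
         */
        unsigned long alignment = tbo->page_alignment;

        if (pages >= pages_per_node)
                /* Given the invariant above, max() can only ever pick
                 * pages_per_node, i.e. this is equivalent to the old
                 * "alignment = pages_per_node;".
                 */
                alignment = max(alignment, pages_per_node);

And in the contiguous path pages_per_node is ~0ul, so as far as I can
see the "pages >= pages_per_node" branch is never taken there either.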
Other than that, the patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx>

@JinHuiEric, can you confirm the performance improvement?

Thanks,
  Felix

>
>                  r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
>                                                  alignment, 0, place->fpfn,
> @@ -483,6 +477,52 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
>          return r;
>  }
>
> +/**
> + * amdgpu_vram_mgr_alloc - allocate new range
> + *
> + * @man: TTM memory type manager
> + * @tbo: TTM BO we need this range for
> + * @place: placement flags and restrictions
> + * @res: the resulting mem object
> + *
> + * Allocate VRAM for the given BO.
> + */
> +static int amdgpu_vram_mgr_alloc(struct ttm_resource_manager *man,
> +                                 struct ttm_buffer_object *tbo,
> +                                 const struct ttm_place *place,
> +                                 struct ttm_resource **res)
> +{
> +        unsigned long num_nodes, pages_per_node;
> +        struct ttm_range_mgr_node *node;
> +        int r;
> +
> +        if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
> +                return amdgpu_vram_mgr_new(man, tbo, place, 1, ~0ul, res);
> +
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +        pages_per_node = HPAGE_PMD_NR;
> +#else
> +        /* default to 2MB */
> +        pages_per_node = 2UL << (20UL - PAGE_SHIFT);
> +#endif
> +        pages_per_node = max_t(uint32_t, pages_per_node, tbo->page_alignment);
> +        num_nodes = DIV_ROUND_UP_ULL(PFN_UP(tbo->base.size), pages_per_node);
> +
> +        if (struct_size(node, mm_nodes, num_nodes) > PAGE_SIZE) {
> +                size_t size = PAGE_SIZE;
> +
> +                size -= sizeof(struct ttm_range_mgr_node);
> +                size /= sizeof(struct drm_mm_node);
> +                r = amdgpu_vram_mgr_new(man, tbo, place, size, pages_per_node,
> +                                        res);
> +                if (r != -E2BIG)
> +                        return r;
> +        }
> +
> +        return amdgpu_vram_mgr_new(man, tbo, place, num_nodes, pages_per_node,
> +                                   res);
> +}
> +
>  /**
>   * amdgpu_vram_mgr_del - free ranges
>   *
> @@ -680,7 +720,7 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
>  }
>
>  static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
> -        .alloc = amdgpu_vram_mgr_new,
> +        .alloc = amdgpu_vram_mgr_alloc,
>          .free = amdgpu_vram_mgr_del,
>          .debug = amdgpu_vram_mgr_debug
>  };
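P.S.: The 48MiB number in the commit message checks out if I did the math
right: with 2MiB per node, the drm_mm_nodes that fit into a single page
cover roughly 48MiB, so anything larger needed a multi-page kvmalloc
before this patch. A quick userspace back-of-the-envelope check; the two
struct sizes below are my rough assumptions for a 64-bit build, not
values taken from the headers:

        #include <stdio.h>

        int main(void)
        {
                unsigned long page_size = 4096;      /* PAGE_SIZE */
                unsigned long node_size = 168;       /* sizeof(struct drm_mm_node), assumed */
                unsigned long header_size = 48;      /* fixed part of struct ttm_range_mgr_node, assumed */
                unsigned long pages_per_node = 512;  /* 2MiB / 4KiB */

                /* How many drm_mm_nodes fit into one page of housekeeping... */
                unsigned long nodes = (page_size - header_size) / node_size;
                /* ...and how much VRAM those nodes can describe. */
                unsigned long mib = (nodes * pages_per_node * page_size) >> 20;

                printf("%lu nodes per page -> %lu MiB\n", nodes, mib); /* 24 -> 48 */
                return 0;
        }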