On 14/02/22 2:42 pm, Christian König wrote:
>
>
> Am 14.02.22 um 09:36 schrieb Matthew Auld:
>> On Mon, 14 Feb 2022 at 06:32, Christian König
>> <ckoenig.leichtzumerken@xxxxxxxxx> wrote:
>>> Am 13.02.22 um 09:52 schrieb Arunpravin:
>>>> - Make drm_buddy_alloc a single function to handle
>>>> range allocation and non-range allocation demands
>>>>
>>>> - Implemented a new function alloc_range() which allocates
>>>> the requested power-of-two block, complying with range limitations
>>>>
>>>> - Moved order computation and memory alignment logic from
>>>> i915 driver to drm buddy
>>>>
>>>> v2:
>>>> merged below changes to keep the build unbroken
>>>> - drm_buddy_alloc_range() becomes obsolete and may be removed
>>>> - enable ttm range allocation (fpfn / lpfn) support in i915 driver
>>>> - apply enhanced drm_buddy_alloc() function to i915 driver
>>>>
>>>> v3(Matthew Auld):
>>>> - Fix alignment issues and remove unnecessary list_empty check
>>>> - add more validation checks for input arguments
>>>> - make alloc_range() block allocations as bottom-up
>>>> - optimize order computation logic
>>>> - replace uint64_t with u64, which is preferred in the kernel
>>>>
>>>> v4(Matthew Auld):
>>>> - keep drm_buddy_alloc_range() function implementation for generic
>>>> actual range allocations
>>>> - keep alloc_range() implementation for end bias allocations
>>>>
>>>> v5(Matthew Auld):
>>>> - modify drm_buddy_alloc() passing argument place->lpfn to lpfn
>>>> as place->lpfn will currently always be zero for i915
>>>>
>>>> v6(Matthew Auld):
>>>> - fixup potential uaf - If we are unlucky and can't allocate
>>>> enough memory when splitting blocks, where we temporarily
>>>> end up with the given block and its buddy on the respective
>>>> free list, then we need to ensure we delete both blocks,
>>>> and not just the buddy, before potentially freeing them
>>>>
>>>> - fix warnings reported by kernel test robot <lkp@xxxxxxxxx>
>>>>
>>>> v7(Matthew Auld):
>>>> - revert fixup potential uaf
>>>> - keep __alloc_range() add node to the list logic same as
>>>> drm_buddy_alloc_blocks() by having a temporary list variable
>>>> - at drm_buddy_alloc_blocks() keep i915 range_overflows macro
>>>> and add a new check for end variable
>>>>
>>>> v8:
>>>> - fix warnings reported by kernel test robot <lkp@xxxxxxxxx>
>>>>
>>>> v9(Matthew Auld):
>>>> - remove DRM_BUDDY_RANGE_ALLOCATION flag
>>>> - remove unnecessary function description
>>>>
>>>> Signed-off-by: Arunpravin <Arunpravin.PaneerSelvam@xxxxxxx>
>>>> Reviewed-by: Matthew Auld <matthew.auld@xxxxxxxxx>
>>> As long as nobody objects I'm going to push patches 1-3 to drm-misc-next
>>> in the next hour or so:
>> As part of this could you also push
>> https://patchwork.freedesktop.org/series/99842/ ?
>
> Sure, but Arun said in our internal chat that I should wait with that
> anyway since he wanted to sort out one more issue.
>
> Christian.
>
Working on 2 issues:
1. I think we need to keep the DRM_BUDDY_RANGE_ALLOCATION flag - some corner case didn't allow the amdgpu driver to load.
2. Rebasing the existing amdgpu_vram_mgr.c and resolving all conflicts, as there are many changes merged in with the below patch - drm/amdgpu: remove VRAM accounting v2
>>
>>> Then going to take a deeper look into patches 4 and 5 to get them reviewed.
>>>
>>> Thanks,
>>> Christian.
>>>
>>>> ---
>>>> drivers/gpu/drm/drm_buddy.c | 292 +++++++++++++-----
>>>> drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 63 ++--
>>>> drivers/gpu/drm/i915/i915_ttm_buddy_manager.h | 2 +
>>>> include/drm/drm_buddy.h | 11 +-
>>>> 4 files changed, 250 insertions(+), 118 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
>>>> index d60878bc9c20..e0c0d786a572 100644
>>>> --- a/drivers/gpu/drm/drm_buddy.c
>>>> +++ b/drivers/gpu/drm/drm_buddy.c
>>>> @@ -282,23 +282,97 @@ void drm_buddy_free_list(struct drm_buddy *mm, struct list_head *objects)
>>>> }
>>>> EXPORT_SYMBOL(drm_buddy_free_list);
>>>>
>>>> -/**
>>>> - * drm_buddy_alloc_blocks - allocate power-of-two blocks
>>>> - *
>>>> - * @mm: DRM buddy manager to allocate from
>>>> - * @order: size of the allocation
>>>> - *
>>>> - * The order value here translates to:
>>>> - *
>>>> - * 0 = 2^0 * mm->chunk_size
>>>> - * 1 = 2^1 * mm->chunk_size
>>>> - * 2 = 2^2 * mm->chunk_size
>>>> - *
>>>> - * Returns:
>>>> - * allocated ptr to the &drm_buddy_block on success
>>>> - */
>>>> -struct drm_buddy_block *
>>>> -drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order)
>>>> +static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
>>>> +{
>>>> + return s1 <= e2 && e1 >= s2;
>>>> +}
>>>> +
>>>> +static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
>>>> +{
>>>> + return s1 <= s2 && e1 >= e2;
>>>> +}
>>>> +
>>>> +static struct drm_buddy_block *
>>>> +alloc_range_bias(struct drm_buddy *mm,
>>>> + u64 start, u64 end,
>>>> + unsigned int order)
>>>> +{
>>>> + struct drm_buddy_block *block;
>>>> + struct drm_buddy_block *buddy;
>>>> + LIST_HEAD(dfs);
>>>> + int err;
>>>> + int i;
>>>> +
>>>> + end = end - 1;
>>>> +
>>>> + for (i = 0; i < mm->n_roots; ++i)
>>>> + list_add_tail(&mm->roots[i]->tmp_link, &dfs);
>>>> +
>>>> + do {
>>>> + u64 block_start;
>>>> + u64 block_end;
>>>> +
>>>> + block = list_first_entry_or_null(&dfs,
>>>> + struct drm_buddy_block,
>>>> + tmp_link);
>>>> + if (!block)
>>>> + break;
>>>> +
>>>> + list_del(&block->tmp_link);
>>>> +
>>>> + if (drm_buddy_block_order(block) < order)
>>>> + continue;
>>>> +
>>>> + block_start = drm_buddy_block_offset(block);
>>>> + block_end = block_start + drm_buddy_block_size(mm, block) - 1;
>>>> +
>>>> + if (!overlaps(start, end, block_start, block_end))
>>>> + continue;
>>>> +
>>>> + if (drm_buddy_block_is_allocated(block))
>>>> + continue;
>>>> +
>>>> + if (contains(start, end, block_start, block_end) &&
>>>> + order == drm_buddy_block_order(block)) {
>>>> + /*
>>>> + * Find the free block within the range.
>>>> + */
>>>> + if (drm_buddy_block_is_free(block))
>>>> + return block;
>>>> +
>>>> + continue;
>>>> + }
>>>> +
>>>> + if (!drm_buddy_block_is_split(block)) {
>>>> + err = split_block(mm, block);
>>>> + if (unlikely(err))
>>>> + goto err_undo;
>>>> + }
>>>> +
>>>> + list_add(&block->right->tmp_link, &dfs);
>>>> + list_add(&block->left->tmp_link, &dfs);
>>>> + } while (1);
>>>> +
>>>> + return ERR_PTR(-ENOSPC);
>>>> +
>>>> +err_undo:
>>>> + /*
>>>> + * We really don't want to leave around a bunch of split blocks, since
>>>> + * bigger is better, so make sure we merge everything back before we
>>>> + * free the allocated blocks.
>>>> + */
>>>> + buddy = get_buddy(block);
>>>> + if (buddy &&
>>>> + (drm_buddy_block_is_free(block) &&
>>>> + drm_buddy_block_is_free(buddy)))
>>>> + __drm_buddy_free(mm, block);
>>>> + return ERR_PTR(err);
>>>> +}
>>>> +
>>>> +static struct drm_buddy_block *
>>>> +alloc_from_freelist(struct drm_buddy *mm,
>>>> + unsigned int order,
>>>> + unsigned long flags)
>>>> {
>>>> struct drm_buddy_block *block = NULL;
>>>> unsigned int i;
>>>> @@ -320,78 +394,29 @@ drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order)
>>>> while (i != order) {
>>>> err = split_block(mm, block);
>>>> if (unlikely(err))
>>>> - goto out_free;
>>>> + goto err_undo;
>>>>
>>>> - /* Go low */
>>>> - block = block->left;
>>>> + block = block->right;
>>>> i--;
>>>> }
>>>> -
>>>> - mark_allocated(block);
>>>> - mm->avail -= drm_buddy_block_size(mm, block);
>>>> - kmemleak_update_trace(block);
>>>> return block;
>>>>
>>>> -out_free:
>>>> +err_undo:
>>>> if (i != order)
>>>> __drm_buddy_free(mm, block);
>>>> return ERR_PTR(err);
>>>> }
>>>> -EXPORT_SYMBOL(drm_buddy_alloc_blocks);
>>>> -
>>>> -static inline bool overlaps(u64 s1, u64 e1, u64 s2, u64 e2)
>>>> -{
>>>> - return s1 <= e2 && e1 >= s2;
>>>> -}
>>>> -
>>>> -static inline bool contains(u64 s1, u64 e1, u64 s2, u64 e2)
>>>> -{
>>>> - return s1 <= s2 && e1 >= e2;
>>>> -}
>>>>
>>>> -/**
>>>> - * drm_buddy_alloc_range - allocate range
>>>> - *
>>>> - * @mm: DRM buddy manager to allocate from
>>>> - * @blocks: output list head to add allocated blocks
>>>> - * @start: start of the allowed range for this block
>>>> - * @size: size of the allocation
>>>> - *
>>>> - * Intended for pre-allocating portions of the address space, for example to
>>>> - * reserve a block for the initial framebuffer or similar, hence the expectation
>>>> - * here is that drm_buddy_alloc_blocks() is still the main vehicle for
>>>> - * allocations, so if that's not the case then the drm_mm range allocator is
>>>> - * probably a much better fit, and so you should probably go use that instead.
>>>> - *
>>>> - * Note that it's safe to chain together multiple alloc_ranges
>>>> - * with the same blocks list
>>>> - *
>>>> - * Returns:
>>>> - * 0 on success, error code on failure.
>>>> - */
>>>> -int drm_buddy_alloc_range(struct drm_buddy *mm,
>>>> - struct list_head *blocks,
>>>> - u64 start, u64 size)
>>>> +static int __alloc_range(struct drm_buddy *mm,
>>>> + struct list_head *dfs,
>>>> + u64 start, u64 size,
>>>> + struct list_head *blocks)
>>>> {
>>>> struct drm_buddy_block *block;
>>>> struct drm_buddy_block *buddy;
>>>> LIST_HEAD(allocated);
>>>> - LIST_HEAD(dfs);
>>>> u64 end;
>>>> int err;
>>>> - int i;
>>>> -
>>>> - if (size < mm->chunk_size)
>>>> - return -EINVAL;
>>>> -
>>>> - if (!IS_ALIGNED(size | start, mm->chunk_size))
>>>> - return -EINVAL;
>>>> -
>>>> - if (range_overflows(start, size, mm->size))
>>>> - return -EINVAL;
>>>> -
>>>> - for (i = 0; i < mm->n_roots; ++i)
>>>> - list_add_tail(&mm->roots[i]->tmp_link, &dfs);
>>>>
>>>> end = start + size - 1;
>>>>
>>>> @@ -399,7 +424,7 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
>>>> u64 block_start;
>>>> u64 block_end;
>>>>
>>>> - block = list_first_entry_or_null(&dfs,
>>>> + block = list_first_entry_or_null(dfs,
>>>> struct drm_buddy_block,
>>>> tmp_link);
>>>> if (!block)
>>>> @@ -436,8 +461,8 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
>>>> goto err_undo;
>>>> }
>>>>
>>>> - list_add(&block->right->tmp_link, &dfs);
>>>> - list_add(&block->left->tmp_link, &dfs);
>>>> + list_add(&block->right->tmp_link, dfs);
>>>> + list_add(&block->left->tmp_link, dfs);
>>>> } while (1);
>>>>
>>>> list_splice_tail(&allocated, blocks);
>>>> @@ -459,7 +484,120 @@ int drm_buddy_alloc_range(struct drm_buddy *mm,
>>>> drm_buddy_free_list(mm, &allocated);
>>>> return err;
>>>> }
>>>> -EXPORT_SYMBOL(drm_buddy_alloc_range);
>>>> +
>>>> +static int __drm_buddy_alloc_range(struct drm_buddy *mm,
>>>> + u64 start,
>>>> + u64 size,
>>>> + struct list_head *blocks)
>>>> +{
>>>> + LIST_HEAD(dfs);
>>>> + int i;
>>>> +
>>>> + for (i = 0; i < mm->n_roots; ++i)
>>>> + list_add_tail(&mm->roots[i]->tmp_link, &dfs);
>>>> +
>>>> + return __alloc_range(mm, &dfs, start, size, blocks);
>>>> +}
>>>> +
>>>> +/**
>>>> + * drm_buddy_alloc_blocks - allocate power-of-two blocks
>>>> + *
>>>> + * @mm: DRM buddy manager to allocate from
>>>> + * @start: start of the allowed range for this block
>>>> + * @end: end of the allowed range for this block
>>>> + * @size: size of the allocation
>>>> + * @min_page_size: alignment of the allocation
>>>> + * @blocks: output list head to add allocated blocks
>>>> + * @flags: DRM_BUDDY_*_ALLOCATION flags
>>>> + *
>>>> + * alloc_range_bias() called on range limitations, which traverses
>>>> + * the tree and returns the desired block.
>>>> + *
>>>> + * alloc_from_freelist() called when *no* range restrictions
>>>> + * are enforced, which picks the block from the freelist.
>>>> + *
>>>> + * Returns:
>>>> + * 0 on success, error code on failure.
>>>> + */
>>>> +int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>>> + u64 start, u64 end, u64 size,
>>>> + u64 min_page_size,
>>>> + struct list_head *blocks,
>>>> + unsigned long flags)
>>>> +{
>>>> + struct drm_buddy_block *block = NULL;
>>>> + unsigned int min_order, order;
>>>> + unsigned long pages;
>>>> + LIST_HEAD(allocated);
>>>> + int err;
>>>> +
>>>> + if (size < mm->chunk_size)
>>>> + return -EINVAL;
>>>> +
>>>> + if (min_page_size < mm->chunk_size)
>>>> + return -EINVAL;
>>>> +
>>>> + if (!is_power_of_2(min_page_size))
>>>> + return -EINVAL;
>>>> +
>>>> + if (!IS_ALIGNED(start | end | size, mm->chunk_size))
>>>> + return -EINVAL;
>>>> +
>>>> + if (end > mm->size)
>>>> + return -EINVAL;
>>>> +
>>>> + if (range_overflows(start, size, mm->size))
>>>> + return -EINVAL;
>>>> +
>>>> + /* Actual range allocation */
>>>> + if (start + size == end)
>>>> + return __drm_buddy_alloc_range(mm, start, size, blocks);
>>>> +
>>>> + pages = size >> ilog2(mm->chunk_size);
>>>> + order = fls(pages) - 1;
>>>> + min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
>>>> +
>>>> + do {
>>>> + order = min(order, (unsigned int)fls(pages) - 1);
>>>> + BUG_ON(order > mm->max_order);
>>>> + BUG_ON(order < min_order);
>>>> +
>>>> + do {
>>>> + if (start || end != size)
>>>> + /* Allocate traversing within the range */
>>>> + block = alloc_range_bias(mm, start, end, order);
>>>> + else
>>>> + /* Allocate from freelist */
>>>> + block = alloc_from_freelist(mm, order, flags);
>>>> +
>>>> + if (!IS_ERR(block))
>>>> + break;
>>>> +
>>>> + if (order-- == min_order) {
>>>> + err = -ENOSPC;
>>>> + goto err_free;
>>>> + }
>>>> + } while (1);
>>>> +
>>>> + mark_allocated(block);
>>>> + mm->avail -= drm_buddy_block_size(mm, block);
>>>> + kmemleak_update_trace(block);
>>>> + list_add_tail(&block->link, &allocated);
>>>> +
>>>> + pages -= BIT(order);
>>>> +
>>>> + if (!pages)
>>>> + break;
>>>> + } while (1);
>>>> +
>>>> + list_splice_tail(&allocated, blocks);
>>>> + return 0;
>>>> +
>>>> +err_free:
>>>> + drm_buddy_free_list(mm, &allocated);
>>>> + return err;
>>>> +}
>>>> +EXPORT_SYMBOL(drm_buddy_alloc_blocks);
>>>>
>>>> /**
>>>> * drm_buddy_block_print - print block information
>>>> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>>>> index 247714bab044..7aef6ad9fe84 100644
>>>> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>>>> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c
>>>> @@ -36,13 +36,14 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>>>> struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
>>>> struct i915_ttm_buddy_resource *bman_res;
>>>> struct drm_buddy *mm = &bman->mm;
>>>> - unsigned long n_pages;
>>>> - unsigned int min_order;
>>>> + unsigned long n_pages, lpfn;
>>>> u64 min_page_size;
>>>> u64 size;
>>>> int err;
>>>>
>>>> - GEM_BUG_ON(place->fpfn || place->lpfn);
>>>> + lpfn = place->lpfn;
>>>> + if (!lpfn)
>>>> + lpfn = man->size;
>>>>
>>>> bman_res = kzalloc(sizeof(*bman_res), GFP_KERNEL);
>>>> if (!bman_res)
>>>> @@ -60,10 +61,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>>>> min_page_size = bo->page_alignment << PAGE_SHIFT;
>>>>
>>>> GEM_BUG_ON(min_page_size < mm->chunk_size);
>>>> - min_order = ilog2(min_page_size) - ilog2(mm->chunk_size);
>>>> +
>>>> if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
>>>> + unsigned long pages;
>>>> +
>>>> size = roundup_pow_of_two(size);
>>>> - min_order = ilog2(size) - ilog2(mm->chunk_size);
>>>> + min_page_size = size;
>>>> +
>>>> + pages = size >> ilog2(mm->chunk_size);
>>>> + if (pages > lpfn)
>>>> + lpfn = pages;
>>>> }
>>>>
>>>> if (size > mm->size) {
>>>> @@ -73,34 +80,16 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man,
>>>>
>>>> n_pages = size >> ilog2(mm->chunk_size);
>>>>
>>>> - do {
>>>> - struct drm_buddy_block *block;
>>>> - unsigned int order;
>>>> -
>>>> - order = fls(n_pages) - 1;
>>>> - GEM_BUG_ON(order > mm->max_order);
>>>> - GEM_BUG_ON(order < min_order);
>>>> -
>>>> - do {
>>>> - mutex_lock(&bman->lock);
>>>> - block = drm_buddy_alloc_blocks(mm, order);
>>>> - mutex_unlock(&bman->lock);
>>>> - if (!IS_ERR(block))
>>>> - break;
>>>> -
>>>> - if (order-- == min_order) {
>>>> - err = -ENOSPC;
>>>> - goto err_free_blocks;
>>>> - }
>>>> - } while (1);
>>>> -
>>>> - n_pages -= BIT(order);
>>>> -
>>>> - list_add_tail(&block->link, &bman_res->blocks);
>>>> -
>>>> - if (!n_pages)
>>>> - break;
>>>> - } while (1);
>>>> + mutex_lock(&bman->lock);
>>>> + err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT,
>>>> + (u64)lpfn << PAGE_SHIFT,
>>>> + (u64)n_pages << PAGE_SHIFT,
>>>> + min_page_size,
>>>> + &bman_res->blocks,
>>>> + bman_res->flags);
>>>> + mutex_unlock(&bman->lock);
>>>> + if (unlikely(err))
>>>> + goto err_free_blocks;
>>>>
>>>> *res = &bman_res->base;
>>>> return 0;
>>>> @@ -268,12 +257,16 @@ int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man,
>>>> {
>>>> struct i915_ttm_buddy_manager *bman = to_buddy_manager(man);
>>>> struct drm_buddy *mm = &bman->mm;
>>>> + unsigned long flags = 0;
>>>> int ret;
>>>>
>>>> mutex_lock(&bman->lock);
>>>> - ret = drm_buddy_alloc_range(mm, &bman->reserved, start, size);
>>>> + ret = drm_buddy_alloc_blocks(mm, start,
>>>> + start + size,
>>>> + size, mm->chunk_size,
>>>> + &bman->reserved,
>>>> + flags);
>>>> mutex_unlock(&bman->lock);
>>>>
>>>> return ret;
>>>> }
>>>> -
>>>> diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
>>>> index 312077941411..72c90b432e87 100644
>>>> --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
>>>> +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h
>>>> @@ -20,6 +20,7 @@ struct drm_buddy;
>>>> *
>>>> * @base: struct ttm_resource base class we extend
>>>> * @blocks: the list of struct i915_buddy_block for this resource/allocation
>>>> + * @flags: DRM_BUDDY_*_ALLOCATION flags
>>>> * @mm: the struct i915_buddy_mm for this resource
>>>> *
>>>> * Extends the struct ttm_resource to manage an address space allocation with
>>>> @@ -28,6 +29,7 @@ struct drm_buddy;
>>>> struct i915_ttm_buddy_resource {
>>>> struct ttm_resource base;
>>>> struct list_head blocks;
>>>> + unsigned long flags;
>>>> struct drm_buddy *mm;
>>>> };
>>>>
>>>> diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
>>>> index f524db152413..1f2435426c69 100644
>>>> --- a/include/drm/drm_buddy.h
>>>> +++ b/include/drm/drm_buddy.h
>>>> @@ -131,12 +131,11 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size);
>>>>
>>>> void drm_buddy_fini(struct drm_buddy *mm);
>>>>
>>>> -struct drm_buddy_block *
>>>> -drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned int order);
>>>> -
>>>> -int drm_buddy_alloc_range(struct drm_buddy *mm,
>>>> - struct list_head *blocks,
>>>> - u64 start, u64 size);
>>>> +int drm_buddy_alloc_blocks(struct drm_buddy *mm,
>>>> + u64 start, u64 end, u64 size,
>>>> + u64 min_page_size,
>>>> + struct list_head *blocks,
>>>> + unsigned long flags);
>>>>
>>>> void drm_buddy_free_block(struct drm_buddy *mm,
>>>> struct drm_buddy_block *block);
>>>>
>
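To make the amdgpu_vram_mgr.c rework a bit more concrete, below is a rough sketch of how a TTM backend ends up driving the unified interface after this patch. It simply mirrors the i915_ttm_buddy_man_alloc() hunk above; the helper name, the lock and the pfn parameters are made up for illustration and are not actual amdgpu code.

/*
 * Illustrative caller of the unified drm_buddy_alloc_blocks(), modelled on
 * i915_ttm_buddy_man_alloc() above. When start + size == end the call is an
 * actual range allocation (the i915_ttm_buddy_man_reserve() case); otherwise
 * it allocates power-of-two blocks from the size's order down to
 * min_page_size's order, either biased to [start, end) or from the freelist.
 */
static int example_buddy_alloc(struct drm_buddy *mm, struct mutex *lock,
			       unsigned long fpfn, unsigned long lpfn,
			       unsigned long n_pages, u64 min_page_size,
			       struct list_head *blocks)
{
	int err;

	/* No upper placement limit requested: allow the whole address space. */
	if (!lpfn)
		lpfn = mm->size >> PAGE_SHIFT;

	mutex_lock(lock);
	err = drm_buddy_alloc_blocks(mm,
				     (u64)fpfn << PAGE_SHIFT,    /* start */
				     (u64)lpfn << PAGE_SHIFT,    /* end */
				     (u64)n_pages << PAGE_SHIFT, /* size */
				     min_page_size,
				     blocks, 0 /* flags */);
	mutex_unlock(lock);

	if (unlikely(err)) {
		/* Same cleanup as the i915 err_free_blocks path above. */
		mutex_lock(lock);
		drm_buddy_free_list(mm, blocks);
		mutex_unlock(lock);
	}

	return err;
}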
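And since the order / min_order math is what actually moved out of i915 into drm_buddy_alloc_blocks(), here is a small, purely illustrative userspace re-creation of it; the sizes are invented, and ilog2()/fls() are re-implemented locally only so the example compiles on its own.

#include <stdio.h>

/* floor(log2(v)), i.e. the kernel's ilog2(v), or fls(v) - 1, for v > 0 */
static unsigned int ilog2_ull(unsigned long long v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	unsigned long long chunk_size = 4096;         /* mm->chunk_size */
	unsigned long long size = 3ULL << 20;         /* 3 MiB request */
	unsigned long long min_page_size = 64 << 10;  /* 64 KiB alignment */

	/* pages = size >> ilog2(mm->chunk_size) */
	unsigned long long pages = size >> ilog2_ull(chunk_size);   /* 768 */
	/* order = fls(pages) - 1 */
	unsigned int order = ilog2_ull(pages);                       /* 9 */
	/* min_order = ilog2(min_page_size) - ilog2(mm->chunk_size) */
	unsigned int min_order = ilog2_ull(min_page_size) - ilog2_ull(chunk_size); /* 4 */

	/* First try a 2^9 * 4 KiB = 2 MiB block; never drop below 2^4 * 4 KiB = 64 KiB. */
	printf("pages=%llu order=%u min_order=%u\n", pages, order, min_order);
	return 0;
}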