[AMD Official Use Only - General] comments inline. ________________________________________ 发件人: Koenig, Christian <Christian.Koenig@xxxxxxx> 发送时间: 2022年11月29日 19:32 收件人: Pan, Xinhui; amd-gfx@xxxxxxxxxxxxxxxxxxxxx 抄送: daniel@xxxxxxxx; matthew.auld@xxxxxxxxx; dri-devel@xxxxxxxxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx; Paneer Selvam, Arunpravin; intel-gfx@xxxxxxxxxxxxxxxxxxxxx 主题: Re: [PATCH v4] drm: Optimise for continuous memory allocation Am 29.11.22 um 11:56 schrieb xinhui pan: > Currently drm-buddy does not have full knowledge of continuous memory. > > Lets consider scenario below. > order 1: L R > order 0: LL LR RL RR > for order 1 allocation, it can offer L or R or LR+RL. > > For now, we only implement L or R case for continuous memory allocation. > So this patch aims to implement the rest cases. > > Adding a new member leaf_link which links all leaf blocks in asceding > order. Now we can find more than 2 sub-order blocks easier. > Say, order 4 can be combined with corresponding order 4, 2+2, 1+2+1, > 0+1+2+0, 0+2+1+0. Well that description is a bit confusing and doesn't make to much sense to me. When you have two adjacent free order 0 blocks then those should be automatically combined into an order 1. This is a fundamental property of the buddy allocator, otherwise the whole algorithm won't work. [xh] sorry, The order above is not 4, should be 3. order 3 can be combined with corresponding order 3, 2+2, 1+2+1, 0+1+2+0, 0+2+1+0 the order 0 + 1 + 2 + 0 case does not have two same order 0 adjacent. they are in different tree. looks like below order 3: L3 R3 order 2: L2 (R2)* L2* order 1: L1 (R1) L1 order 0: L0 (R0) (L0) R0 + R1+R2 +L0 with () around combined to be order 3. R2 + L2 with * followed combined to be order 3. etc.... When you have the case of a free order 1 block with two adjacent free order 0 blocks then we a fragmented address space. In this case the best approach is to fail the allocation and start to swap things out. [xh] Eviction is expensive. And if it still fails to find the continuous memory with this approach, then let's evict. So what exactly is the goal here? Regards, Christian. > > Signed-off-by: xinhui pan <xinhui.pan@xxxxxxx> > --- > change from v3: > reworked totally. adding leaf_link. > > change from v2: > search continuous block in nearby root if needed > > change from v1: > implement top-down continuous allocation > --- > drivers/gpu/drm/drm_buddy.c | 108 +++++++++++++++++++++++++++++++++--- > include/drm/drm_buddy.h | 1 + > 2 files changed, 102 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c > index 11bb59399471..8edafb99b02c 100644 > --- a/drivers/gpu/drm/drm_buddy.c > +++ b/drivers/gpu/drm/drm_buddy.c > @@ -80,6 +80,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size) > { > unsigned int i; > u64 offset; > + LIST_HEAD(leaf); > > if (size < chunk_size) > return -EINVAL; > @@ -136,6 +137,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size) > goto out_free_roots; > > mark_free(mm, root); > + list_add_tail(&root->leaf_link, &leaf); > > BUG_ON(i > mm->max_order); > BUG_ON(drm_buddy_block_size(mm, root) < chunk_size); > @@ -147,6 +149,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size) > i++; > } while (size); > > + list_del(&leaf); > return 0; > > out_free_roots: > @@ -205,6 +208,9 @@ static int split_block(struct drm_buddy *mm, > mark_free(mm, block->left); > mark_free(mm, block->right); > > + list_add(&block->right->leaf_link, &block->leaf_link); > + list_add(&block->left->leaf_link, &block->leaf_link); > + list_del(&block->leaf_link); > mark_split(block); > > return 0; > @@ -256,6 +262,9 @@ static void __drm_buddy_free(struct drm_buddy *mm, > break; > > list_del(&buddy->link); > + list_add(&parent->leaf_link, &block->leaf_link); > + list_del(&buddy->leaf_link); > + list_del(&block->leaf_link); > > drm_block_free(mm, block); > drm_block_free(mm, buddy); > @@ -386,6 +395,78 @@ alloc_range_bias(struct drm_buddy *mm, > return ERR_PTR(err); > } > > +static struct drm_buddy_block * > +find_continuous_blocks(struct drm_buddy *mm, > + int order, > + unsigned long flags, > + struct drm_buddy_block **rblock) > +{ > + struct list_head *head = &mm->free_list[order]; > + struct drm_buddy_block *free_block, *max_block = NULL, *end, *begin; > + u64 pages = BIT(order + 1); > + u64 cur_pages; > + > + list_for_each_entry(free_block, head, link) { > + if (max_block) { > + if (!(flags & DRM_BUDDY_TOPDOWN_ALLOCATION)) > + break; > + > + if (drm_buddy_block_offset(free_block) < > + drm_buddy_block_offset(max_block)) > + continue; > + } > + > + cur_pages = BIT(order); > + begin = end = free_block; > + while (true) { > + struct drm_buddy_block *prev, *next; > + int prev_order, next_order; > + > + prev = list_prev_entry(begin, leaf_link); > + if (!drm_buddy_block_is_free(prev) || > + drm_buddy_block_offset(prev) > > + drm_buddy_block_offset(begin)) { > + prev = NULL; > + } > + next = list_next_entry(end, leaf_link); > + if (!drm_buddy_block_is_free(next) || > + drm_buddy_block_offset(next) < > + drm_buddy_block_offset(end)) { > + next = NULL; > + } > + if (!prev && !next) > + break; > + > + prev_order = prev ? drm_buddy_block_order(prev) : -1; > + next_order = next ? drm_buddy_block_order(next) : -1; > + if (next_order >= prev_order) { > + BUG_ON(drm_buddy_block_offset(end) + > + drm_buddy_block_size(mm, end) != > + drm_buddy_block_offset(next)); > + end = next; > + cur_pages += BIT(drm_buddy_block_order(next)); > + } > + if (prev_order >= next_order) { > + BUG_ON(drm_buddy_block_offset(prev) + > + drm_buddy_block_size(mm, prev) != > + drm_buddy_block_offset(begin)); > + begin = prev; > + cur_pages += BIT(drm_buddy_block_order(prev)); > + } > + if (pages == cur_pages) > + break; > + BUG_ON(pages < cur_pages); > + } > + > + if (pages > cur_pages) > + continue; > + > + *rblock = end; > + max_block = begin; > + } > + return max_block; > +} > + > static struct drm_buddy_block * > get_maxblock(struct list_head *head) > { > @@ -637,7 +718,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, > struct list_head *blocks, > unsigned long flags) > { > - struct drm_buddy_block *block = NULL; > + struct drm_buddy_block *block = NULL, *rblock = NULL; > unsigned int min_order, order; > unsigned long pages; > LIST_HEAD(allocated); > @@ -689,17 +770,30 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, > break; > > if (order-- == min_order) { > + if (!(flags & DRM_BUDDY_RANGE_ALLOCATION) && > + min_order != 0 && pages == BIT(order + 1)) { > + block = find_continuous_blocks(mm, > + order, > + flags, > + &rblock); > + if (block) > + break; > + } > err = -ENOSPC; > goto err_free; > } > } while (1); > > - mark_allocated(block); > - mm->avail -= drm_buddy_block_size(mm, block); > - kmemleak_update_trace(block); > - list_add_tail(&block->link, &allocated); > - > - pages -= BIT(order); > + do { > + mark_allocated(block); > + mm->avail -= drm_buddy_block_size(mm, block); > + kmemleak_update_trace(block); > + list_add_tail(&block->link, &allocated); > + pages -= BIT(drm_buddy_block_order(block)); > + if (block == rblock || !rblock) > + break; > + block = list_next_entry(block, leaf_link); > + } while (true); > > if (!pages) > break; > diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h > index 572077ff8ae7..c5437bd4f4f3 100644 > --- a/include/drm/drm_buddy.h > +++ b/include/drm/drm_buddy.h > @@ -50,6 +50,7 @@ struct drm_buddy_block { > */ > struct list_head link; > struct list_head tmp_link; > + struct list_head leaf_link; > }; > > /* Order-zero must be at least PAGE_SIZE */
<<attachment: winmail.dat>>