回复: [PATCH v4] drm: Optimise for continuous memory allocation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[AMD Official Use Only - General]

In one ROCM +  gdm restart test,
find_continuous_blocks() succeed with ratio 35%.
the cod coverage report is below.

     772        3998 :                         if (order-- == min_order) {
     773         352 :                                 if (!(flags & DRM_BUDDY_RANGE_ALLOCATION) &&
     774         352 :                                     min_order != 0 && pages == BIT(order + 1)) {
     775          79 :                                         block = find_continuous_blocks(mm,
     776             :                                                                        order,
     777             :                                                                        flags,
     778             :                                                                        &rblock);
     779          79 :                                         if (block)
     780             :                                                 break;
     781             :                                 }
     782         300 :                                 err = -ENOSPC;
     783         300 :                                 goto err_free;

thanks
xinhui
________________________________________
发件人: Pan, Xinhui <Xinhui.Pan@xxxxxxx>
发送时间: 2022年11月29日 18:56
收件人: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
抄送: daniel@xxxxxxxx; matthew.auld@xxxxxxxxx; Koenig, Christian; dri-devel@xxxxxxxxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx; Paneer Selvam, Arunpravin; intel-gfx@xxxxxxxxxxxxxxxxxxxxx; Pan, Xinhui
主题: [PATCH v4] drm: Optimise for continuous memory allocation

Currently drm-buddy does not have full knowledge of continuous memory.

Lets consider scenario below.
order 1:    L               R
order 0: LL     LR      RL      RR
for order 1 allocation, it can offer L or R or LR+RL.

For now, we only implement L or R case for continuous memory allocation.
So this patch aims to implement the rest cases.

Adding a new member leaf_link which links all leaf blocks in asceding
order. Now we can find more than 2 sub-order blocks easier.
Say, order 4 can be combined with corresponding order 4, 2+2, 1+2+1,
0+1+2+0, 0+2+1+0.

Signed-off-by: xinhui pan <xinhui.pan@xxxxxxx>
---
change from v3:
reworked totally. adding leaf_link.

change from v2:
search continuous block in nearby root if needed

change from v1:
implement top-down continuous allocation
---
 drivers/gpu/drm/drm_buddy.c | 108 +++++++++++++++++++++++++++++++++---
 include/drm/drm_buddy.h     |   1 +
 2 files changed, 102 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index 11bb59399471..8edafb99b02c 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -80,6 +80,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
 {
        unsigned int i;
        u64 offset;
+       LIST_HEAD(leaf);

        if (size < chunk_size)
                return -EINVAL;
@@ -136,6 +137,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
                        goto out_free_roots;

                mark_free(mm, root);
+               list_add_tail(&root->leaf_link, &leaf);

                BUG_ON(i > mm->max_order);
                BUG_ON(drm_buddy_block_size(mm, root) < chunk_size);
@@ -147,6 +149,7 @@ int drm_buddy_init(struct drm_buddy *mm, u64 size, u64 chunk_size)
                i++;
        } while (size);

+       list_del(&leaf);
        return 0;

 out_free_roots:
@@ -205,6 +208,9 @@ static int split_block(struct drm_buddy *mm,
        mark_free(mm, block->left);
        mark_free(mm, block->right);

+       list_add(&block->right->leaf_link, &block->leaf_link);
+       list_add(&block->left->leaf_link, &block->leaf_link);
+       list_del(&block->leaf_link);
        mark_split(block);

        return 0;
@@ -256,6 +262,9 @@ static void __drm_buddy_free(struct drm_buddy *mm,
                        break;

                list_del(&buddy->link);
+               list_add(&parent->leaf_link, &block->leaf_link);
+               list_del(&buddy->leaf_link);
+               list_del(&block->leaf_link);

                drm_block_free(mm, block);
                drm_block_free(mm, buddy);
@@ -386,6 +395,78 @@ alloc_range_bias(struct drm_buddy *mm,
        return ERR_PTR(err);
 }

+static struct drm_buddy_block *
+find_continuous_blocks(struct drm_buddy *mm,
+                      int order,
+                      unsigned long flags,
+                      struct drm_buddy_block **rblock)
+{
+       struct list_head *head = &mm->free_list[order];
+       struct drm_buddy_block *free_block, *max_block = NULL, *end, *begin;
+       u64 pages = BIT(order + 1);
+       u64 cur_pages;
+
+       list_for_each_entry(free_block, head, link) {
+               if (max_block) {
+                       if (!(flags & DRM_BUDDY_TOPDOWN_ALLOCATION))
+                               break;
+
+                       if (drm_buddy_block_offset(free_block) <
+                           drm_buddy_block_offset(max_block))
+                               continue;
+               }
+
+               cur_pages = BIT(order);
+               begin = end = free_block;
+               while (true) {
+                       struct drm_buddy_block *prev, *next;
+                       int prev_order, next_order;
+
+                       prev = list_prev_entry(begin, leaf_link);
+                       if (!drm_buddy_block_is_free(prev) ||
+                           drm_buddy_block_offset(prev) >
+                           drm_buddy_block_offset(begin)) {
+                               prev = NULL;
+                       }
+                       next = list_next_entry(end, leaf_link);
+                       if (!drm_buddy_block_is_free(next) ||
+                           drm_buddy_block_offset(next) <
+                           drm_buddy_block_offset(end)) {
+                               next = NULL;
+                       }
+                       if (!prev && !next)
+                               break;
+
+                       prev_order = prev ? drm_buddy_block_order(prev) : -1;
+                       next_order = next ? drm_buddy_block_order(next) : -1;
+                       if (next_order >= prev_order) {
+                               BUG_ON(drm_buddy_block_offset(end) +
+                                      drm_buddy_block_size(mm, end) !=
+                                      drm_buddy_block_offset(next));
+                               end = next;
+                               cur_pages += BIT(drm_buddy_block_order(next));
+                       }
+                       if (prev_order >= next_order) {
+                               BUG_ON(drm_buddy_block_offset(prev) +
+                                      drm_buddy_block_size(mm, prev) !=
+                                      drm_buddy_block_offset(begin));
+                               begin = prev;
+                               cur_pages += BIT(drm_buddy_block_order(prev));
+                       }
+                       if (pages == cur_pages)
+                               break;
+                       BUG_ON(pages < cur_pages);
+               }
+
+               if (pages > cur_pages)
+                       continue;
+
+               *rblock = end;
+               max_block = begin;
+       }
+       return max_block;
+}
+
 static struct drm_buddy_block *
 get_maxblock(struct list_head *head)
 {
@@ -637,7 +718,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
                           struct list_head *blocks,
                           unsigned long flags)
 {
-       struct drm_buddy_block *block = NULL;
+       struct drm_buddy_block *block = NULL, *rblock = NULL;
        unsigned int min_order, order;
        unsigned long pages;
        LIST_HEAD(allocated);
@@ -689,17 +770,30 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm,
                                break;

                        if (order-- == min_order) {
+                               if (!(flags & DRM_BUDDY_RANGE_ALLOCATION) &&
+                                   min_order != 0 && pages == BIT(order + 1)) {
+                                       block = find_continuous_blocks(mm,
+                                                                      order,
+                                                                      flags,
+                                                                      &rblock);
+                                       if (block)
+                                               break;
+                               }
                                err = -ENOSPC;
                                goto err_free;
                        }
                } while (1);

-               mark_allocated(block);
-               mm->avail -= drm_buddy_block_size(mm, block);
-               kmemleak_update_trace(block);
-               list_add_tail(&block->link, &allocated);
-
-               pages -= BIT(order);
+               do {
+                       mark_allocated(block);
+                       mm->avail -= drm_buddy_block_size(mm, block);
+                       kmemleak_update_trace(block);
+                       list_add_tail(&block->link, &allocated);
+                       pages -= BIT(drm_buddy_block_order(block));
+                       if (block == rblock || !rblock)
+                               break;
+                       block = list_next_entry(block, leaf_link);
+               } while (true);

                if (!pages)
                        break;
diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h
index 572077ff8ae7..c5437bd4f4f3 100644
--- a/include/drm/drm_buddy.h
+++ b/include/drm/drm_buddy.h
@@ -50,6 +50,7 @@ struct drm_buddy_block {
         */
        struct list_head link;
        struct list_head tmp_link;
+       struct list_head leaf_link;
 };

 /* Order-zero must be at least PAGE_SIZE */
--
2.34.1

<<attachment: winmail.dat>>


[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux