Am 20.06.24 um 18:01 schrieb Nirmoy Das:
Currently the ttm pool does not honor the TTM_TT_FLAG_ZERO_ALLOC flag and
instead always clears pages on free. This does help with allocation latency,
but the clearing happens even if the drm driver doesn't pass the flag. If
clear-on-free is needed, a new flag can be added for that purpose.
Mhm, thinking more about it, that will most likely get pushback from
others as well.
How about the attached patch? We just skip clearing pages when the
driver sets the ZERO_ALLOC flag again before freeing them.
Maybe rename the flag or add a new one for that, but in general that
looks like the option with the least impact.
Regards,
Christian.
Cc: Christian Koenig <christian.koenig@xxxxxxx>
Cc: "Thomas Hellström" <thomas.hellstrom@xxxxxxxxxxxxxxx>
Cc: Matthew Auld <matthew.auld@xxxxxxxxx>
Signed-off-by: Nirmoy Das <nirmoy.das@xxxxxxxxx>
---
drivers/gpu/drm/ttm/ttm_pool.c | 31 +++++++++++++++++--------------
1 file changed, 17 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 6e1fd6985ffc..cbbd722185ee 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -224,15 +224,6 @@ static void ttm_pool_unmap(struct ttm_pool *pool, dma_addr_t dma_addr,
/* Give pages into a specific pool_type */
static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p)
{
- unsigned int i, num_pages = 1 << pt->order;
-
- for (i = 0; i < num_pages; ++i) {
- if (PageHighMem(p))
- clear_highpage(p + i);
- else
- clear_page(page_address(p + i));
- }
-
spin_lock(&pt->lock);
list_add(&p->lru, &pt->pages);
spin_unlock(&pt->lock);
@@ -240,15 +231,26 @@ static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p)
}
/* Take pages from a specific pool_type, return NULL when nothing available */
-static struct page *ttm_pool_type_take(struct ttm_pool_type *pt)
+static struct page *ttm_pool_type_take(struct ttm_pool_type *pt, bool clear)
{
struct page *p;
spin_lock(&pt->lock);
p = list_first_entry_or_null(&pt->pages, typeof(*p), lru);
if (p) {
+ unsigned int i, num_pages = 1 << pt->order;
+
atomic_long_sub(1 << pt->order, &allocated_pages);
list_del(&p->lru);
+ if (clear) {
+ for (i = 0; i < num_pages; ++i) {
+ if (PageHighMem(p))
+ clear_highpage(p + i);
+ else
+ clear_page(page_address(p + i));
+ }
+ }
+
}
spin_unlock(&pt->lock);
@@ -279,7 +281,7 @@ static void ttm_pool_type_fini(struct ttm_pool_type *pt)
list_del(&pt->shrinker_list);
spin_unlock(&shrinker_lock);
- while ((p = ttm_pool_type_take(pt)))
+ while ((p = ttm_pool_type_take(pt, false)))
ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
}
@@ -330,7 +332,7 @@ static unsigned int ttm_pool_shrink(void)
list_move_tail(&pt->shrinker_list, &shrinker_list);
spin_unlock(&shrinker_lock);
- p = ttm_pool_type_take(pt);
+ p = ttm_pool_type_take(pt, false);
if (p) {
ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
num_pages = 1 << pt->order;
@@ -457,10 +459,11 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
num_pages;
order = min_t(unsigned int, order, __fls(num_pages))) {
struct ttm_pool_type *pt;
+ bool clear = tt->page_flags & TTM_TT_FLAG_ZERO_ALLOC;
page_caching = tt->caching;
pt = ttm_pool_select_type(pool, tt->caching, order);
- p = pt ? ttm_pool_type_take(pt) : NULL;
+ p = pt ? ttm_pool_type_take(pt, clear) : NULL;
if (p) {
r = ttm_pool_apply_caching(caching, pages,
tt->caching);
@@ -480,7 +483,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
if (num_pages < (1 << order))
break;
- p = ttm_pool_type_take(pt);
+ p = ttm_pool_type_take(pt, clear);
} while (p);
}
From 466b7b315af74bae635b9245a1d9e6619a3da171 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@xxxxxxx>
Date: Fri, 21 Jun 2024 16:50:59 +0200
Subject: [PATCH] drm/ttm: skip page clear if ZERO_ALLOC flag is set on free
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
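The pool now drops TTM_TT_FLAG_ZERO_ALLOC after a successful allocation and
skips the CPU clear on free if the driver has set the flag again. This
allows the driver to clear the pages using DMA instead of the CPU.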
Signed-off-by: Christian König <christian.koenig@xxxxxxx>
---
drivers/gpu/drm/ttm/ttm_pool.c | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 6e1fd6985ffc..6add5006c575 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -222,15 +222,18 @@ static void ttm_pool_unmap(struct ttm_pool *pool, dma_addr_t dma_addr,
}
/* Give pages into a specific pool_type */
-static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p)
+static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p,
+ bool cleared)
{
unsigned int i, num_pages = 1 << pt->order;
- for (i = 0; i < num_pages; ++i) {
- if (PageHighMem(p))
- clear_highpage(p + i);
- else
- clear_page(page_address(p + i));
+ if (!cleared) {
+ for (i = 0; i < num_pages; ++i) {
+ if (PageHighMem(p))
+ clear_highpage(p + i);
+ else
+ clear_page(page_address(p + i));
+ }
}
spin_lock(&pt->lock);
@@ -394,6 +397,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt,
pgoff_t start_page, pgoff_t end_page)
{
struct page **pages = &tt->pages[start_page];
+ bool cleared = tt->page_flags & TTM_TT_FLAG_ZERO_ALLOC;
unsigned int order;
pgoff_t i, nr;
@@ -407,7 +411,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt,
pt = ttm_pool_select_type(pool, caching, order);
if (pt)
- ttm_pool_type_give(pt, *pages);
+ ttm_pool_type_give(pt, *pages, cleared);
else
ttm_pool_free_page(pool, caching, order, *pages);
}
@@ -517,6 +521,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
if (r)
goto error_free_all;
+ tt->page_flags &= ~TTM_TT_FLAG_ZERO_ALLOC;
return 0;
error_free_page:
--
2.34.1