FAILED: patch "[PATCH] drm/ttm: stop pooling cached NUMA pages v2" failed to apply to 6.6-stable tree

<gregkh@xxxxxxxxxxxxxxxxxxx> · Tue, 23 Apr 2024 06:07:41 -0700

The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@xxxxxxxxxxxxxxx>.

To reproduce the conflict and resubmit, you may use the following commands:

git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x b6976f323a8687cc0d55bc92c2086fd934324ed5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@xxxxxxxxxxxxxxx>' --in-reply-to '2024042340-laborious-overact-587b@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..

Possible dependencies:

b6976f323a86 ("drm/ttm: stop pooling cached NUMA pages v2")
fd37721803c6 ("mm, treewide: introduce NR_PAGE_ORDERS")

thanks,

greg k-h

------------------ original commit in Linus's tree ------------------

>From b6976f323a8687cc0d55bc92c2086fd934324ed5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <ckoenig.leichtzumerken@xxxxxxxxx>
Date: Mon, 15 Apr 2024 15:48:21 +0200
Subject: [PATCH] drm/ttm: stop pooling cached NUMA pages v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We only pool write combined and uncached allocations because they
require extra overhead on allocation and release.

If we also pool cached NUMA it not only means some extra unnecessary
overhead, but also that under memory pressure it can happen that
pages from the wrong NUMA node enters the pool and are re-used
over and over again.

This can lead to performance reduction after running into memory
pressure.

v2: restructure and cleanup the code a bit from the internal hack to
    test this.

Signed-off-by: Christian König <christian.koenig@xxxxxxx>
Fixes: 4482d3c94d7f ("drm/ttm: add NUMA node id to the pool")
CC: stable@xxxxxxxxxxxxxxx
Reviewed-by: Felix Kuehling <felix.kuehling@xxxxxxx>
Link: https://patchwork.freedesktop.org/patch/msgid/20240415134821.1919-1-christian.koenig@xxxxxxx

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 112438d965ff..6e1fd6985ffc 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -288,17 +288,23 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
 						  enum ttm_caching caching,
 						  unsigned int order)
 {
-	if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE)
+	if (pool->use_dma_alloc)
 		return &pool->caching[caching].orders[order];
 
 #ifdef CONFIG_X86
 	switch (caching) {
 	case ttm_write_combined:
+		if (pool->nid != NUMA_NO_NODE)
+			return &pool->caching[caching].orders[order];
+
 		if (pool->use_dma32)
 			return &global_dma32_write_combined[order];
 
 		return &global_write_combined[order];
 	case ttm_uncached:
+		if (pool->nid != NUMA_NO_NODE)
+			return &pool->caching[caching].orders[order];
+
 		if (pool->use_dma32)
 			return &global_dma32_uncached[order];
 
@@ -566,11 +572,17 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
 	pool->use_dma_alloc = use_dma_alloc;
 	pool->use_dma32 = use_dma32;
 
-	if (use_dma_alloc || nid != NUMA_NO_NODE) {
-		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-			for (j = 0; j < NR_PAGE_ORDERS; ++j)
-				ttm_pool_type_init(&pool->caching[i].orders[j],
-						   pool, i, j);
+	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
+		for (j = 0; j < NR_PAGE_ORDERS; ++j) {
+			struct ttm_pool_type *pt;
+
+			/* Initialize only pool types which are actually used */
+			pt = ttm_pool_select_type(pool, i, j);
+			if (pt != &pool->caching[i].orders[j])
+				continue;
+
+			ttm_pool_type_init(pt, pool, i, j);
+		}
 	}
 }
 EXPORT_SYMBOL(ttm_pool_init);
@@ -599,10 +611,16 @@ void ttm_pool_fini(struct ttm_pool *pool)
 {
 	unsigned int i, j;
 
-	if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) {
-		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-			for (j = 0; j < NR_PAGE_ORDERS; ++j)
-				ttm_pool_type_fini(&pool->caching[i].orders[j]);
+	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
+		for (j = 0; j < NR_PAGE_ORDERS; ++j) {
+			struct ttm_pool_type *pt;
+
+			pt = ttm_pool_select_type(pool, i, j);
+			if (pt != &pool->caching[i].orders[j])
+				continue;
+
+			ttm_pool_type_fini(pt);
+		}
 	}
 
 	/* We removed the pool types from the LRU, but we need to also make sure