Re: [PATCH v6 11/12] drm/ttm, drm/xe: Add a shrinker for xe bos

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

On 03/07/2024 16:38, Thomas Hellström wrote:
Rather than relying on the TTM watermark accounting add a shrinker
for xe_bos in TT or system memory.

Leverage the newly added TTM per-page shrinking and shmem backup
support.

Although xe doesn't fully support WONTNEED (purgeable) bos yet,
introduce and add shrinker support for purgeable ttm_tts.

v2:
- Cleanups bugfixes and a KUNIT shrinker test.
- Add writeback support, and activate if kswapd.
v3:
- Move the try_shrink() helper to core TTM.
- Minor cleanups.
v4:
- Add runtime pm for the shrinker. Shrinking may require an active
   device for CCS metadata copying.
v5:
- Separately purge ghost- and zombie objects in the shrinker.
- Fix a format specifier - type inconsistency. (Kernel test robot).

Cc: Christian König <christian.koenig@xxxxxxx>
Cc: Somalapuram Amaranath <Amaranath.Somalapuram@xxxxxxx>
Cc: Matthew Brost <matthew.brost@xxxxxxxxx>
Cc: <dri-devel@xxxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
---
  drivers/gpu/drm/ttm/ttm_bo_util.c     |  67 ++++++
  drivers/gpu/drm/xe/Makefile           |   1 +
  drivers/gpu/drm/xe/tests/xe_bo.c      | 118 +++++++++++
  drivers/gpu/drm/xe/tests/xe_bo_test.c |   1 +
  drivers/gpu/drm/xe/tests/xe_bo_test.h |   1 +
  drivers/gpu/drm/xe/xe_bo.c            | 155 ++++++++++++--
  drivers/gpu/drm/xe/xe_bo.h            |  26 +++
  drivers/gpu/drm/xe/xe_device.c        |   8 +
  drivers/gpu/drm/xe/xe_device_types.h  |   2 +
  drivers/gpu/drm/xe/xe_shrinker.c      | 287 ++++++++++++++++++++++++++
  drivers/gpu/drm/xe/xe_shrinker.h      |  18 ++
  include/drm/ttm/ttm_bo.h              |   3 +
  12 files changed, 671 insertions(+), 16 deletions(-)
  create mode 100644 drivers/gpu/drm/xe/xe_shrinker.c
  create mode 100644 drivers/gpu/drm/xe/xe_shrinker.h

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index c4f678f30fc2..563e96a4cf06 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -924,3 +924,70 @@ long ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
return progress;
  }
+EXPORT_SYMBOL(ttm_lru_walk_for_evict);
+
+/**
+ * ttm_bo_try_shrink - LRU walk helper to shrink a ttm buffer object.
+ * @walk: The struct xe_ttm_lru_walk that describes the walk.
+ * @bo: The buffer object.
+ * @purge: Whether to attempt to purge the bo content since it's no
+ * longer needed.
+ * @writeback: If !@purge, attempt to write out to persistent storage.
+ *
+ * The function uses the ttm_tt_back_up functionality to back up or
+ * purge a struct ttm_tt. If the bo is not in system, it's first
+ * moved there.
+ *
+ * Return: The number of pages shrunken or purged, or
+ * negative error code on failure.
+ */
+long ttm_bo_try_shrink(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo,
+		       bool purge, bool writeback)
+{
+	static const struct ttm_place sys_placement_flags = {
+		.fpfn = 0,
+		.lpfn = 0,
+		.mem_type = TTM_PL_SYSTEM,
+		.flags = 0,
+	};
+	static struct ttm_placement sys_placement = {
+		.num_placement = 1,
+		.placement = &sys_placement_flags,
+	};
+	struct ttm_operation_ctx *ctx = walk->ctx;
+	struct ttm_tt *tt = bo->ttm;
+	long lret;
+
+	dma_resv_assert_held(bo->base.resv);
+
+	if (!tt || !ttm_tt_is_populated(tt))
+		return 0;
+
+	if (bo->resource->mem_type != TTM_PL_SYSTEM) {
+		int ret = ttm_bo_validate(bo, &sys_placement, ctx);
+
+		if (ret) {
+			if (ret == -EINTR || ret == -EDEADLK ||
+			    ret == -ERESTARTSYS)
+				return ret;
+			return 0;
+		}
+	}
+
+	lret = ttm_bo_wait_ctx(bo, ctx);
+	if (lret < 0) {
+		if (lret == -ERESTARTSYS)
+			return lret;
+		return 0;
+	}
+
+	if (bo->deleted)
+		lret = ttm_tt_backup(bo->bdev, tt, true, writeback);
+	else
+		lret = ttm_tt_backup(bo->bdev, tt, purge, writeback);
+	if (lret < 0 && lret != -EINTR)
+		return 0;
+
+	return lret;
+}
+EXPORT_SYMBOL(ttm_bo_try_shrink);
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index b1e03bfe4a68..1eba51bdd172 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -112,6 +112,7 @@ xe-y += xe_bb.o \
  	xe_ring_ops.o \
  	xe_sa.o \
  	xe_sched_job.o \
+	xe_shrinker.o \
  	xe_step.o \
  	xe_sync.o \
  	xe_tile.o \
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 9f3c02826464..49617f16dc76 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -6,6 +6,8 @@
  #include <kunit/test.h>
  #include <kunit/visibility.h>
+#include <uapi/linux/sysinfo.h>
+
  #include "tests/xe_bo_test.h"
  #include "tests/xe_pci_test.h"
  #include "tests/xe_test.h"
@@ -350,3 +352,119 @@ void xe_bo_evict_kunit(struct kunit *test)
  	xe_call_for_each_device(evict_test_run_device);
  }
  EXPORT_SYMBOL_IF_KUNIT(xe_bo_evict_kunit);
+
+struct xe_bo_link {
+	struct list_head link;
+	struct xe_bo *bo;
+};
+
+#define XE_BO_SHRINK_SIZE ((unsigned long)SZ_64M)
+
+/*
+ * Try to create system bos corresponding to twice the amount
+ * of available system memory to test shrinker functionality.
+ * If no swap space is available to accommodate the
+ * memory overcommit, mark bos purgeable.
+ */
+static int shrink_test_run_device(struct xe_device *xe)
+{
+	struct kunit *test = xe_cur_kunit();
+	LIST_HEAD(bos);
+	struct xe_bo_link *link, *next;
+	struct sysinfo si;
+	size_t total, alloced;
+	unsigned int interrupted = 0, successful = 0;
+
+	si_meminfo(&si);
+	total = si.freeram * si.mem_unit;
+
+	kunit_info(test, "Free ram is %lu bytes. Will allocate twice of that.\n",
+		   (unsigned long) total);
+
+	total <<= 1;
+	for (alloced = 0; alloced < total ; alloced += XE_BO_SHRINK_SIZE) {
+		struct xe_bo *bo;
+		unsigned int mem_type;
+
+		link = kzalloc(sizeof(*link), GFP_KERNEL);
+		if (!link) {
+			KUNIT_FAIL(test, "Unexpeced link allocation failure\n");
+			break;
+		}
+
+		INIT_LIST_HEAD(&link->link);
+
+		/* We can create bos using WC caching here. But it is slower. */
+		bo = xe_bo_create_user(xe, NULL, NULL, XE_BO_SHRINK_SIZE,
+				       DRM_XE_GEM_CPU_CACHING_WB,
+				       ttm_bo_type_device,
+				       XE_BO_FLAG_SYSTEM);
+		if (IS_ERR(bo)) {
+			if (bo != ERR_PTR(-ENOMEM) && bo != ERR_PTR(-ENOSPC) &&
+			    bo != ERR_PTR(-EINTR) && bo != ERR_PTR(-ERESTARTSYS))
+				KUNIT_FAIL(test, "Error creating bo: %pe\n", bo);
+			kfree(link);
+			break;
+		}
+		link->bo = bo;
+		list_add_tail(&link->link, &bos);
+		xe_bo_lock(bo, false);
+
+		/*
+		 * If we're low on swap entries, we can't shrink unless the bo
+		 * is marked purgeable.
+		 */
+		if (get_nr_swap_pages() < (XE_BO_SHRINK_SIZE >> PAGE_SHIFT) * 128) {
+			struct xe_ttm_tt *xe_tt =
+				container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
+			long num_pages = xe_tt->ttm.num_pages;
+
+			xe_tt->purgeable = true;
+			xe_shrinker_mod_pages(xe->mem.shrinker, -num_pages,
+					      num_pages);
+		}
+
+		mem_type = bo->ttm.resource->mem_type;
+		xe_bo_unlock(bo);
+		if (mem_type != XE_PL_TT)
+			KUNIT_FAIL(test, "Bo in incorrect memory type: %u\n",
+				   bo->ttm.resource->mem_type);
+		cond_resched();
+		if (signal_pending(current))
+			break;
+	}
+
+	/* Read back and destroy bos */
+	list_for_each_entry_safe_reverse(link, next, &bos, link) {
+		static struct ttm_operation_ctx ctx = {.interruptible = true};
+		struct xe_bo *bo = link->bo;
+		int ret;
+
+		if (!signal_pending(current)) {
+			xe_bo_lock(bo, NULL);
+			ret = ttm_bo_validate(&bo->ttm, &tt_placement, &ctx);
+			xe_bo_unlock(bo);
+			if (ret && ret != -EINTR)
+				KUNIT_FAIL(test, "Validation failed: %pe\n",
+					   ERR_PTR(ret));
+			else if (ret)
+				interrupted++;
+			else
+				successful++;
+		}
+		xe_bo_put(link->bo);
+		list_del(&link->link);
+		kfree(link);
+		cond_resched();
+	}
+	kunit_info(test, "Readbacks interrupted: %u successful: %u\n",
+		   interrupted, successful);
+
+	return 0;
+}
+
+void xe_bo_shrink_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(shrink_test_run_device);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_bo_shrink_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c
index a324cde77db8..317fa923e287 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo_test.c
@@ -10,6 +10,7 @@
  static struct kunit_case xe_bo_tests[] = {
  	KUNIT_CASE(xe_ccs_migrate_kunit),
  	KUNIT_CASE(xe_bo_evict_kunit),
+	KUNIT_CASE_SLOW(xe_bo_shrink_kunit),
  	{}
  };
diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.h b/drivers/gpu/drm/xe/tests/xe_bo_test.h
index 0113ab45066a..7f44d14a45c5 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_bo_test.h
@@ -10,5 +10,6 @@ struct kunit;
void xe_ccs_migrate_kunit(struct kunit *test);
  void xe_bo_evict_kunit(struct kunit *test);
+void xe_bo_shrink_kunit(struct kunit *test);
#endif
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 65c696966e96..6ab63d1642ae 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -10,6 +10,7 @@
  #include <drm/drm_drv.h>
  #include <drm/drm_gem_ttm_helper.h>
  #include <drm/drm_managed.h>
+#include <drm/ttm/ttm_backup.h>
  #include <drm/ttm/ttm_device.h>
  #include <drm/ttm/ttm_placement.h>
  #include <drm/ttm/ttm_tt.h>
@@ -25,6 +26,7 @@
  #include "xe_pm.h"
  #include "xe_preempt_fence.h"
  #include "xe_res_cursor.h"
+#include "xe_shrinker.h"
  #include "xe_trace_bo.h"
  #include "xe_ttm_stolen_mgr.h"
  #include "xe_vm.h"
@@ -278,11 +280,15 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo,
  	}
  }
+/* struct xe_ttm_tt - Subclassed ttm_tt for xe */
  struct xe_ttm_tt {
  	struct ttm_tt ttm;
-	struct device *dev;
+	/** @xe - The xe device */
+	struct xe_device *xe;
  	struct sg_table sgt;
  	struct sg_table *sg;
+	/** @purgeable - Whether the bo is purgeable (WONTNEED) */
+	bool purgeable;
  };
static int xe_tt_map_sg(struct ttm_tt *tt)
@@ -291,7 +297,8 @@ static int xe_tt_map_sg(struct ttm_tt *tt)
  	unsigned long num_pages = tt->num_pages;
  	int ret;
- XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
+	XE_WARN_ON((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
+		   !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE));
if (xe_tt->sg)
  		return 0;
@@ -299,13 +306,13 @@ static int xe_tt_map_sg(struct ttm_tt *tt)
  	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
  						num_pages, 0,
  						(u64)num_pages << PAGE_SHIFT,
-						xe_sg_segment_size(xe_tt->dev),
+						xe_sg_segment_size(xe_tt->xe->drm.dev),
  						GFP_KERNEL);
  	if (ret)
  		return ret;
xe_tt->sg = &xe_tt->sgt;
-	ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL,
+	ret = dma_map_sgtable(xe_tt->xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL,
  			      DMA_ATTR_SKIP_CPU_SYNC);
  	if (ret) {
  		sg_free_table(xe_tt->sg);
@@ -321,7 +328,7 @@ static void xe_tt_unmap_sg(struct ttm_tt *tt)
  	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
if (xe_tt->sg) {
-		dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
+		dma_unmap_sgtable(xe_tt->xe->drm.dev, xe_tt->sg,
  				  DMA_BIDIRECTIONAL, 0);
  		sg_free_table(xe_tt->sg);
  		xe_tt->sg = NULL;
@@ -336,21 +343,41 @@ struct sg_table *xe_bo_sg(struct xe_bo *bo)
  	return xe_tt->sg;
  }
+/*
+ * Account ttm pages against the device shrinker's shrinkable and
+ * purgeable counts.
+ */
+static void xe_ttm_tt_account(struct ttm_tt *tt, bool add)
+{
+	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
+	long num_pages = tt->num_pages;
+
+	if (!add)
+		num_pages = -num_pages;
+
+	if (xe_tt->purgeable)
+		xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, 0, num_pages);
+	else
+		xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, num_pages, 0);
+}
+
  static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
  				       u32 page_flags)
  {
  	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
  	struct xe_device *xe = xe_bo_device(bo);
-	struct xe_ttm_tt *tt;
+	struct xe_ttm_tt *xe_tt;
+	struct ttm_tt *tt;
  	unsigned long extra_pages;
  	enum ttm_caching caching;
  	int err;
- tt = kzalloc(sizeof(*tt), GFP_KERNEL);
-	if (!tt)
+	xe_tt = kzalloc(sizeof(*xe_tt), GFP_KERNEL);
+	if (!xe_tt)
  		return NULL;
- tt->dev = xe->drm.dev;
+	tt = &xe_tt->ttm;
+	xe_tt->xe = xe;
extra_pages = 0;
  	if (xe_bo_needs_ccs_pages(bo))
@@ -387,42 +414,128 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
  		caching = ttm_uncached;
  	}
- err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
+	if (ttm_bo->type != ttm_bo_type_sg)
+		page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE;
+
+	err = ttm_tt_init(tt, &bo->ttm, page_flags, caching, extra_pages);
  	if (err) {
-		kfree(tt);
+		kfree(xe_tt);
  		return NULL;
  	}
- return &tt->ttm;
+	tt->backup = ttm_backup_shmem_create(tt->num_pages << PAGE_SHIFT);

I guess we should make this (loff_t)tt->num_pages << PAGE_SHIFT or similar?



[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux