Re: [PATCH 06/11] drm/i915: Allocate active tracking nodes from a slabcache

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 30/01/2019 02:19, Chris Wilson wrote:
Wrap the active tracking for a GPU references in a slabcache for faster
allocations, and keep track of inflight nodes so we can reap the
stale entries upon parking (thereby trimming our memory usage).

I suggest a two staged approach. First patch add a slab cache (you can also add kmem_cache_shrink on park as we do for other caches), then add the parking/reaping bit.

Under what scenarios we end up not freeing active nodes sufficiently? It would have to be some user which keeps many contexts around, having only used them once?


Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
  drivers/gpu/drm/i915/i915_active.c            | 55 ++++++++++++++++---
  drivers/gpu/drm/i915/i915_active.h            | 21 +++++--
  drivers/gpu/drm/i915/i915_active_types.h      | 12 +++-
  drivers/gpu/drm/i915/i915_drv.h               |  2 +
  drivers/gpu/drm/i915/i915_gem.c               | 16 +++++-
  drivers/gpu/drm/i915/i915_gem_gtt.c           |  2 +-
  drivers/gpu/drm/i915/i915_vma.c               |  3 +-
  drivers/gpu/drm/i915/selftests/i915_active.c  |  3 +-
  .../gpu/drm/i915/selftests/mock_gem_device.c  |  6 ++
  9 files changed, 100 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index e0182e19cb8b..3c7abbde42ac 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -7,7 +7,9 @@
  #include "i915_drv.h"
  #include "i915_active.h"
-#define BKL(ref) (&(ref)->i915->drm.struct_mutex)
+#define i915_from_gt(x) \
+	container_of(x, struct drm_i915_private, gt.active_refs)
+#define BKL(ref) (&i915_from_gt((ref)->gt)->drm.struct_mutex)
struct active_node {
  	struct i915_gem_active base;
@@ -79,11 +81,11 @@ active_instance(struct i915_active *ref, u64 idx)
  			p = &parent->rb_left;
  	}
- node = kmalloc(sizeof(*node), GFP_KERNEL);
+	node = kmem_cache_alloc(ref->gt->slab_cache, GFP_KERNEL);
/* kmalloc may retire the ref->last (thanks shrinker)! */
  	if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) {
-		kfree(node);
+		kmem_cache_free(ref->gt->slab_cache, node);
  		goto out;
  	}
@@ -94,6 +96,9 @@ active_instance(struct i915_active *ref, u64 idx)
  	node->ref = ref;
  	node->timeline = idx;
+ if (RB_EMPTY_ROOT(&ref->tree))
+		list_add(&ref->active_link, &ref->gt->active_refs);
+
  	rb_link_node(&node->node, parent, p);
  	rb_insert_color(&node->node, &ref->tree);
@@ -119,11 +124,11 @@ active_instance(struct i915_active *ref, u64 idx)
  	return &ref->last;
  }
-void i915_active_init(struct drm_i915_private *i915,
+void i915_active_init(struct i915_gt_active *gt,
  		      struct i915_active *ref,
  		      void (*retire)(struct i915_active *ref))
  {
-	ref->i915 = i915;
+	ref->gt = gt;
  	ref->retire = retire;
  	ref->tree = RB_ROOT;
  	init_request_active(&ref->last, last_retire);
@@ -161,6 +166,7 @@ void i915_active_release(struct i915_active *ref)
int i915_active_wait(struct i915_active *ref)
  {
+	struct kmem_cache *slab = ref->gt->slab_cache;
  	struct active_node *it, *n;
  	int ret;
@@ -168,15 +174,19 @@ int i915_active_wait(struct i915_active *ref)
  	if (ret)
  		return ret;
+ if (RB_EMPTY_ROOT(&ref->tree))
+		return 0;
+
  	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
  		ret = i915_gem_active_retire(&it->base, BKL(ref));
  		if (ret)
  			return ret;
GEM_BUG_ON(i915_gem_active_isset(&it->base));
-		kfree(it);
+		kmem_cache_free(slab, it);
  	}
  	ref->tree = RB_ROOT;
+	list_del(&ref->active_link);
return 0;
  }
@@ -210,15 +220,46 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
void i915_active_fini(struct i915_active *ref)
  {
+	struct kmem_cache *slab = ref->gt->slab_cache;
  	struct active_node *it, *n;
+ lockdep_assert_held(BKL(ref));
  	GEM_BUG_ON(i915_gem_active_isset(&ref->last));
+ if (RB_EMPTY_ROOT(&ref->tree))
+		return;
+
  	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
  		GEM_BUG_ON(i915_gem_active_isset(&it->base));
-		kfree(it);
+		kmem_cache_free(slab, it);
  	}
  	ref->tree = RB_ROOT;
+	list_del(&ref->active_link);
+}
+
+int i915_gt_active_init(struct i915_gt_active *gt)
+{
+	gt->slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
+	if (!gt->slab_cache)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&gt->active_refs);
+
+	return 0;
+}
+
+void i915_gt_active_park(struct i915_gt_active *gt)
+{
+	struct i915_active *it, *n;
+
+	list_for_each_entry_safe(it, n, &gt->active_refs, active_link)
+		i915_active_fini(it);
+}
+
+void i915_gt_active_fini(struct i915_gt_active *gt)
+{
+	GEM_BUG_ON(!list_empty(&gt->active_refs));
+	kmem_cache_destroy(gt->slab_cache);
  }
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index c0729a046f98..41c4a5da84c8 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -9,10 +9,6 @@
#include "i915_active_types.h" -#include <linux/rbtree.h>
-
-#include "i915_request.h"
-
  /*
   * GPU activity tracking
   *
@@ -39,7 +35,7 @@
   * synchronisation.
   */
-void i915_active_init(struct drm_i915_private *i915,
+void i915_active_init(struct i915_gt_active *gt,
  		      struct i915_active *ref,
  		      void (*retire)(struct i915_active *ref));
@@ -63,4 +59,19 @@ i915_active_is_idle(const struct i915_active *ref) void i915_active_fini(struct i915_active *ref); +/*
+ * Active refs memory management
+ *
+ * To be more economical with memory, we reap all the i915_active trees on
+ * parking the GPU (when we know the GPU is inactive) and allocate the nodes
+ * from a local slab cache to hopefully reduce the fragmentation as we will
+ * then be able to free all pages en masse upon idling.
+ */
+
+int i915_gt_active_init(struct i915_gt_active *gt);
+void i915_gt_active_park(struct i915_gt_active *gt);
+void i915_gt_active_fini(struct i915_gt_active *gt);
+
+#define i915_gt_active(i915) (&(i915)->gt.active_refs)
+
  #endif /* _I915_ACTIVE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index 411e502ed8dd..3d41c33ca78c 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -7,14 +7,17 @@
  #ifndef _I915_ACTIVE_TYPES_H_
  #define _I915_ACTIVE_TYPES_H_
+#include <linux/list.h>
  #include <linux/rbtree.h>
#include "i915_request.h" -struct drm_i915_private;
+struct i915_gt_active;
+struct kmem_cache;
struct i915_active {
-	struct drm_i915_private *i915;
+	struct i915_gt_active *gt;

gt_active would be better - gt is to vague.

+	struct list_head active_link;
struct rb_root tree;
  	struct i915_gem_active last;
@@ -23,4 +26,9 @@ struct i915_active {
  	void (*retire)(struct i915_active *ref);
  };
+struct i915_gt_active {
+	struct list_head active_refs;
+	struct kmem_cache *slab_cache;
+};
+
  #endif /* _I915_ACTIVE_TYPES_H_ */
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8ec28a7f5452..480ab3e00ba8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1984,6 +1984,8 @@ struct drm_i915_private {
  			struct list_head hwsp_free_list;
  		} timelines;
+ struct i915_gt_active active_refs;
+
  		struct list_head active_rings;
  		struct list_head closed_vma;
  		u32 active_requests;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index caccff87a2a1..2bc735df408b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -130,6 +130,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915)
intel_engines_park(i915);
  	i915_timelines_park(i915);
+	i915_gt_active_park(i915_gt_active(i915));

The i915_gt_active macro is just too horrible IMHO. Why? :)

Make i915_gt_active_park take i915, or i915->gt_active.

i915_pmu_gt_parked(i915);
  	i915_vma_parked(i915);
@@ -4998,15 +4999,19 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
  		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
  	}
+ ret = i915_gt_active_init(i915_gt_active(dev_priv));
+	if (ret)
+		return ret;
+
  	i915_timelines_init(dev_priv);
ret = i915_gem_init_userptr(dev_priv);
  	if (ret)
-		return ret;
+		goto err_timelines;
ret = intel_uc_init_misc(dev_priv);
  	if (ret)
-		return ret;
+		goto err_userptr;
ret = intel_wopcm_init(&dev_priv->wopcm);
  	if (ret)
@@ -5122,9 +5127,13 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
  err_uc_misc:
  	intel_uc_fini_misc(dev_priv);
- if (ret != -EIO) {
+err_userptr:
+	if (ret != -EIO)
  		i915_gem_cleanup_userptr(dev_priv);
+err_timelines:
+	if (ret != -EIO) {
  		i915_timelines_fini(dev_priv);
+		i915_gt_active_fini(i915_gt_active(dev_priv));
  	}
if (ret == -EIO) {
@@ -5177,6 +5186,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
  	intel_uc_fini_misc(dev_priv);
  	i915_gem_cleanup_userptr(dev_priv);
  	i915_timelines_fini(dev_priv);
+	i915_gt_active_fini(i915_gt_active(dev_priv));
i915_gem_drain_freed_objects(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e625659c03a2..d8819de0d6ee 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1917,7 +1917,7 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
  	if (!vma)
  		return ERR_PTR(-ENOMEM);
- i915_active_init(i915, &vma->active, NULL);
+	i915_active_init(i915_gt_active(i915), &vma->active, NULL);
  	init_request_active(&vma->last_fence, NULL);
vma->vm = &ggtt->vm;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index d4772061e642..2456bfb4877b 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -119,7 +119,8 @@ vma_create(struct drm_i915_gem_object *obj,
  	if (vma == NULL)
  		return ERR_PTR(-ENOMEM);
- i915_active_init(vm->i915, &vma->active, __i915_vma_retire);
+	i915_active_init(i915_gt_active(vm->i915),
+			 &vma->active, __i915_vma_retire);
  	init_request_active(&vma->last_fence, NULL);
vma->vm = vm;
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 7c5c3068565b..0e923476920e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -30,7 +30,8 @@ static int __live_active_setup(struct drm_i915_private *i915,
  	unsigned int count = 0;
  	int err = 0;
- i915_active_init(i915, &active->base, __live_active_retire);
+	i915_active_init(i915_gt_active(i915),
+			 &active->base, __live_active_retire);
  	active->retired = false;
if (!i915_active_acquire(&active->base)) {
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 074a0d9cbf26..5b88f74c1677 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -69,6 +69,7 @@ static void mock_device_release(struct drm_device *dev)
  	mutex_unlock(&i915->drm.struct_mutex);
i915_timelines_fini(i915);
+	i915_gt_active_fini(i915_gt_active(i915));
drain_workqueue(i915->wq);
  	i915_gem_drain_freed_objects(i915);
@@ -228,6 +229,9 @@ struct drm_i915_private *mock_gem_device(void)
  	if (!i915->priorities)
  		goto err_dependencies;
+ if (i915_gt_active_init(i915_gt_active(i915)))
+		goto err_priorities;
+
  	i915_timelines_init(i915);
INIT_LIST_HEAD(&i915->gt.active_rings);
@@ -257,6 +261,8 @@ struct drm_i915_private *mock_gem_device(void)
  err_unlock:
  	mutex_unlock(&i915->drm.struct_mutex);
  	i915_timelines_fini(i915);
+	i915_gt_active_fini(i915_gt_active(i915));
+err_priorities:
  	kmem_cache_destroy(i915->priorities);
  err_dependencies:
  	kmem_cache_destroy(i915->dependencies);


Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux