As kmem_caches share the same properties (size, allocation/free behaviour)
for all potential devices, we can use global caches. While this
potentially has worse fragmentation behaviour (one can argue that
different devices would have different activity lifetimes, but you can
also argue that activity is temporal across the system), it is the
default behaviour of the system at large to amalgamate matching caches.
The benefit for us is much reduced pointer dancing along the frequent
allocation paths.
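
For illustration, a minimal sketch of the one-global-cache-per-subsystem
pattern, with hypothetical names (example_node, example_global_*); the
real definitions live in i915_active.c / i915_globals.c and may differ
in detail:

#include <linux/slab.h>

/* Hypothetical node type; stands in for e.g. struct active_node. */
struct example_node {
	unsigned long payload;
};

static struct example_global {
	struct kmem_cache *slab_cache;
} global;

int __init example_global_init(void)
{
	global.slab_cache = KMEM_CACHE(example_node, SLAB_HWCACHE_ALIGN);
	if (!global.slab_cache)
		return -ENOMEM;

	return 0;
}

/*
 * The hot allocation path no longer dereferences drm_i915_private to
 * find the cache; the file-scope global is addressed directly.
 */
static struct example_node *example_node_alloc(gfp_t gfp)
{
	return kmem_cache_alloc(global.slab_cache, gfp);
}

void example_global_shrink(void)
{
	kmem_cache_shrink(global.slab_cache);
}

void example_global_exit(void)
{
	kmem_cache_destroy(global.slab_cache);
}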
v2: Defer shrinking until after a global grace period, futureproofing
for multiple consumers of the slab caches, similar to the current
strategy for avoiding shrinking too early.
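
A minimal sketch of how such deferred shrinking can be wired up with an
rcu_work; the helper names (example_globals_park/unpark, struct
park_work) and the epoch check are illustrative assumptions, not
necessarily what the new i915_globals.c does:

#include <linux/atomic.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

static atomic_t active;	/* number of unparked users */
static atomic_t epoch;	/* bumped on every unpark to cancel stale parks */

struct park_work {
	struct rcu_work work;
	int epoch;
};

static void __example_globals_park(struct work_struct *work)
{
	struct park_work *wrk = container_of(work, typeof(*wrk), work.work);

	/* Shrink only if nothing woke up during the grace period. */
	if (wrk->epoch == atomic_read(&epoch))
		i915_global_active_shrink();

	kfree(wrk);
}

void example_globals_park(void)
{
	struct park_work *wrk;

	if (!atomic_dec_and_test(&active))
		return;

	wrk = kmalloc(sizeof(*wrk), GFP_KERNEL);
	if (!wrk)
		return;

	wrk->epoch = atomic_inc_return(&epoch);
	INIT_RCU_WORK(&wrk->work, __example_globals_park);

	/* The work runs only after a full RCU grace period has elapsed. */
	queue_rcu_work(system_wq, &wrk->work);
}

void example_globals_unpark(void)
{
	atomic_inc(&epoch);	/* invalidate any park already queued */
	atomic_inc(&active);
}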
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
drivers/gpu/drm/i915/Makefile | 1 +
drivers/gpu/drm/i915/i915_active.c | 7 +-
drivers/gpu/drm/i915/i915_active.h | 1 +
drivers/gpu/drm/i915/i915_drv.h | 3 -
drivers/gpu/drm/i915/i915_gem.c | 34 +-----
drivers/gpu/drm/i915/i915_globals.c | 105 ++++++++++++++++++
drivers/gpu/drm/i915/i915_globals.h | 15 +++
drivers/gpu/drm/i915/i915_pci.c | 8 +-
drivers/gpu/drm/i915/i915_request.c | 53 +++++++--
drivers/gpu/drm/i915/i915_request.h | 10 ++
drivers/gpu/drm/i915/i915_scheduler.c | 66 ++++++++---
drivers/gpu/drm/i915/i915_scheduler.h | 34 +++++-
drivers/gpu/drm/i915/intel_guc_submission.c | 3 +-
drivers/gpu/drm/i915/intel_lrc.c | 6 +-
drivers/gpu/drm/i915/intel_ringbuffer.h | 17 ---
drivers/gpu/drm/i915/selftests/intel_lrc.c | 2 +-
drivers/gpu/drm/i915/selftests/mock_engine.c | 48 ++++----
.../gpu/drm/i915/selftests/mock_gem_device.c | 26 -----
drivers/gpu/drm/i915/selftests/mock_request.c | 12 +-
drivers/gpu/drm/i915/selftests/mock_request.h | 7 --
20 files changed, 306 insertions(+), 152 deletions(-)
create mode 100644 drivers/gpu/drm/i915/i915_globals.c
create mode 100644 drivers/gpu/drm/i915/i915_globals.h
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 1787e1299b1b..a1d834068765 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -77,6 +77,7 @@ i915-y += \
i915_gem_tiling.o \
i915_gem_userptr.o \
i915_gemfs.o \
+ i915_globals.o \
i915_query.o \
i915_request.o \
i915_scheduler.o \
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 215b6ff8aa73..9026787ebdf8 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -280,7 +280,12 @@ int __init i915_global_active_init(void)
return 0;
}
-void __exit i915_global_active_exit(void)
+void i915_global_active_shrink(void)
+{
+ kmem_cache_shrink(global.slab_cache);
+}
+
+void i915_global_active_exit(void)
{
kmem_cache_destroy(global.slab_cache);
}
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 12b5c1d287d1..5fbd9102384b 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -420,6 +420,7 @@ static inline void i915_active_fini(struct i915_active *ref) { }
#endif
int i915_global_active_init(void);
+void i915_global_active_shrink(void);
void i915_global_active_exit(void);
#endif /* _I915_ACTIVE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 37230ae7fbe6..a365b1a2ea9a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1459,9 +1459,6 @@ struct drm_i915_private {
struct kmem_cache *objects;
struct kmem_cache *vmas;
struct kmem_cache *luts;
- struct kmem_cache *requests;
- struct kmem_cache *dependencies;
- struct kmem_cache *priorities;
const struct intel_device_info __info; /* Use INTEL_INFO() to access. */
struct intel_runtime_info __runtime; /* Use RUNTIME_INFO() to access. */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1eb3a5f8654c..d18c4ccff370 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -42,6 +42,7 @@
#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gemfs.h"
+#include "i915_globals.h"
#include "i915_reset.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
@@ -187,6 +188,8 @@ void i915_gem_unpark(struct drm_i915_private *i915)
if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
i915->gt.epoch = 1;
+ i915_globals_unpark();
+
intel_enable_gt_powersave(i915);
i915_update_gfx_val(i915);
if (INTEL_GEN(i915) >= 6)
@@ -2916,12 +2919,11 @@ static void shrink_caches(struct drm_i915_private *i915)
* filled slabs to prioritise allocating from the mostly full slabs,
* with the aim of reducing fragmentation.
*/
- kmem_cache_shrink(i915->priorities);
- kmem_cache_shrink(i915->dependencies);
- kmem_cache_shrink(i915->requests);
kmem_cache_shrink(i915->luts);
kmem_cache_shrink(i915->vmas);
kmem_cache_shrink(i915->objects);
+
+ i915_globals_park();