On Wed, Jul 21, 2021 at 10:24:01PM +0200, Daniel Vetter wrote: > On Wed, Jul 21, 2021 at 10:17 PM Jason Ekstrand <jason@xxxxxxxxxxxxxx> wrote: > > > > On Wed, Jul 21, 2021 at 1:32 PM Daniel Vetter <daniel.vetter@xxxxxxxx> wrote: > > > > > > This essentially reverts > > > > > > commit 84a1074920523430f9dc30ff907f4801b4820072 > > > Author: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > > > Date: Wed Jan 24 11:36:08 2018 +0000 > > > > > > drm/i915: Shrink the GEM kmem_caches upon idling > > > > > > mm/vmscan.c:do_shrink_slab() is a thing, if there's an issue with it > > > then we need to fix that there, not hand-roll our own slab shrinking > > > code in i915. > > > > > > Noticed while reviewing a patch set from Jason to fix up some issues > > > in our i915_init() and i915_exit() module load/cleanup code. Now that > > > i915_globals.c isn't any different than normal init/exit functions, we > > > should convert them over to one unified table and remove > > > i915_globals.[hc] entirely. > > > > Mind throwing in a comment somewhere about how i915 is one of only two > > users of kmem_cache_shrink() in the entire kernel? That also seems to > > be pretty good evidence that it's not useful. > > I missed one, there's also on in kunit (I think just got in, it's from > this year at least per commit). That one seems actually legit, it's a > selftest for some statistics around slabs I think, so has a legit > reason to carefully control the state and trim anything that just > hangs around. > > I'll add something and push when CI approves. > > > Reviewed-by: Jason Ekstrand <jason@xxxxxxxxxxxxxx> > > > > Feel free to land at-will and I'll deal with merge conflicts on my end. > > I think if we can land this, then yours, then I type the conversion to > explicit init/exit and we're done. CI approved, merged as discussed. I'll look at your series now. -Daniel > > > Cc: David Airlie <airlied@xxxxxxxx> > > > Cc: Jason Ekstrand <jason@xxxxxxxxxxxxxx> > > > Signed-off-by: Daniel Vetter <daniel.vetter@xxxxxxxxx> > > > --- > > > drivers/gpu/drm/i915/gem/i915_gem_context.c | 6 -- > > > drivers/gpu/drm/i915/gem/i915_gem_object.c | 6 -- > > > drivers/gpu/drm/i915/gt/intel_context.c | 6 -- > > > drivers/gpu/drm/i915/gt/intel_gt_pm.c | 4 - > > > drivers/gpu/drm/i915/i915_active.c | 6 -- > > > drivers/gpu/drm/i915/i915_globals.c | 95 --------------------- > > > drivers/gpu/drm/i915/i915_globals.h | 3 - > > > drivers/gpu/drm/i915/i915_request.c | 7 -- > > > drivers/gpu/drm/i915/i915_scheduler.c | 7 -- > > > drivers/gpu/drm/i915/i915_vma.c | 6 -- > > > 10 files changed, 146 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > index 7d6f52d8a801..bf2a2319353a 100644 > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > > > @@ -2280,18 +2280,12 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it) > > > #include "selftests/i915_gem_context.c" > > > #endif > > > > > > -static void i915_global_gem_context_shrink(void) > > > -{ > > > - kmem_cache_shrink(global.slab_luts); > > > -} > > > - > > > static void i915_global_gem_context_exit(void) > > > { > > > kmem_cache_destroy(global.slab_luts); > > > } > > > > > > static struct i915_global_gem_context global = { { > > > - .shrink = i915_global_gem_context_shrink, > > > .exit = i915_global_gem_context_exit, > > > } }; > > > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c > > > index 9da7b288b7ed..5c21cff33199 100644 > > > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c > > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c > > > @@ -664,18 +664,12 @@ void i915_gem_init__objects(struct drm_i915_private *i915) > > > INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); > > > } > > > > > > -static void i915_global_objects_shrink(void) > > > -{ > > > - kmem_cache_shrink(global.slab_objects); > > > -} > > > - > > > static void i915_global_objects_exit(void) > > > { > > > kmem_cache_destroy(global.slab_objects); > > > } > > > > > > static struct i915_global_object global = { { > > > - .shrink = i915_global_objects_shrink, > > > .exit = i915_global_objects_exit, > > > } }; > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c > > > index bd63813c8a80..c1338441cc1d 100644 > > > --- a/drivers/gpu/drm/i915/gt/intel_context.c > > > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > > > @@ -398,18 +398,12 @@ void intel_context_fini(struct intel_context *ce) > > > i915_active_fini(&ce->active); > > > } > > > > > > -static void i915_global_context_shrink(void) > > > -{ > > > - kmem_cache_shrink(global.slab_ce); > > > -} > > > - > > > static void i915_global_context_exit(void) > > > { > > > kmem_cache_destroy(global.slab_ce); > > > } > > > > > > static struct i915_global_context global = { { > > > - .shrink = i915_global_context_shrink, > > > .exit = i915_global_context_exit, > > > } }; > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c > > > index aef3084e8b16..d86825437516 100644 > > > --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c > > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c > > > @@ -67,8 +67,6 @@ static int __gt_unpark(struct intel_wakeref *wf) > > > > > > GT_TRACE(gt, "\n"); > > > > > > - i915_globals_unpark(); > > > - > > > /* > > > * It seems that the DMC likes to transition between the DC states a lot > > > * when there are no connected displays (no active power domains) during > > > @@ -116,8 +114,6 @@ static int __gt_park(struct intel_wakeref *wf) > > > GEM_BUG_ON(!wakeref); > > > intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref); > > > > > > - i915_globals_park(); > > > - > > > return 0; > > > } > > > > > > diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c > > > index b1aa1c482c32..91723123ae9f 100644 > > > --- a/drivers/gpu/drm/i915/i915_active.c > > > +++ b/drivers/gpu/drm/i915/i915_active.c > > > @@ -1176,18 +1176,12 @@ struct i915_active *i915_active_create(void) > > > #include "selftests/i915_active.c" > > > #endif > > > > > > -static void i915_global_active_shrink(void) > > > -{ > > > - kmem_cache_shrink(global.slab_cache); > > > -} > > > - > > > static void i915_global_active_exit(void) > > > { > > > kmem_cache_destroy(global.slab_cache); > > > } > > > > > > static struct i915_global_active global = { { > > > - .shrink = i915_global_active_shrink, > > > .exit = i915_global_active_exit, > > > } }; > > > > > > diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c > > > index 77f1911c463b..7fe2e503897b 100644 > > > --- a/drivers/gpu/drm/i915/i915_globals.c > > > +++ b/drivers/gpu/drm/i915/i915_globals.c > > > @@ -17,61 +17,8 @@ > > > > > > static LIST_HEAD(globals); > > > > > > -static atomic_t active; > > > -static atomic_t epoch; > > > -static struct park_work { > > > - struct delayed_work work; > > > - struct rcu_head rcu; > > > - unsigned long flags; > > > -#define PENDING 0 > > > - int epoch; > > > -} park; > > > - > > > -static void i915_globals_shrink(void) > > > -{ > > > - struct i915_global *global; > > > - > > > - /* > > > - * kmem_cache_shrink() discards empty slabs and reorders partially > > > - * filled slabs to prioritise allocating from the mostly full slabs, > > > - * with the aim of reducing fragmentation. > > > - */ > > > - list_for_each_entry(global, &globals, link) > > > - global->shrink(); > > > -} > > > - > > > -static void __i915_globals_grace(struct rcu_head *rcu) > > > -{ > > > - /* Ratelimit parking as shrinking is quite slow */ > > > - schedule_delayed_work(&park.work, round_jiffies_up_relative(2 * HZ)); > > > -} > > > - > > > -static void __i915_globals_queue_rcu(void) > > > -{ > > > - park.epoch = atomic_inc_return(&epoch); > > > - if (!atomic_read(&active)) { > > > - init_rcu_head(&park.rcu); > > > - call_rcu(&park.rcu, __i915_globals_grace); > > > - } > > > -} > > > - > > > -static void __i915_globals_park(struct work_struct *work) > > > -{ > > > - destroy_rcu_head(&park.rcu); > > > - > > > - /* Confirm nothing woke up in the last grace period */ > > > - if (park.epoch != atomic_read(&epoch)) { > > > - __i915_globals_queue_rcu(); > > > - return; > > > - } > > > - > > > - clear_bit(PENDING, &park.flags); > > > - i915_globals_shrink(); > > > -} > > > - > > > void __init i915_global_register(struct i915_global *global) > > > { > > > - GEM_BUG_ON(!global->shrink); > > > GEM_BUG_ON(!global->exit); > > > > > > list_add_tail(&global->link, &globals); > > > @@ -109,52 +56,10 @@ int __init i915_globals_init(void) > > > } > > > } > > > > > > - INIT_DELAYED_WORK(&park.work, __i915_globals_park); > > > return 0; > > > } > > > > > > -void i915_globals_park(void) > > > -{ > > > - /* > > > - * Defer shrinking the global slab caches (and other work) until > > > - * after a RCU grace period has completed with no activity. This > > > - * is to try and reduce the latency impact on the consumers caused > > > - * by us shrinking the caches the same time as they are trying to > > > - * allocate, with the assumption being that if we idle long enough > > > - * for an RCU grace period to elapse since the last use, it is likely > > > - * to be longer until we need the caches again. > > > - */ > > > - if (!atomic_dec_and_test(&active)) > > > - return; > > > - > > > - /* Queue cleanup after the next RCU grace period has freed slabs */ > > > - if (!test_and_set_bit(PENDING, &park.flags)) > > > - __i915_globals_queue_rcu(); > > > -} > > > - > > > -void i915_globals_unpark(void) > > > -{ > > > - atomic_inc(&epoch); > > > - atomic_inc(&active); > > > -} > > > - > > > -static void __exit __i915_globals_flush(void) > > > -{ > > > - atomic_inc(&active); /* skip shrinking */ > > > - > > > - rcu_barrier(); /* wait for the work to be queued */ > > > - flush_delayed_work(&park.work); > > > - > > > - atomic_dec(&active); > > > -} > > > - > > > void __exit i915_globals_exit(void) > > > { > > > - GEM_BUG_ON(atomic_read(&active)); > > > - > > > - __i915_globals_flush(); > > > __i915_globals_cleanup(); > > > - > > > - /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */ > > > - rcu_barrier(); > > > } > > > diff --git a/drivers/gpu/drm/i915/i915_globals.h b/drivers/gpu/drm/i915/i915_globals.h > > > index 2d199f411a4a..9e6b4fd07528 100644 > > > --- a/drivers/gpu/drm/i915/i915_globals.h > > > +++ b/drivers/gpu/drm/i915/i915_globals.h > > > @@ -14,15 +14,12 @@ typedef void (*i915_global_func_t)(void); > > > struct i915_global { > > > struct list_head link; > > > > > > - i915_global_func_t shrink; > > > i915_global_func_t exit; > > > }; > > > > > > void i915_global_register(struct i915_global *global); > > > > > > int i915_globals_init(void); > > > -void i915_globals_park(void); > > > -void i915_globals_unpark(void); > > > void i915_globals_exit(void); > > > > > > /* constructors */ > > > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > > > index 09ebea9a0090..d3de9f60e03a 100644 > > > --- a/drivers/gpu/drm/i915/i915_request.c > > > +++ b/drivers/gpu/drm/i915/i915_request.c > > > @@ -2077,12 +2077,6 @@ void i915_request_show(struct drm_printer *m, > > > #include "selftests/i915_request.c" > > > #endif > > > > > > -static void i915_global_request_shrink(void) > > > -{ > > > - kmem_cache_shrink(global.slab_execute_cbs); > > > - kmem_cache_shrink(global.slab_requests); > > > -} > > > - > > > static void i915_global_request_exit(void) > > > { > > > kmem_cache_destroy(global.slab_execute_cbs); > > > @@ -2090,7 +2084,6 @@ static void i915_global_request_exit(void) > > > } > > > > > > static struct i915_global_request global = { { > > > - .shrink = i915_global_request_shrink, > > > .exit = i915_global_request_exit, > > > } }; > > > > > > diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c > > > index 3a58a9130309..561c649e59f7 100644 > > > --- a/drivers/gpu/drm/i915/i915_scheduler.c > > > +++ b/drivers/gpu/drm/i915/i915_scheduler.c > > > @@ -475,12 +475,6 @@ i915_sched_engine_create(unsigned int subclass) > > > return sched_engine; > > > } > > > > > > -static void i915_global_scheduler_shrink(void) > > > -{ > > > - kmem_cache_shrink(global.slab_dependencies); > > > - kmem_cache_shrink(global.slab_priorities); > > > -} > > > - > > > static void i915_global_scheduler_exit(void) > > > { > > > kmem_cache_destroy(global.slab_dependencies); > > > @@ -488,7 +482,6 @@ static void i915_global_scheduler_exit(void) > > > } > > > > > > static struct i915_global_scheduler global = { { > > > - .shrink = i915_global_scheduler_shrink, > > > .exit = i915_global_scheduler_exit, > > > } }; > > > > > > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c > > > index 5b9dce0f443b..09a7c47926f7 100644 > > > --- a/drivers/gpu/drm/i915/i915_vma.c > > > +++ b/drivers/gpu/drm/i915/i915_vma.c > > > @@ -1414,18 +1414,12 @@ void i915_vma_make_purgeable(struct i915_vma *vma) > > > #include "selftests/i915_vma.c" > > > #endif > > > > > > -static void i915_global_vma_shrink(void) > > > -{ > > > - kmem_cache_shrink(global.slab_vmas); > > > -} > > > - > > > static void i915_global_vma_exit(void) > > > { > > > kmem_cache_destroy(global.slab_vmas); > > > } > > > > > > static struct i915_global_vma global = { { > > > - .shrink = i915_global_vma_shrink, > > > .exit = i915_global_vma_exit, > > > } }; > > > > > > -- > > > 2.32.0 > > > > > > > -- > Daniel Vetter > Software Engineer, Intel Corporation > http://blog.ffwll.ch -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch