On 8/7/24 12:31, Vlastimil Babka wrote:
> Also in git:
> https://git.kernel.org/vbabka/l/slab-kfree_rcu-destroy-v2r2

I've added this to slab/for-next; there will be some conflicts, and here's
my resulting git show of the merge commit I tried over today's next. It
might look a bit different with tomorrow's next, as mm will have v7 of the
conflicting series from Jann:
https://lore.kernel.org/all/1ca6275f-a2fc-4bad-81dc-6257d4f8d750@xxxxxxx/

(Also, I did resolve it in the way I suggested, i.e. moving Jann's block
before taking slab_mutex, but unless that happens in mm-unstable it would
probably be more correct to keep it where he did.)

---8<---
commit 444486f2b7b0325ba026e0ad129eba3f54c18301
Merge: 61c01d2e181a 63eac6bdcf9f
Author: Vlastimil Babka <vbabka@xxxxxxx>
Date:   Fri Aug 9 16:49:03 2024 +0200

    Merge branch 'slab/for-6.12/rcu_barriers' into HEAD

diff --cc include/linux/rcutree.h
index 7dbde2b6f714,58e7db80f3a8..90a684f94776
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@@ -35,9 -35,10 +35,10 @@@ static inline void rcu_virt_note_contex
  void synchronize_rcu_expedited(void);
  void kvfree_call_rcu(struct rcu_head *head, void *ptr);
+ void kvfree_rcu_barrier(void);
  void rcu_barrier(void);
 -void rcu_momentary_dyntick_idle(void);
 +void rcu_momentary_eqs(void);
  void kfree_rcu_scheduler_running(void);
  bool rcu_gp_might_be_stalled(void);
diff --cc kernel/rcu/tree.c
index 930846f06bee,ebcfed9b570e..4606fa361b06
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@@ -3614,7 -3631,7 +3611,7 @@@ kvfree_rcu_queue_batch(struct kfree_rcu
  			// be that the work is in the pending state when
  			// channels have been detached following by each
  			// other.
- 			queue_rcu_work(system_unbound_wq, &krwp->rcu_work);
 -			queued = queue_rcu_work(system_wq, &krwp->rcu_work);
++			queued = queue_rcu_work(system_unbound_wq, &krwp->rcu_work);
  		}
  	}
diff --cc mm/slab_common.c
index fc7b1250d929,1a2873293f5d..82f287c21954
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@@ -511,67 -487,40 +505,52 @@@ EXPORT_SYMBOL(kmem_buckets_create)
   */
  static void kmem_cache_release(struct kmem_cache *s)
  {
- 	if (slab_state >= FULL) {
- 		sysfs_slab_unlink(s);
+ 	kfence_shutdown_cache(s);
+ 	if (__is_defined(SLAB_SUPPORTS_SYSFS) && slab_state >= FULL)
  		sysfs_slab_release(s);
- 	} else {
+ 	else
  		slab_kmem_cache_release(s);
- 	}
  }
- #else
- static void kmem_cache_release(struct kmem_cache *s)
+
+ void slab_kmem_cache_release(struct kmem_cache *s)
  {
- 	slab_kmem_cache_release(s);
+ 	__kmem_cache_release(s);
+ 	kfree_const(s->name);
+ 	kmem_cache_free(kmem_cache, s);
  }
- #endif
- static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
+ void kmem_cache_destroy(struct kmem_cache *s)
  {
- 	LIST_HEAD(to_destroy);
- 	struct kmem_cache *s, *s2;
-
- 	/*
- 	 * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the
- 	 * @slab_caches_to_rcu_destroy list. The slab pages are freed
- 	 * through RCU and the associated kmem_cache are dereferenced
- 	 * while freeing the pages, so the kmem_caches should be freed only
- 	 * after the pending RCU operations are finished. As rcu_barrier()
- 	 * is a pretty slow operation, we batch all pending destructions
- 	 * asynchronously.
- 	 */
- 	mutex_lock(&slab_mutex);
- 	list_splice_init(&slab_caches_to_rcu_destroy, &to_destroy);
- 	mutex_unlock(&slab_mutex);
+ 	int err;
- 	if (list_empty(&to_destroy))
+ 	if (unlikely(!s) || !kasan_check_byte(s))
  		return;
- 	rcu_barrier();
+ 	/* in-flight kfree_rcu()'s may include objects from our cache */
+ 	kvfree_rcu_barrier();
- 	list_for_each_entry_safe(s, s2, &to_destroy, list) {
- 		debugfs_slab_release(s);
- 		kfence_shutdown_cache(s);
- 		kmem_cache_release(s);
- 	}
- }
-
- static int shutdown_cache(struct kmem_cache *s)
- {
 +	if (IS_ENABLED(CONFIG_SLUB_RCU_DEBUG) &&
 +	    (s->flags & SLAB_TYPESAFE_BY_RCU)) {
 +		/*
 +		 * Under CONFIG_SLUB_RCU_DEBUG, when objects in a
 +		 * SLAB_TYPESAFE_BY_RCU slab are freed, SLUB will internally
 +		 * defer their freeing with call_rcu().
 +		 * Wait for such call_rcu() invocations here before actually
 +		 * destroying the cache.
 +		 */
 +		rcu_barrier();
 +	}
+
+ 	cpus_read_lock();
+ 	mutex_lock(&slab_mutex);
+
+ 	s->refcount--;
+ 	if (s->refcount) {
+ 		mutex_unlock(&slab_mutex);
+ 		cpus_read_unlock();
+ 		return;
+ 	}
+
  	/* free asan quarantined objects */
  	kasan_cache_shutdown(s);