From: Mikulas Patocka <mpatocka@xxxxxxxxxx> Subject: slub: fix failure when we delete and create a slab cache In kernel 4.17 I removed some code from dm-bufio that did slab cache merging (21bb13276768) - both slab and slub support merging caches with identical attributes, so dm-bufio now just calls kmem_cache_create and relies on implicit merging. This uncovered a bug in the slub subsystem - if we delete a cache and immediately create another cache with the same attributes, it fails because of duplicate filename in /sys/kernel/slab/. The slub subsystem offloads freeing the cache to a workqueue - and if we create the new cache before the workqueue runs, it complains because of duplicate filename in sysfs. This patch fixes the bug by moving the call of kobject_del from sysfs_slab_remove_workfn to shutdown_cache. kobject_del must be called while we hold slab_mutex - so that the sysfs entry is deleted before a cache with the same attributes could be created. Running device-mapper-test-suite with: dmtest run --suite thin-provisioning -n /commit_failure_causes_fallback/ triggers: [ 119.618958] Buffer I/O error on dev dm-0, logical block 1572848, async page read [ 119.686224] device-mapper: thin: 253:1: metadata operation 'dm_pool_alloc_data_block' failed: error = -5 [ 119.695821] device-mapper: thin: 253:1: aborting current metadata transaction [ 119.703255] sysfs: cannot create duplicate filename '/kernel/slab/:a-0000144' [ 119.710394] CPU: 2 PID: 1037 Comm: kworker/u48:1 Not tainted 4.17.0.snitm+ #25 [ 119.717608] Hardware name: Supermicro SYS-1029P-WTR/X11DDW-L, BIOS 2.0a 12/06/2017 [ 119.725177] Workqueue: dm-thin do_worker [dm_thin_pool] [ 119.730401] Call Trace: [ 119.732856] dump_stack+0x5a/0x73 [ 119.736173] sysfs_warn_dup+0x58/0x70 [ 119.739839] sysfs_create_dir_ns+0x77/0x80 [ 119.743939] kobject_add_internal+0xba/0x2e0 [ 119.748210] kobject_init_and_add+0x70/0xb0 [ 119.752399] ? 
sysfs_slab_add+0x101/0x250 [ 119.756409] sysfs_slab_add+0xb1/0x250 [ 119.760161] __kmem_cache_create+0x116/0x150 [ 119.764436] ? number+0x2fb/0x340 [ 119.767755] ? _cond_resched+0x15/0x30 [ 119.771508] create_cache+0xd9/0x1f0 [ 119.775085] kmem_cache_create_usercopy+0x1c1/0x250 [ 119.779965] kmem_cache_create+0x18/0x20 [ 119.783894] dm_bufio_client_create+0x1ae/0x410 [dm_bufio] [ 119.789380] ? dm_block_manager_alloc_callback+0x20/0x20 [dm_persistent_data] [ 119.796509] ? kmem_cache_alloc_trace+0xae/0x1d0 [ 119.801131] dm_block_manager_create+0x5e/0x90 [dm_persistent_data] [ 119.807397] __create_persistent_data_objects+0x38/0x940 [dm_thin_pool] [ 119.814008] dm_pool_abort_metadata+0x64/0x90 [dm_thin_pool] [ 119.819669] metadata_operation_failed+0x59/0x100 [dm_thin_pool] [ 119.825673] alloc_data_block.isra.53+0x86/0x180 [dm_thin_pool] [ 119.831592] process_cell+0x2a3/0x550 [dm_thin_pool] [ 119.836558] ? mempool_alloc+0x6f/0x180 [ 119.840400] ? u32_swap+0x10/0x10 [ 119.843717] ? sort+0x17b/0x270 [ 119.846863] ? u32_swap+0x10/0x10 [ 119.850181] do_worker+0x28d/0x8f0 [dm_thin_pool] [ 119.854890] ? move_linked_works+0x6f/0xa0 [ 119.858989] process_one_work+0x171/0x370 [ 119.862999] worker_thread+0x49/0x3f0 [ 119.866669] kthread+0xf8/0x130 [ 119.869813] ? max_active_store+0x80/0x80 [ 119.873827] ? kthread_bind+0x10/0x10 [ 119.877493] ret_from_fork+0x35/0x40 [ 119.881076] kobject_add_internal failed for :a-0000144 with -EEXIST, don't try to register things with the same name in the same directory. 
[ 119.893580] kmem_cache_create(dm_bufio_buffer-16) failed with error -17 Link: http://lkml.kernel.org/r/alpine.LRH.2.02.1806151817130.6333@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx Signed-off-by: Mikulas Patocka <mpatocka@xxxxxxxxxx> Reported-by: Mike Snitzer <snitzer@xxxxxxxxxx> Tested-by: Mike Snitzer <snitzer@xxxxxxxxxx> Cc: Christoph Lameter <cl@xxxxxxxxx> Cc: Pekka Enberg <penberg@xxxxxxxxxx> Cc: David Rientjes <rientjes@xxxxxxxxxx> Cc: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx> Cc: <stable@xxxxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- diff -puN include/linux/slub_def.h~slub-fix-failure-when-we-delete-and-create-a-slab-cache include/linux/slub_def.h --- a/include/linux/slub_def.h~slub-fix-failure-when-we-delete-and-create-a-slab-cache +++ a/include/linux/slub_def.h @@ -155,8 +155,12 @@ struct kmem_cache { #ifdef CONFIG_SYSFS #define SLAB_SUPPORTS_SYSFS +void sysfs_slab_unlink(struct kmem_cache *); void sysfs_slab_release(struct kmem_cache *); #else +static inline void sysfs_slab_unlink(struct kmem_cache *s) +{ +} static inline void sysfs_slab_release(struct kmem_cache *s) { } diff -puN mm/slab_common.c~slub-fix-failure-when-we-delete-and-create-a-slab-cache mm/slab_common.c --- a/mm/slab_common.c~slub-fix-failure-when-we-delete-and-create-a-slab-cache +++ a/mm/slab_common.c @@ -567,10 +567,14 @@ static int shutdown_cache(struct kmem_ca list_del(&s->list); if (s->flags & SLAB_TYPESAFE_BY_RCU) { +#ifdef SLAB_SUPPORTS_SYSFS + sysfs_slab_unlink(s); +#endif list_add_tail(&s->list, &slab_caches_to_rcu_destroy); schedule_work(&slab_caches_to_rcu_destroy_work); } else { #ifdef SLAB_SUPPORTS_SYSFS + sysfs_slab_unlink(s); sysfs_slab_release(s); #else slab_kmem_cache_release(s); diff -puN mm/slub.c~slub-fix-failure-when-we-delete-and-create-a-slab-cache mm/slub.c --- a/mm/slub.c~slub-fix-failure-when-we-delete-and-create-a-slab-cache +++ a/mm/slub.c @@ -5667,7 +5667,6 @@ static void sysfs_slab_remove_workfn(str kset_unregister(s->memcg_kset); 
#endif kobject_uevent(&s->kobj, KOBJ_REMOVE); - kobject_del(&s->kobj); out: kobject_put(&s->kobj); } @@ -5752,6 +5751,12 @@ static void sysfs_slab_remove(struct kme schedule_work(&s->kobj_remove_work); } +void sysfs_slab_unlink(struct kmem_cache *s) +{ + if (slab_state >= FULL) + kobject_del(&s->kobj); +} + void sysfs_slab_release(struct kmem_cache *s) { if (slab_state >= FULL) _