On 08.12.19 20:11, Al Viro wrote: > On Sat, Nov 30, 2019 at 11:36:15AM -0800, Matthew Wilcox wrote: >> On Sat, Nov 30, 2019 at 03:53:10PM +0800, yukuai (C) wrote: >>> On 2019/11/30 11:43, Matthew Wilcox wrote: >>>> On Sat, Nov 30, 2019 at 10:02:23AM +0800, yu kuai wrote: >>>>> However, a single 'DENTRY_D_LOCK_NESTED' may not be enough if more than >>>>> two dentries are involved. So, add in 'DENTRY_D_LOCK_NESTED_TWICE'. >>>> >>>> No. These need meaningful names. Indeed, I think D_LOCK_NESTED is >>>> a terrible name. >>>> >>>> The exception is __d_move() where I think we should actually name the >>>> different lock classes instead of using a bare '2' and '3'. Something >>>> like this, perhaps: >>> >>> Thanks for looking into this, do you mind if I replace your patch with the >>> first two patches in the patchset? >> >> That's fine by me, but I think we should wait for Al to give his approval >> before submitting a new version. > > IMO this is a wrong approach. It's papering over confused code in > debugfs recursive removal and it would be better to get rid of _that_, > rather than try and slap bandaids on it. > > I suspect that the following would be a better way to deal with it; it adds > a new primitive and converts debugfs and tracefs to that. There are > followups converting other such places, still not finished. > > commit 7e9c8a08889bf42bbe64e80e456d2eca824e5db2 > Author: Al Viro <viro@xxxxxxxxxxxxxxxxxx> > Date: Mon Nov 18 09:43:10 2019 -0500 > > simple_recursive_removal(): kernel-side rm -rf for ramfs-style filesystems > > two requirements: no file creations in IS_DEADDIR and no cross-directory > renames whatsoever. 
> > Signed-off-by: Al Viro <viro@xxxxxxxxxxxxxxxxxx> > > diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c > index 042b688ed124..da936c54d879 100644 > --- a/fs/debugfs/inode.c > +++ b/fs/debugfs/inode.c > @@ -309,7 +309,10 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) > parent = debugfs_mount->mnt_root; > > inode_lock(d_inode(parent)); > - dentry = lookup_one_len(name, parent, strlen(name)); > + if (unlikely(IS_DEADDIR(d_inode(parent)))) > + dentry = ERR_PTR(-ENOENT); > + else > + dentry = lookup_one_len(name, parent, strlen(name)); > if (!IS_ERR(dentry) && d_really_is_positive(dentry)) { > if (d_is_dir(dentry)) > pr_err("Directory '%s' with parent '%s' already present!\n", > @@ -657,62 +660,15 @@ static void __debugfs_file_removed(struct dentry *dentry) > wait_for_completion(&fsd->active_users_drained); > } > > -static int __debugfs_remove(struct dentry *dentry, struct dentry *parent) > -{ > - int ret = 0; > - > - if (simple_positive(dentry)) { > - dget(dentry); > - if (d_is_dir(dentry)) { > - ret = simple_rmdir(d_inode(parent), dentry); > - if (!ret) > - fsnotify_rmdir(d_inode(parent), dentry); > - } else { > - simple_unlink(d_inode(parent), dentry); > - fsnotify_unlink(d_inode(parent), dentry); > - } > - if (!ret) > - d_delete(dentry); > - if (d_is_reg(dentry)) > - __debugfs_file_removed(dentry); > - dput(dentry); > - } > - return ret; > -} > - > -/** > - * debugfs_remove - removes a file or directory from the debugfs filesystem > - * @dentry: a pointer to a the dentry of the file or directory to be > - * removed. If this parameter is NULL or an error value, nothing > - * will be done. > - * > - * This function removes a file or directory in debugfs that was previously > - * created with a call to another debugfs function (like > - * debugfs_create_file() or variants thereof.) 
> - * > - * This function is required to be called in order for the file to be > - * removed, no automatic cleanup of files will happen when a module is > - * removed, you are responsible here. > - */ > -void debugfs_remove(struct dentry *dentry) > +static void remove_one(struct dentry *victim) > { > - struct dentry *parent; > - int ret; > - > - if (IS_ERR_OR_NULL(dentry)) > - return; > - > - parent = dentry->d_parent; > - inode_lock(d_inode(parent)); > - ret = __debugfs_remove(dentry, parent); > - inode_unlock(d_inode(parent)); > - if (!ret) > - simple_release_fs(&debugfs_mount, &debugfs_mount_count); > + if (d_is_reg(victim)) > + __debugfs_file_removed(victim); > + simple_release_fs(&debugfs_mount, &debugfs_mount_count); > } > -EXPORT_SYMBOL_GPL(debugfs_remove); > > /** > - * debugfs_remove_recursive - recursively removes a directory > + * debugfs_remove - recursively removes a directory > * @dentry: a pointer to a the dentry of the directory to be removed. If this > * parameter is NULL or an error value, nothing will be done. > * > @@ -724,65 +680,16 @@ EXPORT_SYMBOL_GPL(debugfs_remove); > * removed, no automatic cleanup of files will happen when a module is > * removed, you are responsible here. > */ > -void debugfs_remove_recursive(struct dentry *dentry) > +void debugfs_remove(struct dentry *dentry) > { > - struct dentry *child, *parent; > - > if (IS_ERR_OR_NULL(dentry)) > return; > > - parent = dentry; > - down: > - inode_lock(d_inode(parent)); > - loop: > - /* > - * The parent->d_subdirs is protected by the d_lock. Outside that > - * lock, the child can be unlinked and set to be freed which can > - * use the d_u.d_child as the rcu head and corrupt this list. 
> - */ > - spin_lock(&parent->d_lock); > - list_for_each_entry(child, &parent->d_subdirs, d_child) { > - if (!simple_positive(child)) > - continue; > - > - /* perhaps simple_empty(child) makes more sense */ > - if (!list_empty(&child->d_subdirs)) { > - spin_unlock(&parent->d_lock); > - inode_unlock(d_inode(parent)); > - parent = child; > - goto down; > - } > - > - spin_unlock(&parent->d_lock); > - > - if (!__debugfs_remove(child, parent)) > - simple_release_fs(&debugfs_mount, &debugfs_mount_count); > - > - /* > - * The parent->d_lock protects agaist child from unlinking > - * from d_subdirs. When releasing the parent->d_lock we can > - * no longer trust that the next pointer is valid. > - * Restart the loop. We'll skip this one with the > - * simple_positive() check. > - */ > - goto loop; > - } > - spin_unlock(&parent->d_lock); > - > - inode_unlock(d_inode(parent)); > - child = parent; > - parent = parent->d_parent; > - inode_lock(d_inode(parent)); > - > - if (child != dentry) > - /* go up */ > - goto loop; > - > - if (!__debugfs_remove(child, parent)) > - simple_release_fs(&debugfs_mount, &debugfs_mount_count); > - inode_unlock(d_inode(parent)); > + simple_pin_fs(&debug_fs_type, &debugfs_mount, &debugfs_mount_count); > + simple_recursive_removal(dentry, remove_one); > + simple_release_fs(&debugfs_mount, &debugfs_mount_count); > } > -EXPORT_SYMBOL_GPL(debugfs_remove_recursive); > +EXPORT_SYMBOL_GPL(debugfs_remove); > > /** > * debugfs_rename - rename a file/directory in the debugfs filesystem > diff --git a/fs/libfs.c b/fs/libfs.c > index 540611b99b9a..b67003a948ed 100644 > --- a/fs/libfs.c > +++ b/fs/libfs.c > @@ -19,6 +19,7 @@ > #include <linux/buffer_head.h> /* sync_mapping_buffers */ > #include <linux/fs_context.h> > #include <linux/pseudo_fs.h> > +#include <linux/fsnotify.h> > > #include <linux/uaccess.h> > > @@ -239,6 +240,75 @@ const struct inode_operations simple_dir_inode_operations = { > }; > EXPORT_SYMBOL(simple_dir_inode_operations); > > +static struct 
dentry *find_next_child(struct dentry *parent, struct dentry *prev) > +{ > + struct dentry *child = NULL; > + struct list_head *p = prev ? &prev->d_child : &parent->d_subdirs; > + > + spin_lock(&parent->d_lock); > + while ((p = p->next) != &parent->d_subdirs) { > + struct dentry *d = container_of(p, struct dentry, d_child); > + if (simple_positive(d)) { > + spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); > + if (simple_positive(d)) > + child = dget_dlock(d); > + spin_unlock(&d->d_lock); > + if (likely(child)) > + break; > + } > + } > + spin_unlock(&parent->d_lock); > + dput(prev); > + return child; > +} > + > +void simple_recursive_removal(struct dentry *dentry, > + void (*callback)(struct dentry *)) > +{ > + struct dentry *this = dentry; > + while (true) { > + struct dentry *victim = NULL, *child; > + struct inode *inode = this->d_inode; > + > + inode_lock(inode); > + if (d_is_dir(this)) > + inode->i_flags |= S_DEAD; > + while ((child = find_next_child(this, victim)) == NULL) { > + // kill and ascend > + // update metadata while it's still locked > + inode->i_ctime = current_time(inode); > + clear_nlink(inode); > + inode_unlock(inode); > + victim = this; > + this = this->d_parent; > + inode = this->d_inode; > + inode_lock(inode); > + if (simple_positive(victim)) { > + d_invalidate(victim); // avoid lost mounts > + if (d_is_dir(victim)) > + fsnotify_rmdir(inode, victim); > + else > + fsnotify_unlink(inode, victim); > + if (callback) > + callback(victim); > + dput(victim); // unpin it > + } > + if (victim == dentry) { > + inode->i_ctime = inode->i_mtime = > + current_time(inode); > + if (d_is_dir(dentry)) > + drop_nlink(inode); > + inode_unlock(inode); > + dput(dentry); > + return; > + } > + } > + inode_unlock(inode); > + this = child; > + } > +} > +EXPORT_SYMBOL(simple_recursive_removal); > + > static const struct super_operations simple_super_operations = { > .statfs = simple_statfs, > }; > diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c > index 
eeeae0475da9..2a16c0eb97e4 100644 > --- a/fs/tracefs/inode.c > +++ b/fs/tracefs/inode.c > @@ -329,7 +329,10 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) > parent = tracefs_mount->mnt_root; > > inode_lock(parent->d_inode); > - dentry = lookup_one_len(name, parent, strlen(name)); > + if (unlikely(IS_DEADDIR(parent->d_inode))) > + dentry = ERR_PTR(-ENOENT); > + else > + dentry = lookup_one_len(name, parent, strlen(name)); > if (!IS_ERR(dentry) && dentry->d_inode) { > dput(dentry); > dentry = ERR_PTR(-EEXIST); > @@ -495,122 +498,27 @@ __init struct dentry *tracefs_create_instance_dir(const char *name, > return dentry; > } > > -static int __tracefs_remove(struct dentry *dentry, struct dentry *parent) > +static void remove_one(struct dentry *victim) > { > - int ret = 0; > - > - if (simple_positive(dentry)) { > - if (dentry->d_inode) { > - dget(dentry); > - switch (dentry->d_inode->i_mode & S_IFMT) { > - case S_IFDIR: > - ret = simple_rmdir(parent->d_inode, dentry); > - if (!ret) > - fsnotify_rmdir(parent->d_inode, dentry); > - break; > - default: > - simple_unlink(parent->d_inode, dentry); > - fsnotify_unlink(parent->d_inode, dentry); > - break; > - } > - if (!ret) > - d_delete(dentry); > - dput(dentry); > - } > - } > - return ret; > -} > - > -/** > - * tracefs_remove - removes a file or directory from the tracefs filesystem > - * @dentry: a pointer to a the dentry of the file or directory to be > - * removed. > - * > - * This function removes a file or directory in tracefs that was previously > - * created with a call to another tracefs function (like > - * tracefs_create_file() or variants thereof.) 
> - */ > -void tracefs_remove(struct dentry *dentry) > -{ > - struct dentry *parent; > - int ret; > - > - if (IS_ERR_OR_NULL(dentry)) > - return; > - > - parent = dentry->d_parent; > - inode_lock(parent->d_inode); > - ret = __tracefs_remove(dentry, parent); > - inode_unlock(parent->d_inode); > - if (!ret) > - simple_release_fs(&tracefs_mount, &tracefs_mount_count); > + simple_release_fs(&tracefs_mount, &tracefs_mount_count); > } > > /** > - * tracefs_remove_recursive - recursively removes a directory > + * tracefs_remove - recursively removes a directory > * @dentry: a pointer to a the dentry of the directory to be removed. > * > * This function recursively removes a directory tree in tracefs that > * was previously created with a call to another tracefs function > * (like tracefs_create_file() or variants thereof.) > */ > -void tracefs_remove_recursive(struct dentry *dentry) > +void tracefs_remove(struct dentry *dentry) > { > - struct dentry *child, *parent; > - > if (IS_ERR_OR_NULL(dentry)) > return; > > - parent = dentry; > - down: > - inode_lock(parent->d_inode); > - loop: > - /* > - * The parent->d_subdirs is protected by the d_lock. Outside that > - * lock, the child can be unlinked and set to be freed which can > - * use the d_u.d_child as the rcu head and corrupt this list. > - */ > - spin_lock(&parent->d_lock); > - list_for_each_entry(child, &parent->d_subdirs, d_child) { > - if (!simple_positive(child)) > - continue; > - > - /* perhaps simple_empty(child) makes more sense */ > - if (!list_empty(&child->d_subdirs)) { > - spin_unlock(&parent->d_lock); > - inode_unlock(parent->d_inode); > - parent = child; > - goto down; > - } > - > - spin_unlock(&parent->d_lock); > - > - if (!__tracefs_remove(child, parent)) > - simple_release_fs(&tracefs_mount, &tracefs_mount_count); > - > - /* > - * The parent->d_lock protects agaist child from unlinking > - * from d_subdirs. When releasing the parent->d_lock we can > - * no longer trust that the next pointer is valid. 
> - * Restart the loop. We'll skip this one with the > - * simple_positive() check. > - */ > - goto loop; > - } > - spin_unlock(&parent->d_lock); > - > - inode_unlock(parent->d_inode); > - child = parent; > - parent = parent->d_parent; > - inode_lock(parent->d_inode); > - > - if (child != dentry) > - /* go up */ > - goto loop; > - > - if (!__tracefs_remove(child, parent)) > - simple_release_fs(&tracefs_mount, &tracefs_mount_count); > - inode_unlock(parent->d_inode); > + simple_pin_fs(&trace_fs_type, &tracefs_mount, &tracefs_mount_count); > + simple_recursive_removal(dentry, remove_one); > + simple_release_fs(&tracefs_mount, &tracefs_mount_count); > } > > /** > diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h > index 58424eb3b329..0a817d763f0f 100644 > --- a/include/linux/debugfs.h > +++ b/include/linux/debugfs.h > @@ -82,7 +82,7 @@ struct dentry *debugfs_create_automount(const char *name, > void *data); > > void debugfs_remove(struct dentry *dentry); > -void debugfs_remove_recursive(struct dentry *dentry); > +#define debugfs_remove_recursive debugfs_remove > > const struct file_operations *debugfs_real_fops(const struct file *filp); > > diff --git a/include/linux/fs.h b/include/linux/fs.h > index 997a530ff4e9..73ffc8654987 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -3242,6 +3242,8 @@ extern int simple_unlink(struct inode *, struct dentry *); > extern int simple_rmdir(struct inode *, struct dentry *); > extern int simple_rename(struct inode *, struct dentry *, > struct inode *, struct dentry *, unsigned int); > +extern void simple_recursive_removal(struct dentry *, > + void (*callback)(struct dentry *)); > extern int noop_fsync(struct file *, loff_t, loff_t, int); > extern int noop_set_page_dirty(struct page *page); > extern void noop_invalidatepage(struct page *page, unsigned int offset, > diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h > index 88d279c1b863..99912445974c 100644 > --- a/include/linux/tracefs.h > 
+++ b/include/linux/tracefs.h > @@ -28,7 +28,6 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode, > struct dentry *tracefs_create_dir(const char *name, struct dentry *parent); > > void tracefs_remove(struct dentry *dentry); > -void tracefs_remove_recursive(struct dentry *dentry); > > struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent, > int (*mkdir)(const char *name), > diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c > index 563e80f9006a..88d94dc3ed37 100644 > --- a/kernel/trace/trace.c > +++ b/kernel/trace/trace.c > @@ -8366,7 +8366,7 @@ struct trace_array *trace_array_create(const char *name) > > ret = event_trace_add_tracer(tr->dir, tr); > if (ret) { > - tracefs_remove_recursive(tr->dir); > + tracefs_remove(tr->dir); > goto out_free_tr; > } > > @@ -8422,7 +8422,7 @@ static int __remove_instance(struct trace_array *tr) > event_trace_del_tracer(tr); > ftrace_clear_pids(tr); > ftrace_destroy_function_files(tr); > - tracefs_remove_recursive(tr->dir); > + tracefs_remove(tr->dir); > free_trace_buffers(tr); > > for (i = 0; i < tr->nr_topts; i++) { > diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c > index 648930823b57..25bb3e8fb170 100644 > --- a/kernel/trace/trace_events.c > +++ b/kernel/trace/trace_events.c > @@ -696,7 +696,7 @@ static void remove_subsystem(struct trace_subsystem_dir *dir) > return; > > if (!--dir->nr_events) { > - tracefs_remove_recursive(dir->entry); > + tracefs_remove(dir->entry); > list_del(&dir->list); > __put_system_dir(dir); > } > @@ -715,7 +715,7 @@ static void remove_event_file_dir(struct trace_event_file *file) > } > spin_unlock(&dir->d_lock); > > - tracefs_remove_recursive(dir); > + tracefs_remove(dir); > } > > list_del(&file->list); > @@ -3032,7 +3032,7 @@ int event_trace_del_tracer(struct trace_array *tr) > > down_write(&trace_event_sem); > __trace_remove_event_dirs(tr); > - tracefs_remove_recursive(tr->event_dir); > + tracefs_remove(tr->event_dir); > 
up_write(&trace_event_sem); > > tr->event_dir = NULL; > diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c > index fa95139445b2..fa45a031848c 100644 > --- a/kernel/trace/trace_hwlat.c > +++ b/kernel/trace/trace_hwlat.c > @@ -551,7 +551,7 @@ static int init_tracefs(void) > return 0; > > err: > - tracefs_remove_recursive(top_dir); > + tracefs_remove(top_dir); > return -ENOMEM; > } > > The patch in linux-next, commit 653f0d05be0948e7610bb786e6570bb6c48a4e75 (HEAD, refs/bisect/bad) Author: Al Viro <viro@xxxxxxxxxxxxxxxxxx> Date: Mon Nov 18 09:43:10 2019 -0500 simple_recursive_removal(): kernel-side rm -rf for ramfs-style filesystems two requirements: no file creations in IS_DEADDIR and no cross-directory renames whatsoever. Signed-off-by: Al Viro <viro@xxxxxxxxxxxxxxxxxx>, makes my simple QEMU setup crash when booting: [ 4.571181] list_del corruption. prev->next should be ffff8b75df3408d0, but was ffff8b75df340d50 [ 4.572064] ------------[ cut here ]------------ [ 4.572448] kernel BUG at lib/list_debug.c:51! 
[ 4.572838] invalid opcode: 0000 [#1] SMP NOPTI [ 4.573235] CPU: 0 PID: 479 Comm: systemd-udevd Not tainted 5.5.0-rc1+ #14 [ 4.573827] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu4 [ 4.574782] RIP: 0010:__list_del_entry_valid.cold+0x31/0x55 [ 4.575252] Code: 0d 3d a8 e8 14 dd bd ff 0f 0b 48 c7 c7 00 0e 3d a8 e8 06 dd bd ff 0f 0b 48 89 f2 48 89 fe 48 c7b [ 4.576829] RSP: 0018:ffffaef9401ebd30 EFLAGS: 00010246 [ 4.577283] RAX: 0000000000000054 RBX: ffff8b75df3416c0 RCX: 0000000000000000 [ 4.577879] RDX: 0000000000000000 RSI: ffff8b757fa1a248 RDI: ffff8b757fa1a248 [ 4.578479] RBP: ffff8b75df3407e0 R08: 0000000000000000 R09: 0000000000000000 [ 4.579055] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8b75df340860 [ 4.579660] R13: 0000000000000000 R14: ffff8b75df3416c0 R15: ffff8b75d6bda620 [ 4.580257] FS: 00007f3016d08940(0000) GS:ffff8b757fa00000(0000) knlGS:0000000000000000 [ 4.580941] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4.581425] CR2: 00005555eedf9cf3 CR3: 0000000197fa2000 CR4: 00000000000006f0 [ 4.582034] Call Trace: [ 4.582256] __dentry_kill+0x86/0x190 [ 4.582577] ? 
dput+0x20/0x460 [ 4.582839] dput+0x2a6/0x460 [ 4.583100] debugfs_remove+0x40/0x60 [ 4.583403] blk_mq_debugfs_unregister_sched+0x15/0x30 [ 4.583825] blk_mq_exit_sched+0x6b/0xa0 [ 4.584154] __elevator_exit+0x32/0x50 [ 4.584460] elevator_switch_mq+0x63/0x170 [ 4.584801] elevator_switch+0x33/0x70 [ 4.585114] elv_iosched_store+0x135/0x1b0 [ 4.585450] queue_attr_store+0x47/0x70 [ 4.585779] kernfs_fop_write+0xdc/0x1c0 [ 4.586128] vfs_write+0xdb/0x1d0 [ 4.586423] ksys_write+0x65/0xe0 [ 4.586716] do_syscall_64+0x5c/0xa0 [ 4.587029] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 4.587472] RIP: 0033:0x7f3017d4a467 [ 4.587782] Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 854 [ 4.589353] RSP: 002b:00007ffc7fa4f518 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 4.589994] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007f3017d4a467 [ 4.590605] RDX: 0000000000000004 RSI: 00007ffc7fa4f600 RDI: 000000000000000f [ 4.591212] RBP: 00007ffc7fa4f600 R08: fefefefefefefeff R09: ffffffff00000000 [ 4.591820] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000004 [ 4.592423] R13: 000055cdeaffe190 R14: 0000000000000004 R15: 00007f3017e1b700 [ 4.593032] Modules linked in: [ 4.593307] ---[ end trace 42f66ce1e6e1c1fe ]--- [ 4.593694] RIP: 0010:__list_del_entry_valid.cold+0x31/0x55 [ 4.594173] Code: 0d 3d a8 e8 14 dd bd ff 0f 0b 48 c7 c7 00 0e 3d a8 e8 06 dd bd ff 0f 0b 48 89 f2 48 89 fe 48 c7b [ 4.595756] RSP: 0018:ffffaef9401ebd30 EFLAGS: 00010246 [ 4.596205] RAX: 0000000000000054 RBX: ffff8b75df3416c0 RCX: 0000000000000000 [ 4.596818] RDX: 0000000000000000 RSI: ffff8b757fa1a248 RDI: ffff8b757fa1a248 [ 4.597423] RBP: ffff8b75df3407e0 R08: 0000000000000000 R09: 0000000000000000 [ 4.598038] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8b75df340860 [ 4.598640] R13: 0000000000000000 R14: ffff8b75df3416c0 R15: ffff8b75d6bda620 [ 4.599241] FS: 00007f3016d08940(0000) GS:ffff8b757fa00000(0000) knlGS:0000000000000000 [ 4.599936] CS: 
0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4.600415] CR2: 00005555eedf9cf3 CR3: 0000000197fa2000 CR4: 00000000000006f0 [ 4.601034] BUG: sleeping function called from invalid context at include/linux/percpu-rwsem.h:38 [ 4.601789] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 479, name: systemd-udevd [ 4.602505] INFO: lockdep is turned off. [ 4.602837] CPU: 0 PID: 479 Comm: systemd-udevd Tainted: G D 5.5.0-rc1+ #14 [ 4.603549] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu4 [ 4.604520] Call Trace: [ 4.604736] dump_stack+0x8f/0xd0 [ 4.605020] ___might_sleep.cold+0xb3/0xc3 [ 4.605374] exit_signals+0x30/0x2d0 [ 4.605689] do_exit+0xb4/0xc40 [ 4.605961] ? ksys_write+0x65/0xe0 [ 4.606256] rewind_stack_do_exit+0x17/0x20 [ 4.606624] note: systemd-udevd[479] exited with preempt_count 2 [ 4.611186] list_del corruption. prev->next should be ffff8b75df3489f0, but was ffff8b75df3480f0 [ 4.611972] ------------[ cut here ]------------ [ 4.612371] kernel BUG at lib/list_debug.c:51! 
[ 4.612783] invalid opcode: 0000 [#2] SMP NOPTI [ 4.613161] CPU: 0 PID: 511 Comm: systemd-udevd Tainted: G D W 5.5.0-rc1+ #14 [ 4.613875] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu4 [ 4.614842] RIP: 0010:__list_del_entry_valid.cold+0x31/0x55 [ 4.615309] Code: 0d 3d a8 e8 14 dd bd ff 0f 0b 48 c7 c7 00 0e 3d a8 e8 06 dd bd ff 0f 0b 48 89 f2 48 89 fe 48 c7b [ 4.616880] RSP: 0018:ffffaef9402a3d30 EFLAGS: 00010246 [ 4.617327] RAX: 0000000000000054 RBX: ffff8b75df347240 RCX: 0000000000000000 [ 4.617930] RDX: 0000000000000000 RSI: ffff8b757fa1a248 RDI: ffff8b757fa1a248 [ 4.618541] RBP: ffff8b75df348900 R08: 0000000000000000 R09: 0000000000000001 [ 4.619140] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8b75df348980 [ 4.619743] R13: 0000000000000000 R14: ffff8b75df347240 R15: ffff8b75d6a25020 [ 4.620349] FS: 00007f3016d08940(0000) GS:ffff8b757fa00000(0000) knlGS:0000000000000000 [ 4.621030] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4.621508] CR2: 000055cdeb042e48 CR3: 000000019e202000 CR4: 00000000000006f0 [ 4.622099] Call Trace: [ 4.622322] __dentry_kill+0x86/0x190 [ 4.622645] ? 
dput+0x20/0x460 [ 4.622911] dput+0x2a6/0x460 [ 4.623170] debugfs_remove+0x40/0x60 [ 4.623495] blk_mq_debugfs_unregister_sched+0x15/0x30 [ 4.623929] blk_mq_exit_sched+0x6b/0xa0 [ 4.624264] __elevator_exit+0x32/0x50 [ 4.624593] elevator_switch_mq+0x63/0x170 [ 4.624945] elevator_switch+0x33/0x70 [ 4.625268] elv_iosched_store+0x135/0x1b0 [ 4.625619] queue_attr_store+0x47/0x70 [ 4.625951] kernfs_fop_write+0xdc/0x1c0 [ 4.626289] vfs_write+0xdb/0x1d0 [ 4.626583] ksys_write+0x65/0xe0 [ 4.626870] do_syscall_64+0x5c/0xa0 [ 4.627180] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 4.627616] RIP: 0033:0x7f3017d4a467 [ 4.627925] Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 854 [ 4.629499] RSP: 002b:00007ffc7fa4f578 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 4.630136] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007f3017d4a467 [ 4.630735] RDX: 0000000000000004 RSI: 00007ffc7fa4f660 RDI: 000000000000000f [ 4.631334] RBP: 00007ffc7fa4f660 R08: fefefefefefefeff R09: ffffffff00000000 [ 4.631940] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000004 [ 4.632540] R13: 000055cdeaffe190 R14: 0000000000000004 R15: 00007f3017e1b700 [ 4.633141] Modules linked in: [ 4.633414] ---[ end trace 42f66ce1e6e1c1ff ]--- [ 4.633814] RIP: 0010:__list_del_entry_valid.cold+0x31/0x55 [ 4.634289] Code: 0d 3d a8 e8 14 dd bd ff 0f 0b 48 c7 c7 00 0e 3d a8 e8 06 dd bd ff 0f 0b 48 89 f2 48 89 fe 48 c7b [ 4.635881] RSP: 0018:ffffaef9401ebd30 EFLAGS: 00010246 [ 4.636329] RAX: 0000000000000054 RBX: ffff8b75df3416c0 RCX: 0000000000000000 [ 4.636940] RDX: 0000000000000000 RSI: ffff8b757fa1a248 RDI: ffff8b757fa1a248 [ 4.637544] RBP: ffff8b75df3407e0 R08: 0000000000000000 R09: 0000000000000000 [ 4.638149] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8b75df340860 [ 4.638750] R13: 0000000000000000 R14: ffff8b75df3416c0 R15: ffff8b75d6bda620 [ 4.639360] FS: 00007f3016d08940(0000) GS:ffff8b757fa00000(0000) knlGS:0000000000000000 [ 4.640047] CS: 
0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4.640537] CR2: 000055cdeb042e48 CR3: 000000019e202000 CR4: 00000000000006f0 [ 4.641128] note: systemd-udevd[511] exited with preempt_count 2 Reverting that commit makes it work again. How does that untested and unreviewed patch end up in linux-next? It took me 30 minutes to bisect. -- Thanks, David / dhildenb