Nothing prevents a dentry found by path lookup before a return of __debugfs_remove() to actually get opened after that return. Now, after the return of __debugfs_remove(), there are no guarantees whatsoever regarding the memory the corresponding inode's file_operations object had been kept in. Since __debugfs_remove() is seldomly invoked, usually from module exit handlers only, the race is hard to trigger and the impact is very low. A discussion of the problem outlined above as well as a suggested solution can be found in the (sub-)thread rooted at http://lkml.kernel.org/g/20130401203445.GA20862@xxxxxxxxxxxxxxxxxx ("Yet another pipe related oops.") Basically, Greg KH suggests to introduce an intermediate fops and Al Viro points out that a pointer to the original ones may be stored in ->d_fsdata. Follow this line of reasoning: - Add SRCU as a reverse dependency of DEBUG_FS. - Introduce a srcu_struct object for the debugfs subsystem. - In debugfs_create_file(), store a pointer to the original file_operations object in ->d_fsdata. - In __debugfs_remove(), clear out that dentry->d_fsdata member again. debugfs_remove() and debugfs_remove_recursive() wait for a SRCU grace period before returning to their caller. - Introduce an intermediate file_operations object named "debugfs_proxy_file_operations". It's ->open() functions checks, under the protection of a SRCU read lock, whether the "original" file_operations pointed to by ->d_fsdata is still valid and if so, tries to acquire a reference on the owning module. On success, it sets the file object's ->f_op to the original file_operations and forwards the ongoing open() call to the original ->open(). - For clarity, rename the former debugfs_file_operations to debugfs_noop_file_operations -- they are in no way canonical. The choice of SRCU over "normal" RCU is justified by the fact, that the former may also be used to protect ->i_private data from going away during the execution of a file's readers and writers which may (and do) sleep. Signed-off-by: Nicolai Stange <nicstange@xxxxxxxxx> --- Applicable to the Linus tree. The second of the two patches depends on the first one fs/debugfs/file.c | 29 ++++++++++++++++++++++++++++- fs/debugfs/inode.c | 16 +++++++++++++--- include/linux/debugfs.h | 6 +++++- lib/Kconfig.debug | 1 + 4 files changed, 47 insertions(+), 5 deletions(-) diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index d2ba12e..67df2c9 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -35,13 +35,40 @@ static ssize_t default_write_file(struct file *file, const char __user *buf, return count; } -const struct file_operations debugfs_file_operations = { +const struct file_operations debugfs_noop_file_operations = { .read = default_read_file, .write = default_write_file, .open = simple_open, .llseek = noop_llseek, }; +static int proxy_open(struct inode *inode, struct file *filp) +{ + struct dentry * const dentry = filp->f_path.dentry; + const struct file_operations __rcu *rcu_real_fops; + const struct file_operations *real_fops; + int srcu_idx; + + srcu_idx = srcu_read_lock(&debugfs_srcu); + rcu_real_fops = (const struct file_operations __rcu *)dentry->d_fsdata; + real_fops = srcu_dereference(rcu_real_fops, &debugfs_srcu); + real_fops = fops_get(real_fops); + srcu_read_unlock(&debugfs_srcu, srcu_idx); + + if (!real_fops) + return -ENOENT; + + replace_fops(filp, real_fops); + if (filp->f_op->open) + return filp->f_op->open(inode, filp); + + return 0; +} + +const struct file_operations debugfs_proxy_file_operations = { + .open = proxy_open, +}; + static struct dentry *debugfs_create_mode(const char *name, umode_t mode, struct dentry *parent, void *value, const struct file_operations *fops, diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index b7fcc0d..8ae2e1a 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -30,6 +30,8 @@ #define DEBUGFS_DEFAULT_MODE 0700 +DEFINE_SRCU(debugfs_srcu); + static struct vfsmount *debugfs_mount; static int debugfs_mount_count; static bool debugfs_registered; @@ -340,8 +342,12 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode, return failed_creating(dentry); inode->i_mode = mode; - inode->i_fop = fops ? fops : &debugfs_file_operations; inode->i_private = data; + + inode->i_fop = fops ? &debugfs_proxy_file_operations + : &debugfs_noop_file_operations; + rcu_assign_pointer(dentry->d_fsdata, (void *)fops); + d_instantiate(dentry, inode); fsnotify_create(d_inode(dentry->d_parent), dentry); return end_creating(dentry); @@ -523,10 +529,12 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent) if (simple_positive(dentry)) { dget(dentry); - if (d_is_dir(dentry)) + if (d_is_dir(dentry)) { ret = simple_rmdir(d_inode(parent), dentry); - else + } else { simple_unlink(d_inode(parent), dentry); + rcu_assign_pointer(dentry->d_fsdata, NULL); + } if (!ret) d_delete(dentry); dput(dentry); @@ -565,6 +573,7 @@ void debugfs_remove(struct dentry *dentry) mutex_unlock(&d_inode(parent)->i_mutex); if (!ret) simple_release_fs(&debugfs_mount, &debugfs_mount_count); + synchronize_srcu(&debugfs_srcu); } EXPORT_SYMBOL_GPL(debugfs_remove); @@ -642,6 +651,7 @@ void debugfs_remove_recursive(struct dentry *dentry) if (!__debugfs_remove(child, parent)) simple_release_fs(&debugfs_mount, &debugfs_mount_count); mutex_unlock(&d_inode(parent)->i_mutex); + synchronize_srcu(&debugfs_srcu); } EXPORT_SYMBOL_GPL(debugfs_remove_recursive); diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 19c066d..f8c7494 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -19,6 +19,7 @@ #include <linux/seq_file.h> #include <linux/types.h> +#include <linux/srcu.h> struct device; struct file_operations; @@ -44,7 +45,10 @@ extern struct dentry *arch_debugfs_dir; #if defined(CONFIG_DEBUG_FS) /* declared over in file.c */ -extern const struct file_operations debugfs_file_operations; +extern const struct file_operations debugfs_noop_file_operations; +extern const struct file_operations debugfs_proxy_file_operations; + +extern struct srcu_struct debugfs_srcu; struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8c15b29..3929878 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -257,6 +257,7 @@ config PAGE_OWNER config DEBUG_FS bool "Debug Filesystem" + select SRCU help debugfs is a virtual file system that kernel developers use to put debugging files into. Enable this option to be able to read and -- 2.6.3 -- To unsubscribe from this list: send the line "unsubscribe linux-doc" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html