In some scenarios, file systems might want to pass an already opened struct file instance on an open() call, instead of opening a new one. This allows techniques similar to the already well-known file descriptor passing via Unix domain sockets, but now also for plain open() calls. Signed-off-by: Enrico Weigelt, metux IT consult <info@xxxxxxxxx> --- fs/Kconfig | 3 +++ fs/internal.h | 6 ++++++ fs/namei.c | 2 +- fs/open.c | 42 +++++++++++++++++++++++++++++++++++++++++- include/linux/fs.h | 9 +++++++++ 5 files changed, 60 insertions(+), 2 deletions(-) diff --git a/fs/Kconfig b/fs/Kconfig index 141a856c50e7..b8b7a77b656c 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -357,4 +357,7 @@ source "fs/unicode/Kconfig" config IO_WQ bool +config FS_BOXED_FILE + bool + endmenu diff --git a/fs/internal.h b/fs/internal.h index 6aeae7ef3380..e5e9cf038a24 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -142,6 +142,12 @@ int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int chown_common(const struct path *path, uid_t user, gid_t group); extern int vfs_open(const struct path *, struct file *); +#ifdef CONFIG_FS_BOXED_FILE +extern struct file *unbox_file(struct file *); +#else +static inline struct file *unbox_file(struct file *f) { return f; } +#endif + /* * inode.c */ diff --git a/fs/namei.c b/fs/namei.c index 79b0ff9b151e..b186d2d75b63 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3496,7 +3496,7 @@ static struct file *path_openat(struct nameidata *nd, } if (likely(!error)) { if (likely(file->f_mode & FMODE_OPENED)) - return file; + return unbox_file(file); WARN_ON(1); error = -EINVAL; } diff --git a/fs/open.c b/fs/open.c index e53af13b5835..88daf09ffeb4 100644 --- a/fs/open.c +++ b/fs/open.c @@ -769,6 +769,46 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) return ksys_fchown(fd, user, group); } +#ifdef CONFIG_FS_BOXED_FILE +/* + * Finish up an open procedure before returning the file to the caller. 
+ * In case the fs returns some unusual things like directly passing + * another file, this will be handled here. + * + * This function is only supposed to be called by functions like dentry_open() + * and path_openat() that allocate a new struct file and finally pass it to + * vfs_open() - the struct file should not have been used in any way in the + * meantime, or unpleasant things may happen. + */ +struct file *unbox_file(struct file *f) +{ + struct file *boxed; + + if (unlikely(!f)) + return NULL; + + if (IS_ERR(f)) + return f; + + if (likely(!f->boxed_file)) + return f; + + /* the fs returned another struct file (f->boxed_file) that should be + directly passed to our callers instead of the one that had been newly + created for the open procedure. + + the boxed file is already ref'ed, so we keep the refcount. + since the upper file (f) had just been opened, with no further access, + we can just call fput() on it. + */ + + boxed = f->boxed_file; + fput(f); + + return boxed; +} +#endif /* CONFIG_FS_BOXED_FILE */ + static int do_dentry_open(struct file *f, struct inode *inode, int (*open)(struct inode *, struct file *)) @@ -959,7 +999,7 @@ struct file *dentry_open(const struct path *path, int flags, f = ERR_PTR(error); } } - return f; + return unbox_file(f); } EXPORT_SYMBOL(dentry_open); diff --git a/include/linux/fs.h b/include/linux/fs.h index c3c88fdb9b2a..a778c5c057ab 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -955,6 +955,15 @@ struct file { struct address_space *f_mapping; errseq_t f_wb_err; errseq_t f_sb_err; /* for syncfs */ + +#ifdef CONFIG_FS_BOXED_FILE + /* Only for file systems that want to pass an *existing* file to the + caller of open() instead of the newly created one. This has + semantics similar to passing an fd via a unix socket, but via an + open() call instead. + */ + struct file *boxed_file; +#endif } __randomize_layout __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ -- 2.20.1