Opening a file grabs a reference on the terminal dentry in __legitimize_path(), then another one in do_dentry_open() and finally drops the initial reference in terminate_walk(). That's 2 modifications which don't need to be there -- do_dentry_open can consume the already held reference instead. In order to facilitate some debugging a dedicated namei state flag was added to denote this happened. When benchmarking on a 20-core vm using will-it-scale's open3_processes ("Same file open/close"), the results are (ops/s): before: 3087010 after: 4173977 (+35%) Signed-off-by: Mateusz Guzik <mjguzik@xxxxxxxxx> --- - drop the debug flag - tweak commentary - make vfs_open_consume clean up the path perhaps this will do the trick? :) fs/internal.h | 3 ++- fs/namei.c | 15 ++++++++++++--- fs/open.c | 44 +++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 55 insertions(+), 7 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index cdd73209eecb..6899e6ec9394 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -193,7 +193,8 @@ int chmod_common(const struct path *path, umode_t mode); int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); int chown_common(const struct path *path, uid_t user, gid_t group); -extern int vfs_open(const struct path *, struct file *); +int vfs_open_consume(struct path *, struct file *); +int vfs_open(const struct path *, struct file *); /* * inode.c diff --git a/fs/namei.c b/fs/namei.c index 1bf081959066..44ea5353ae26 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3683,6 +3683,7 @@ static const char *open_last_lookups(struct nameidata *nd, static int do_open(struct nameidata *nd, struct file *file, const struct open_flags *op) { + struct vfsmount *mnt; struct mnt_idmap *idmap; int open_flag = op->open_flag; bool do_truncate; @@ -3720,11 +3721,17 @@ static int do_open(struct nameidata *nd, error = mnt_want_write(nd->path.mnt); if (error) return error; + /* + * We grab an additional reference here because after the call to + * vfs_open_consume() we no longer own the reference in nd->path.mnt + * while we need to undo write access below. + */ + mnt = mntget(nd->path.mnt); do_truncate = true; } error = may_open(idmap, &nd->path, acc_mode, open_flag); if (!error && !(file->f_mode & FMODE_OPENED)) - error = vfs_open(&nd->path, file); + error = vfs_open_consume(&nd->path, file); if (!error) error = security_file_post_open(file, op->acc_mode); if (!error && do_truncate) @@ -3733,8 +3740,10 @@ static int do_open(struct nameidata *nd, WARN_ON(1); error = -EINVAL; } - if (do_truncate) - mnt_drop_write(nd->path.mnt); + if (do_truncate) { + mnt_drop_write(mnt); + mntput(mnt); + } return error; } diff --git a/fs/open.c b/fs/open.c index 22adbef7ecc2..2fdfb3133d0f 100644 --- a/fs/open.c +++ b/fs/open.c @@ -905,6 +905,15 @@ static inline int file_get_write_access(struct file *f) return error; } +/* + * Populate struct file + * + * NOTE: it assumes f_path is populated and consumes the caller's reference. + * + * The caller must not path_put on it regardless of the error code -- the + * routine will either clean it up on its own or rely on fput, which must + * be issued anyway. + */ static int do_dentry_open(struct file *f, int (*open)(struct inode *, struct file *)) { @@ -912,7 +921,6 @@ static int do_dentry_open(struct file *f, struct inode *inode = f->f_path.dentry->d_inode; int error; - path_get(&f->f_path); f->f_inode = inode; f->f_mapping = inode->i_mapping; f->f_wb_err = filemap_sample_wb_err(f->f_mapping); @@ -1045,6 +1053,7 @@ int finish_open(struct file *file, struct dentry *dentry, BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */ file->f_path.dentry = dentry; + path_get(&file->f_path); return do_dentry_open(file, open); } EXPORT_SYMBOL(finish_open); @@ -1077,15 +1086,22 @@ char *file_path(struct file *filp, char *buf, int buflen) EXPORT_SYMBOL(file_path); /** - * vfs_open - open the file at the given path + * vfs_open_consume - open the file at the given path and consume the reference * @path: path to open * @file: newly allocated file with f_flag initialized */ -int vfs_open(const struct path *path, struct file *file) +int vfs_open_consume(struct path *path, struct file *file) { int ret; file->f_path = *path; + path->mnt = NULL; + path->dentry = NULL; + + /* + * do_dentry_open() does the reference consuming regardless of its + * return value + */ ret = do_dentry_open(file, NULL); if (!ret) { /* @@ -1098,6 +1114,27 @@ int vfs_open(const struct path *path, struct file *file) return ret; } +/** + * vfs_open - open the file at the given path + * @path: path to open + * @file: newly allocated file with f_flag initialized + * + * See commentary in vfs_open_consume. The difference here is that this routine + * grabs its own reference and does not clean up the passed path. + */ +int vfs_open(const struct path *path, struct file *file) +{ + int ret; + + file->f_path = *path; + path_get(&file->f_path); + ret = do_dentry_open(file, NULL); + if (!ret) { + fsnotify_open(file); + } + return ret; +} + struct file *dentry_open(const struct path *path, int flags, const struct cred *cred) { @@ -1183,6 +1220,7 @@ struct file *kernel_file_open(const struct path *path, int flags, return f; f->f_path = *path; + path_get(&f->f_path); error = do_dentry_open(f, NULL); if (error) { fput(f); -- 2.43.0