Failed opens (mostly ENOENT) legitimately happen a lot; for example, here are stats from stracing a kernel build for a few seconds (strace -fc make): % time seconds usecs/call calls errors syscall ------ ----------- ----------- --------- --------- ------------------ 0.76 0.076233 5 15040 3688 openat (this is tons of header files tried in different paths) Apart from a rare corner case where the file object is fully constructed and we need to abort, there is a lot of overhead which can be avoided. The most notable is the delegation of freeing to task_work, which comes with an enormous cost (see 021a160abf62 ("fs: use __fput_sync in close(2)") for an example). Benched with will-it-scale with a custom testcase based on tests/open1.c: [snip] while (1) { int fd = open("/tmp/nonexistent", O_RDONLY); assert(fd == -1); (*iterations)++; } [/snip] Sapphire Rapids, one worker in the single-threaded case (ops/s): before: 1950013 after: 2914973 (+49%) Signed-off-by: Mateusz Guzik <mjguzik@xxxxxxxxx> --- fs/file_table.c | 39 +++++++++++++++++++++++++++++++++++++++ fs/namei.c | 2 +- include/linux/file.h | 1 + 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/fs/file_table.c b/fs/file_table.c index ee21b3da9d08..320dc1f9aa0e 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -82,6 +82,16 @@ static inline void file_free(struct file *f) call_rcu(&f->f_rcuhead, file_free_rcu); } +static inline void file_free_badopen(struct file *f) +{ + BUG_ON(f->f_mode & (FMODE_BACKING | FMODE_OPENED)); + security_file_free(f); + put_cred(f->f_cred); + if (likely(!(f->f_mode & FMODE_NOACCOUNT))) + percpu_counter_dec(&nr_files); + kmem_cache_free(filp_cachep, f); +} + /* * Return the total number of open files in the system */ @@ -468,6 +478,35 @@ void __fput_sync(struct file *file) EXPORT_SYMBOL(fput); EXPORT_SYMBOL(__fput_sync); +/* + * Clean up after failing to open (e.g., open(2) returns with -ENOENT). 
+ * + * This represents opportunities to shave on work in the common case compared + * to the usual fput: + * 1. vast majority of the time FMODE_OPENED is not set, meaning there is no + * need to delegate to task_work + * 2. if the above holds then we are guaranteed we have the only reference with + * nobody else seeing the file, thus no need to use atomics to release it + * 3. then there is no need to delegate freeing to RCU + */ +void fput_badopen(struct file *file) +{ + if (unlikely(file->f_mode & (FMODE_BACKING | FMODE_OPENED))) { + fput(file); + return; + } + + if (WARN_ON(atomic_long_read(&file->f_count) != 1)) { + fput(file); + return; + } + + /* zero out the ref count to appease possible asserts */ + atomic_long_set(&file->f_count, 0); + file_free_badopen(file); +} +EXPORT_SYMBOL(fput_badopen); + void __init files_init(void) { filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, diff --git a/fs/namei.c b/fs/namei.c index 567ee547492b..67579fe30b28 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3802,7 +3802,7 @@ static struct file *path_openat(struct nameidata *nd, WARN_ON(1); error = -EINVAL; } - fput(file); + fput_badopen(file); if (error == -EOPENSTALE) { if (flags & LOOKUP_RCU) error = -ECHILD; diff --git a/include/linux/file.h b/include/linux/file.h index 6e9099d29343..96300e27d9a8 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -15,6 +15,7 @@ struct file; extern void fput(struct file *); +extern void fput_badopen(struct file *); struct file_operations; struct task_struct; -- 2.39.2