On Thu, 3 Aug 2023 at 02:53, Mateusz Guzik <mjguzik@xxxxxxxxx> wrote: > > So yes, atomics remain expensive on x86-64 even on a very moden uarch > and their impact is measurable in a syscall like read. Well, a patch like this should fix it. I intentionally didn't bother with the alpha osf version of readdir, because nobody cares, but I guess we could do this in the header too. Or we could have split the FMODE_ATOMIC_POS bit into two, and had a "ALWAYS" version and a regular version, but just having a "fdget_dir()" made it simpler. So this - together with just reverting commit 20ea1e7d13c1 ("file: always lock position for FMODE_ATOMIC_POS") - *should* fix any performance regression. But I have not tested it at all. So.... Linus
fs/readdir.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/fs/readdir.c b/fs/readdir.c index b264ce60114d..a43946d026ba 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -24,6 +24,27 @@ #include <asm/unaligned.h> +/* + * fd{get,put}_dir() is like fdget_pos(), but does the + * locking unconditionally, since it's a correctness issue, + * not a "follow POSIX" issue. + * + * pidfd_getfd() can violate the POSIX rules otherwise. + */ +static inline struct fd fdget_dir(int fd) +{ + unsigned long v = __fdget(fd); + struct file *file = (struct file *)(v & ~3); + + if (file) { + v |= FDPUT_POS_UNLOCK; + mutex_lock(&file->f_pos_lock); + } + return __to_fd(v); +} + +#define fdput_dir(x) fdput_pos(x) + /* * Note the "unsafe_put_user() semantics: we goto a * label for errors. @@ -181,7 +202,7 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, struct old_linux_dirent __user *, dirent, unsigned int, count) { int error; - struct fd f = fdget_pos(fd); + struct fd f = fdget_dir(fd); struct readdir_callback buf = { .ctx.actor = fillonedir, .dirent = dirent @@ -194,7 +215,7 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, if (buf.result) error = buf.result; - fdput_pos(f); + fdput_dir(f); return error; } @@ -279,7 +300,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, }; int error; - f = fdget_pos(fd); + f = fdget_dir(fd); if (!f.file) return -EBADF; @@ -295,7 +316,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, else error = count - buf.count; } - fdput_pos(f); + fdput_dir(f); return error; } @@ -362,7 +383,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, }; int error; - f = fdget_pos(fd); + f = fdget_dir(fd); if (!f.file) return -EBADF; @@ -379,7 +400,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, else error = count - buf.count; } - fdput_pos(f); + fdput_dir(f); return error; } @@ -439,7 +460,7 @@ COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd, struct compat_old_linux_dirent __user *, dirent, unsigned int, count) { int error; - struct fd f = fdget_pos(fd); + struct fd f = fdget_dir(fd); struct compat_readdir_callback buf = { .ctx.actor = compat_fillonedir, .dirent = dirent @@ -452,7 +473,7 @@ COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd, if (buf.result) error = buf.result; - fdput_pos(f); + fdput_dir(f); return error; } @@ -530,7 +551,7 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, }; int error; - f = fdget_pos(fd); + f = fdget_dir(fd); if (!f.file) return -EBADF; @@ -546,7 +567,7 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, else error = count - buf.count; } - fdput_pos(f); + fdput_dir(f); return error; } #endif