On Wed, Nov 13, 2024 at 01:19:54AM +0000, Al Viro wrote: > On Tue, Nov 12, 2024 at 04:38:42PM -0800, Linus Torvalds wrote: > > Looking at that locking code in fadvise() just for the f_mode use does > > make me think this would be a really good cleanup. > > > > I note that our fcntl code seems buggy as-is, because while it does > > use f_lock for assignments (good), it clearly does *not* use them for > > reading. > > > > So it looks like you can actually read inconsistent values. > > > > I get the feeling that f_flags would want WRITE_ONCE/READ_ONCE in > > _addition_ to the f_lock use it has. > > AFAICS, fasync logics is the fishy part - the rest should be sane. > > > The f_mode thing with fadvise() smells like the same bug. Just because > > the modifications are serialized wrt each other doesn't mean that > > readers are then automatically ok. > > Reads are also under ->f_lock in there, AFAICS... > > Another thing in the vicinity is ->f_mode modifications after the calls > of anon_inode_getfile() in several callers - probably ought to switch > those to anon_inode_getfile_fmode(). That had been discussed back in > April when the function got merged, but "convert to using it" followup > series hadn't materialized... While we are at it, there's is a couple of kludges I really hate - mixing __FMODE_NONOTIFY and __FMODE_EXEC with O_... flags. E.g. for __FMODE_NONOTIFY all it takes is switching fanotify from anon_inode_getfd() to anon_inode_getfile_fmode() and adding a dentry_open_nonotify() to be used by fanotify on the other path. That's it - no more weird shit in OPEN_FMODE(), etc. For __FMODE_EXEC it might get trickier (nfs is the main consumer), but I seriously suspect that something like "have path_openat() check op->acc_mode & MAY_EXEC and set FMODE_EXEC in ->f_mode right after struct file allocation" would make a good starting point; yes, it would affect uselib(2), but... I've no idea whether it wouldn't be the right thing to do; would be hard to test. Anyway, untested __FMODE_NONOTIFY side of it: diff --git a/fs/fcntl.c b/fs/fcntl.c index 22dd9dcce7ec..ebd1c82bfb6b 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -1161,10 +1161,10 @@ static int __init fcntl_init(void) * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY * is defined as O_NONBLOCK on some platforms and not on others. */ - BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != + BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) | - __FMODE_EXEC | __FMODE_NONOTIFY)); + __FMODE_EXEC)); fasync_cache = kmem_cache_create("fasync_cache", sizeof(struct fasync_struct), 0, diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 9644bc72e457..43fbf29ef03a 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -101,8 +101,7 @@ static void __init fanotify_sysctls_init(void) * * Internal and external open flags are stored together in field f_flags of * struct file. Only external open flags shall be allowed in event_f_flags. - * Internal flags like FMODE_NONOTIFY, FMODE_EXEC, FMODE_NOCMTIME shall be - * excluded. + * Internal flags like FMODE_EXEC shall be excluded. */ #define FANOTIFY_INIT_ALL_EVENT_F_BITS ( \ O_ACCMODE | O_APPEND | O_NONBLOCK | \ @@ -262,8 +261,8 @@ static int create_fd(struct fsnotify_group *group, const struct path *path, * we need a new file handle for the userspace program so it can read even if it was * originally opened O_WRONLY. */ - new_file = dentry_open(path, - group->fanotify_data.f_flags | __FMODE_NONOTIFY, + new_file = dentry_open_nonotify(path, + group->fanotify_data.f_flags, current_cred()); if (IS_ERR(new_file)) { /* @@ -1404,6 +1403,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) unsigned int fid_mode = flags & FANOTIFY_FID_BITS; unsigned int class = flags & FANOTIFY_CLASS_BITS; unsigned int internal_flags = 0; + struct file *file; pr_debug("%s: flags=%x event_f_flags=%x\n", __func__, flags, event_f_flags); @@ -1472,7 +1472,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) (!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID))) return -EINVAL; - f_flags = O_RDWR | __FMODE_NONOTIFY; + f_flags = O_RDWR; if (flags & FAN_CLOEXEC) f_flags |= O_CLOEXEC; if (flags & FAN_NONBLOCK) @@ -1550,10 +1550,18 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) goto out_destroy_group; } - fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); + fd = get_unused_fd_flags(flags); if (fd < 0) goto out_destroy_group; + file = anon_inode_getfile_fmode("[fanotify]", &fanotify_fops, group, + f_flags, FMODE_NONOTIFY); + if (IS_ERR(file)) { + fd = PTR_ERR(file); + put_unused_fd(fd); + goto out_destroy_group; + } + fd_install(fd, file); return fd; out_destroy_group: diff --git a/fs/open.c b/fs/open.c index acaeb3e25c88..04cb581528ff 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1118,6 +1118,23 @@ struct file *dentry_open(const struct path *path, int flags, } EXPORT_SYMBOL(dentry_open); +struct file *dentry_open_nonotify(const struct path *path, int flags, + const struct cred *cred) +{ + struct file *f = alloc_empty_file(flags, cred); + if (!IS_ERR(f)) { + int error; + + f->f_mode |= FMODE_NONOTIFY; + error = vfs_open(path, f); + if (error) { + fput(f); + f = ERR_PTR(error); + } + } + return f; +} + /** * dentry_create - Create and open a file * @path: path to create @@ -1215,7 +1232,7 @@ inline struct open_how build_open_how(int flags, umode_t mode) inline int build_open_flags(const struct open_how *how, struct open_flags *op) { u64 flags = how->flags; - u64 strip = __FMODE_NONOTIFY | O_CLOEXEC; + u64 strip = O_CLOEXEC; int lookup_flags = 0; int acc_mode = ACC_MODE(flags); diff --git a/include/linux/fs.h b/include/linux/fs.h index e3c603d01337..18888d601550 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2731,6 +2731,8 @@ struct file *dentry_open(const struct path *path, int flags, struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred); struct path *backing_file_user_path(struct file *f); +struct file *dentry_open_nonotify(const struct path *path, int flags, + const struct cred *creds); /* * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file @@ -3620,11 +3622,9 @@ struct ctl_table; int __init list_bdev_fs_names(char *buf, size_t size); #define __FMODE_EXEC ((__force int) FMODE_EXEC) -#define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY) #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) -#define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ - (flag & __FMODE_NONOTIFY))) +#define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE))) static inline bool is_sxid(umode_t mode) { diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h index 80f37a0d40d7..613475285643 100644 --- a/include/uapi/asm-generic/fcntl.h +++ b/include/uapi/asm-generic/fcntl.h @@ -6,7 +6,6 @@ /* * FMODE_EXEC is 0x20 - * FMODE_NONOTIFY is 0x4000000 * These cannot be used by userspace O_* until internal and external open * flags are split. * -Eric Paris