On Wed, Nov 13, 2024 at 5:30 AM Al Viro <viro@xxxxxxxxxxxxxxxxxx> wrote: > > On Wed, Nov 13, 2024 at 01:19:54AM +0000, Al Viro wrote: > > On Tue, Nov 12, 2024 at 04:38:42PM -0800, Linus Torvalds wrote: > > > Looking at that locking code in fadvise() just for the f_mode use does > > > make me think this would be a really good cleanup. > > > > > > I note that our fcntl code seems buggy as-is, because while it does > > > use f_lock for assignments (good), it clearly does *not* use them for > > > reading. > > > > > > So it looks like you can actually read inconsistent values. > > > > > > I get the feeling that f_flags would want WRITE_ONCE/READ_ONCE in > > > _addition_ to the f_lock use it has. > > > > AFAICS, fasync logic is the fishy part - the rest should be sane. > > > > > The f_mode thing with fadvise() smells like the same bug. Just because > > > the modifications are serialized wrt each other doesn't mean that > > > readers are then automatically ok. > > > > Reads are also under ->f_lock in there, AFAICS... > > > > Another thing in the vicinity is ->f_mode modifications after the calls > > of anon_inode_getfile() in several callers - probably ought to switch > > those to anon_inode_getfile_fmode(). That had been discussed back in > > April when the function got merged, but "convert to using it" followup > > series hadn't materialized... > > While we are at it, there are a couple of kludges I really hate - > mixing __FMODE_NONOTIFY and __FMODE_EXEC with O_... flags. > > E.g. for __FMODE_NONOTIFY all it takes is switching fanotify from > anon_inode_getfd() to anon_inode_getfile_fmode() and adding > a dentry_open_nonotify() to be used by fanotify on the other path. > That's it - no more weird shit in OPEN_FMODE(), etc. 
> > For __FMODE_EXEC it might get trickier (nfs is the main consumer), > but I seriously suspect that something like "have path_openat() > check op->acc_mode & MAY_EXEC and set FMODE_EXEC in ->f_mode > right after struct file allocation" would make a good starting > point; yes, it would affect uselib(2), but... I've no idea whether > it wouldn't be the right thing to do; would be hard to test. > > Anyway, untested __FMODE_NONOTIFY side of it: > > diff --git a/fs/fcntl.c b/fs/fcntl.c > index 22dd9dcce7ec..ebd1c82bfb6b 100644 > --- a/fs/fcntl.c > +++ b/fs/fcntl.c > @@ -1161,10 +1161,10 @@ static int __init fcntl_init(void) > * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY > * is defined as O_NONBLOCK on some platforms and not on others. > */ > - BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != > + BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != > HWEIGHT32( > (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) | > - __FMODE_EXEC | __FMODE_NONOTIFY)); > + __FMODE_EXEC)); > > fasync_cache = kmem_cache_create("fasync_cache", > sizeof(struct fasync_struct), 0, > diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c > index 9644bc72e457..43fbf29ef03a 100644 > --- a/fs/notify/fanotify/fanotify_user.c > +++ b/fs/notify/fanotify/fanotify_user.c > @@ -101,8 +101,7 @@ static void __init fanotify_sysctls_init(void) > * > * Internal and external open flags are stored together in field f_flags of > * struct file. Only external open flags shall be allowed in event_f_flags. > - * Internal flags like FMODE_NONOTIFY, FMODE_EXEC, FMODE_NOCMTIME shall be > - * excluded. > + * Internal flags like FMODE_EXEC shall be excluded. > */ > #define FANOTIFY_INIT_ALL_EVENT_F_BITS ( \ > O_ACCMODE | O_APPEND | O_NONBLOCK | \ > @@ -262,8 +261,8 @@ static int create_fd(struct fsnotify_group *group, const struct path *path, > * we need a new file handle for the userspace program so it can read even if it was > * originally opened O_WRONLY. 
> */ > - new_file = dentry_open(path, > - group->fanotify_data.f_flags | __FMODE_NONOTIFY, > + new_file = dentry_open_nonotify(path, > + group->fanotify_data.f_flags, > current_cred()); > if (IS_ERR(new_file)) { > /* > @@ -1404,6 +1403,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) > unsigned int fid_mode = flags & FANOTIFY_FID_BITS; > unsigned int class = flags & FANOTIFY_CLASS_BITS; > unsigned int internal_flags = 0; > + struct file *file; > > pr_debug("%s: flags=%x event_f_flags=%x\n", > __func__, flags, event_f_flags); > @@ -1472,7 +1472,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) > (!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID))) > return -EINVAL; > > - f_flags = O_RDWR | __FMODE_NONOTIFY; > + f_flags = O_RDWR; > if (flags & FAN_CLOEXEC) > f_flags |= O_CLOEXEC; > if (flags & FAN_NONBLOCK) > @@ -1550,10 +1550,18 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) > goto out_destroy_group; > } > > - fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); > + fd = get_unused_fd_flags(flags); > if (fd < 0) > goto out_destroy_group; > > + file = anon_inode_getfile_fmode("[fanotify]", &fanotify_fops, group, > + f_flags, FMODE_NONOTIFY); > + if (IS_ERR(file)) { > + fd = PTR_ERR(file); > + put_unused_fd(fd); > + goto out_destroy_group; > + } > + fd_install(fd, file); > return fd; > > out_destroy_group: > diff --git a/fs/open.c b/fs/open.c > index acaeb3e25c88..04cb581528ff 100644 > --- a/fs/open.c > +++ b/fs/open.c > @@ -1118,6 +1118,23 @@ struct file *dentry_open(const struct path *path, int flags, > } > EXPORT_SYMBOL(dentry_open); > > +struct file *dentry_open_nonotify(const struct path *path, int flags, > + const struct cred *cred) > +{ > + struct file *f = alloc_empty_file(flags, cred); > + if (!IS_ERR(f)) { > + int error; > + > + f->f_mode |= FMODE_NONOTIFY; > + error = vfs_open(path, f); > + if (error) { > + fput(f); > + f 
= ERR_PTR(error); > + } > + } > + return f; > +} > + > /** > * dentry_create - Create and open a file > * @path: path to create > @@ -1215,7 +1232,7 @@ inline struct open_how build_open_how(int flags, umode_t mode) > inline int build_open_flags(const struct open_how *how, struct open_flags *op) > { > u64 flags = how->flags; > - u64 strip = __FMODE_NONOTIFY | O_CLOEXEC; > + u64 strip = O_CLOEXEC; > int lookup_flags = 0; > int acc_mode = ACC_MODE(flags); > > diff --git a/include/linux/fs.h b/include/linux/fs.h > index e3c603d01337..18888d601550 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -2731,6 +2731,8 @@ struct file *dentry_open(const struct path *path, int flags, > struct file *dentry_create(const struct path *path, int flags, umode_t mode, > const struct cred *cred); > struct path *backing_file_user_path(struct file *f); > +struct file *dentry_open_nonotify(const struct path *path, int flags, > + const struct cred *creds); > > /* > * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file > @@ -3620,11 +3622,9 @@ struct ctl_table; > int __init list_bdev_fs_names(char *buf, size_t size); > > #define __FMODE_EXEC ((__force int) FMODE_EXEC) > -#define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY) > > #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) > -#define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ > - (flag & __FMODE_NONOTIFY))) > +#define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE))) > > static inline bool is_sxid(umode_t mode) > { > diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h > index 80f37a0d40d7..613475285643 100644 > --- a/include/uapi/asm-generic/fcntl.h > +++ b/include/uapi/asm-generic/fcntl.h > @@ -6,7 +6,6 @@ > > /* > * FMODE_EXEC is 0x20 > - * FMODE_NONOTIFY is 0x4000000 > * These cannot be used by userspace O_* until internal and external open > * flags are split. > * -Eric Paris Nice. I will take it for a test drive. 
Thanks, Amir.