On 8/15/23 10:03, Miklos Szeredi wrote: > On Fri, 11 Aug 2023 at 20:38, Bernd Schubert <bschubert@xxxxxxx> wrote: >> >> From: Miklos Szeredi <miklos@xxxxxxxxxx> >> >> atomic_open() will do an open-by-name or create-and-open >> depending on the flags. >> >> If file was created, then the old positive dentry is obviously >> stale, so it will be invalidated and a new one will be allocated. >> >> If not created, then check whether it's the same inode (same as in >> ->d_revalidate()) and if not, invalidate & allocate new dentry. >> >> Changes from Miklos initial patch (by Bernd): >> - LOOKUP_ATOMIC_REVALIDATE was added and is set for revalidate >> calls into the file system when revalidate by atomic open is >> supported - this is to avoid that ->d_revalidate() would skip >> revalidate and set DCACHE_ATOMIC_OPEN, although vfs >> does not supported it in the given code path (for example >> when LOOKUP_RCU is set)). > > I don't get it. We don't get so far as to set DCACHE_ATOMIC_OPEN if > LOOKUP_RCU is set. See lookup_fast, there are two calls to d_revalidate() that have LOOKUP_ATOMIC_REVALIDATE and one in RCU mode that does not. With the new flag LOOKUP_ATOMIC_REVALIDATE we tell ->revalidate() that we are in code path that supports revalidating atomically. Sure, ror RCU we can/should always return -ECHILD in fuse_dentry_revalidate when LOOKUP_RCU is set. But then it is also easy to miss that - at a minimum we need to document that DCACHE_ATOMIC_OPEN must not be set in RCU mode. My main fear is that there are several callers of d_revalidate() and I don't easily see if any of these has LOOKUP_OPEN set - LOOKUP_ATOMIC_REVALIDATE is an additional protection to avoid accidentally skipping revalidate in code path that is not atomic-open. I will happily remove LOOKUP_ATOMIC_REVALIDATE if you are sure that LOOKUP_OPEN and LOOKUP_RCU are enough to check for. > >> - Support atomic-open-revalidate in lookup_fast() - allow atomic >> open for positive dentries without O_CREAT being set. >> >> Signed-off-by: Miklos Szeredi <miklos@xxxxxxxxxx> >> Signed-off-by: Bernd Schubert <bschubert@xxxxxxx> >> Cc: Christian Brauner <brauner@xxxxxxxxxx> >> Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx> >> Cc: Dharmendra Singh <dsingh@xxxxxxx> >> Cc: linux-fsdevel@xxxxxxxxxxxxxxx >> --- >> fs/fuse/dir.c | 5 ++--- >> fs/namei.c | 17 +++++++++++++---- >> include/linux/dcache.h | 6 ++++++ >> include/linux/namei.h | 1 + >> 4 files changed, 22 insertions(+), 7 deletions(-) >> >> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c >> index c02b63fe91ca..8ccd49d63235 100644 >> --- a/fs/fuse/dir.c >> +++ b/fs/fuse/dir.c >> @@ -380,7 +380,6 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name >> if (name->len > FUSE_NAME_MAX) >> goto out; >> >> - >> forget = fuse_alloc_forget(); >> err = -ENOMEM; >> if (!forget) >> @@ -771,8 +770,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, >> } >> >> static int _fuse_atomic_open(struct inode *dir, struct dentry *entry, >> - struct file *file, unsigned flags, >> - umode_t mode) >> + struct file *file, unsigned flags, >> + umode_t mode) >> { >> >> int err; >> diff --git a/fs/namei.c b/fs/namei.c >> index e4fe0879ae55..5dae1b1afd0e 100644 >> --- a/fs/namei.c >> +++ b/fs/namei.c >> @@ -1643,12 +1643,14 @@ static struct dentry *lookup_fast(struct nameidata *nd) >> return ERR_PTR(-ECHILD); >> if (status == -ECHILD) >> /* we'd been told to redo it in non-rcu mode */ >> - status = d_revalidate(dentry, nd->flags); >> + status = d_revalidate(dentry, >> + nd->flags | LOOKUP_ATOMIC_REVALIDATE); >> } else { >> dentry = __d_lookup(parent, &nd->last); >> if (unlikely(!dentry)) >> return NULL; >> - status = d_revalidate(dentry, nd->flags); >> + status = d_revalidate(dentry, >> + nd->flags | LOOKUP_ATOMIC_REVALIDATE); >> } >> if (unlikely(status <= 0)) { >> if (!status) >> @@ -1656,6 +1658,12 @@ static struct dentry *lookup_fast(struct nameidata *nd) >> dput(dentry); >> return ERR_PTR(status); >> } >> + >> + if (unlikely(d_atomic_open(dentry))) { >> + dput(dentry); >> + return NULL; >> + } >> + > > This looks like a hack. Why not move the d_atomic_open() check to the caller? Thanks, I moved it to open_last_lookups() and to walk_component(), with a warn in the latter. Updated patch below, I kept LOOKUP_ATOMIC_REVALIDATE for now. diff --git a/fs/namei.c b/fs/namei.c index e4fe0879ae55..19a4bb3a0e4e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1643,12 +1643,14 @@ static struct dentry *lookup_fast(struct nameidata *nd) return ERR_PTR(-ECHILD); if (status == -ECHILD) /* we'd been told to redo it in non-rcu mode */ - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(dentry, + nd->flags | LOOKUP_ATOMIC_REVALIDATE); } else { dentry = __d_lookup(parent, &nd->last); if (unlikely(!dentry)) return NULL; - status = d_revalidate(dentry, nd->flags); + status = d_revalidate(dentry, + nd->flags | LOOKUP_ATOMIC_REVALIDATE); } if (unlikely(status <= 0)) { if (!status) @@ -1656,6 +1658,7 @@ static struct dentry *lookup_fast(struct nameidata *nd) dput(dentry); return ERR_PTR(status); } + return dentry; } @@ -1999,6 +2002,9 @@ static const char *walk_component(struct nameidata *nd, int flags) if (IS_ERR(dentry)) return ERR_CAST(dentry); } + + WARN_ON(dentry && d_atomic_open(dentry)); + if (!(flags & WALK_MORE) && nd->depth) put_link(nd); return step_into(nd, flags, dentry); @@ -3421,7 +3427,8 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, if (d_in_lookup(dentry)) break; - error = d_revalidate(dentry, nd->flags); + error = d_revalidate(dentry, + nd->flags | LOOKUP_ATOMIC_REVALIDATE); if (likely(error > 0)) break; if (error) @@ -3430,7 +3437,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, dput(dentry); dentry = NULL; } - if (dentry->d_inode) { + if (dentry->d_inode && !d_atomic_open(dentry)) { /* Cached positive dentry: will open in f_op->open */ return dentry; } @@ -3529,9 +3536,12 @@ static const char *open_last_lookups(struct nameidata *nd, dentry = lookup_fast(nd); if (IS_ERR(dentry)) return ERR_CAST(dentry); + if (dentry && unlikely(d_atomic_open(dentry))) { + dput(dentry); + dentry = NULL; + } if (likely(dentry)) goto finish_lookup; - BUG_ON(nd->flags & LOOKUP_RCU); } else { /* create side of things */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 6b351e009f59..f90eec22691c 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -208,6 +208,7 @@ struct dentry_operations { #define DCACHE_FALLTHRU 0x01000000 /* Fall through to lower layer */ #define DCACHE_NOKEY_NAME 0x02000000 /* Encrypted name encoded without key */ #define DCACHE_OP_REAL 0x04000000 +#define DCACHE_ATOMIC_OPEN 0x08000000 /* Always use ->atomic_open() to open this file */ #define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */ #define DCACHE_DENTRY_CURSOR 0x20000000 @@ -496,6 +497,11 @@ static inline bool d_is_fallthru(const struct dentry *dentry) return dentry->d_flags & DCACHE_FALLTHRU; } +static inline bool d_atomic_open(const struct dentry *dentry) +{ + return dentry->d_flags & DCACHE_ATOMIC_OPEN; +} + extern int sysctl_vfs_cache_pressure; diff --git a/include/linux/namei.h b/include/linux/namei.h index 1463cbda4888..7eec6c06b192 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -33,6 +33,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT}; #define LOOKUP_CREATE 0x0200 /* ... in object creation */ #define LOOKUP_EXCL 0x0400 /* ... in exclusive creation */ #define LOOKUP_RENAME_TARGET 0x0800 /* ... in destination of rename() */ +#define LOOKUP_ATOMIC_REVALIDATE 0x1000 /* atomic revalidate possible */ /* internal use only */ #define LOOKUP_PARENT 0x0010