io_uring always punts opens to async context, since there's no control over whether the lookup blocks or not. Add LOOKUP_NONBLOCK to support just doing the fast RCU-based lookups, which we know will not block. If we can do a cached path resolution of the filename, then we don't have to always punt lookups to a worker. During path resolution, we always do LOOKUP_RCU first. If that fails and we terminate LOOKUP_RCU, then fail a LOOKUP_NONBLOCK attempt as well. Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- fs/namei.c | 60 +++++++++++++++++++++++++++++++------------ include/linux/namei.h | 1 + 2 files changed, 44 insertions(+), 17 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 03d0e11e4f36..3d86915568fa 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -679,7 +679,7 @@ static bool legitimize_root(struct nameidata *nd) * Nothing should touch nameidata between unlazy_walk() failure and * terminate_walk(). */ -static int unlazy_walk(struct nameidata *nd) +static int complete_walk_rcu(struct nameidata *nd) { struct dentry *parent = nd->path.dentry; @@ -704,6 +704,18 @@ static int unlazy_walk(struct nameidata *nd) return -ECHILD; } +static int unlazy_walk(struct nameidata *nd) +{ + int ret; + + ret = complete_walk_rcu(nd); + /* If caller is asking for NONBLOCK lookup, assume we can't satisfy it */ + if (!ret && (nd->flags & LOOKUP_NONBLOCK)) + ret = -EAGAIN; + + return ret; +} + /** * unlazy_child - try to switch to ref-walk mode. 
* @nd: nameidata pathwalk data @@ -764,10 +776,13 @@ static int unlazy_child(struct nameidata *nd, struct dentry *dentry, unsigned se static inline int d_revalidate(struct dentry *dentry, unsigned int flags) { - if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) + if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { + if ((flags & (LOOKUP_RCU | LOOKUP_NONBLOCK)) == LOOKUP_NONBLOCK) + return -EAGAIN; return dentry->d_op->d_revalidate(dentry, flags); - else - return 1; + } + + return 1; } /** @@ -792,7 +807,7 @@ static int complete_walk(struct nameidata *nd) */ if (!(nd->flags & (LOOKUP_ROOT | LOOKUP_IS_SCOPED))) nd->root.mnt = NULL; - if (unlikely(unlazy_walk(nd))) + if (unlikely(complete_walk_rcu(nd))) return -ECHILD; } @@ -1466,8 +1481,9 @@ static struct dentry *lookup_fast(struct nameidata *nd, unsigned seq; dentry = __d_lookup_rcu(parent, &nd->last, &seq); if (unlikely(!dentry)) { - if (unlazy_walk(nd)) - return ERR_PTR(-ECHILD); + int ret = unlazy_walk(nd); + if (ret) + return ERR_PTR(ret); return NULL; } @@ -1569,8 +1585,9 @@ static inline int may_lookup(struct nameidata *nd) int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK); if (err != -ECHILD) return err; - if (unlazy_walk(nd)) - return -ECHILD; + err = unlazy_walk(nd); + if (err) + return err; } return inode_permission(nd->inode, MAY_EXEC); } @@ -1591,9 +1608,11 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq) // we need to grab link before we do unlazy. 
And we can't skip // unlazy even if we fail to grab the link - cleanup needs it bool grabbed_link = legitimize_path(nd, link, seq); + int ret; - if (unlazy_walk(nd) != 0 || !grabbed_link) - return -ECHILD; + ret = unlazy_walk(nd); + if (ret && !grabbed_link) + return ret; if (nd_alloc_stack(nd)) return 0; @@ -1634,8 +1653,9 @@ static const char *pick_link(struct nameidata *nd, struct path *link, touch_atime(&last->link); cond_resched(); } else if (atime_needs_update(&last->link, inode)) { - if (unlikely(unlazy_walk(nd))) - return ERR_PTR(-ECHILD); + error = unlazy_walk(nd); + if (unlikely(error)) + return ERR_PTR(error); touch_atime(&last->link); } @@ -1652,8 +1672,9 @@ static const char *pick_link(struct nameidata *nd, struct path *link, if (nd->flags & LOOKUP_RCU) { res = get(NULL, inode, &last->done); if (res == ERR_PTR(-ECHILD)) { - if (unlikely(unlazy_walk(nd))) - return ERR_PTR(-ECHILD); + error = unlazy_walk(nd); + if (unlikely(error)) + return ERR_PTR(error); res = get(link->dentry, inode, &last->done); } } else { @@ -2193,8 +2214,9 @@ static int link_path_walk(const char *name, struct nameidata *nd) } if (unlikely(!d_can_lookup(nd->path.dentry))) { if (nd->flags & LOOKUP_RCU) { - if (unlazy_walk(nd)) - return -ECHILD; + err = unlazy_walk(nd); + if (err) + return err; } return -ENOTDIR; } @@ -3394,10 +3416,14 @@ struct file *do_filp_open(int dfd, struct filename *pathname, set_nameidata(&nd, dfd, pathname); filp = path_openat(&nd, op, flags | LOOKUP_RCU); + /* If we fail RCU lookup, assume NONBLOCK cannot be honored */ + if (flags & LOOKUP_NONBLOCK) + goto out; if (unlikely(filp == ERR_PTR(-ECHILD))) filp = path_openat(&nd, op, flags); if (unlikely(filp == ERR_PTR(-ESTALE))) filp = path_openat(&nd, op, flags | LOOKUP_REVAL); +out: restore_nameidata(); return filp; } diff --git a/include/linux/namei.h b/include/linux/namei.h index a4bb992623c4..c36c4e0805fc 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -46,6 +46,7 @@ enum {LAST_NORM, 
LAST_ROOT, LAST_DOT, LAST_DOTDOT}; #define LOOKUP_NO_XDEV 0x040000 /* No mountpoint crossing. */ #define LOOKUP_BENEATH 0x080000 /* No escaping from starting point. */ #define LOOKUP_IN_ROOT 0x100000 /* Treat dirfd as fs root. */ +#define LOOKUP_NONBLOCK 0x200000 /* don't block for lookup */ /* LOOKUP_* flags which do scope-related checks based on the dirfd. */ #define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT) -- 2.29.2