io_uring always punts opens to async context, since there's no control over whether the lookup blocks or not. Add LOOKUP_NONBLOCK to support just doing the fast RCU-based lookups, which we know will not block. If we can do a cached path resolution of the filename, then we don't have to always punt lookups to a worker. We explicitly disallow O_CREAT | O_TRUNC opens, as those will require blocking, and O_TMPFILE as that requires filesystem interactions and there's currently no way to pass down an attempt to do nonblocking operations there. This basically boils down to whether or not we can do the fast path of open. If we can't, then return -EAGAIN and let the caller retry from an appropriate context that can handle blocking. During path resolution, we always do LOOKUP_RCU first. If that fails and we terminate LOOKUP_RCU, then fail a LOOKUP_NONBLOCK attempt as well. Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- fs/namei.c | 27 ++++++++++++++++++++++++++- include/linux/namei.h | 1 + 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index 7eb7830da298..83a7f7866232 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -686,6 +686,8 @@ static bool try_to_unlazy(struct nameidata *nd) BUG_ON(!(nd->flags & LOOKUP_RCU)); nd->flags &= ~LOOKUP_RCU; + if (nd->flags & LOOKUP_NONBLOCK) + goto out1; if (unlikely(!legitimize_links(nd))) goto out1; if (unlikely(!legitimize_path(nd, &nd->path, nd->seq))) @@ -722,6 +724,8 @@ static int unlazy_child(struct nameidata *nd, struct dentry *dentry, unsigned se BUG_ON(!(nd->flags & LOOKUP_RCU)); nd->flags &= ~LOOKUP_RCU; + if (nd->flags & LOOKUP_NONBLOCK) + goto out2; if (unlikely(!legitimize_links(nd))) goto out2; if (unlikely(!legitimize_mnt(nd->path.mnt, nd->m_seq))) @@ -792,6 +796,7 @@ static int complete_walk(struct nameidata *nd) */ if (!(nd->flags & (LOOKUP_ROOT | LOOKUP_IS_SCOPED))) nd->root.mnt = NULL; + nd->flags &= ~LOOKUP_NONBLOCK; if (!try_to_unlazy(nd)) 
return -ECHILD; } @@ -2202,6 +2207,10 @@ static const char *path_init(struct nameidata *nd, unsigned flags) int error; const char *s = nd->name->name; + /* LOOKUP_NONBLOCK requires RCU, ask caller to retry */ + if ((flags & (LOOKUP_RCU | LOOKUP_NONBLOCK)) == LOOKUP_NONBLOCK) + return ERR_PTR(-EAGAIN); + if (!*s) flags &= ~LOOKUP_RCU; if (flags & LOOKUP_RCU) @@ -3140,6 +3149,12 @@ static const char *open_last_lookups(struct nameidata *nd, return ERR_CAST(dentry); if (likely(dentry)) goto finish_lookup; + /* + * We can't guarantee nonblocking semantics beyond this, if + * the fast lookup fails. + */ + if (nd->flags & LOOKUP_NONBLOCK) + return ERR_PTR(-EAGAIN); BUG_ON(nd->flags & LOOKUP_RCU); } else { @@ -3233,6 +3248,7 @@ static int do_open(struct nameidata *nd, open_flag &= ~O_TRUNC; acc_mode = 0; } else if (d_is_reg(nd->path.dentry) && open_flag & O_TRUNC) { + WARN_ON_ONCE(nd->flags & LOOKUP_NONBLOCK); error = mnt_want_write(nd->path.mnt); if (error) return error; @@ -3299,7 +3315,16 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags, { struct dentry *child; struct path path; - int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path); + int error; + + /* + * We can't guarantee that the fs doesn't block further down, so + * just disallow nonblock attempts at O_TMPFILE for now. + */ + if (flags & LOOKUP_NONBLOCK) + return -EAGAIN; + + error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path); if (unlikely(error)) return error; error = mnt_want_write(path.mnt); diff --git a/include/linux/namei.h b/include/linux/namei.h index a4bb992623c4..c36c4e0805fc 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -46,6 +46,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT}; #define LOOKUP_NO_XDEV 0x040000 /* No mountpoint crossing. */ #define LOOKUP_BENEATH 0x080000 /* No escaping from starting point. */ #define LOOKUP_IN_ROOT 0x100000 /* Treat dirfd as fs root. 
*/ +#define LOOKUP_NONBLOCK 0x200000 /* don't block for lookup */ /* LOOKUP_* flags which do scope-related checks based on the dirfd. */ #define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT) -- 2.29.2