[PATCH 1/2] fs: add support for LOOKUP_NONBLOCK

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



io_uring always punts opens to async context, since there's no control
over whether the lookup blocks or not. Add LOOKUP_NONBLOCK to support
just doing the fast RCU based lookups, which we know will not block. If
we can do a cached path resolution of the filename, then we don't have
to always punt lookups for a worker.

During path resolution, we always do LOOKUP_RCU first. If that fails and
we terminate LOOKUP_RCU, then fail a LOOKUP_NONBLOCK attempt as well.

Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
 fs/namei.c            | 60 +++++++++++++++++++++++++++++++------------
 include/linux/namei.h |  1 +
 2 files changed, 44 insertions(+), 17 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 03d0e11e4f36..3d86915568fa 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -679,7 +679,7 @@ static bool legitimize_root(struct nameidata *nd)
  * Nothing should touch nameidata between unlazy_walk() failure and
  * terminate_walk().
  */
-static int unlazy_walk(struct nameidata *nd)
+static int complete_walk_rcu(struct nameidata *nd)
 {
 	struct dentry *parent = nd->path.dentry;
 
@@ -704,6 +704,18 @@ static int unlazy_walk(struct nameidata *nd)
 	return -ECHILD;
 }
 
+static int unlazy_walk(struct nameidata *nd)
+{
+	int ret;
+
+	ret = complete_walk_rcu(nd);
+	/* If caller is asking for NONBLOCK lookup, assume we can't satisfy it */
+	if (!ret && (nd->flags & LOOKUP_NONBLOCK))
+		ret = -EAGAIN;
+
+	return ret;
+}
+
 /**
  * unlazy_child - try to switch to ref-walk mode.
  * @nd: nameidata pathwalk data
@@ -764,10 +776,13 @@ static int unlazy_child(struct nameidata *nd, struct dentry *dentry, unsigned se
 
 static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
 {
-	if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE))
+	if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
+		if ((flags & (LOOKUP_RCU | LOOKUP_NONBLOCK)) == LOOKUP_NONBLOCK)
+			return -EAGAIN;
 		return dentry->d_op->d_revalidate(dentry, flags);
-	else
-		return 1;
+	}
+
+	return 1;
 }
 
 /**
@@ -792,7 +807,7 @@ static int complete_walk(struct nameidata *nd)
 		 */
 		if (!(nd->flags & (LOOKUP_ROOT | LOOKUP_IS_SCOPED)))
 			nd->root.mnt = NULL;
-		if (unlikely(unlazy_walk(nd)))
+		if (unlikely(complete_walk_rcu(nd)))
 			return -ECHILD;
 	}
 
@@ -1466,8 +1481,9 @@ static struct dentry *lookup_fast(struct nameidata *nd,
 		unsigned seq;
 		dentry = __d_lookup_rcu(parent, &nd->last, &seq);
 		if (unlikely(!dentry)) {
-			if (unlazy_walk(nd))
-				return ERR_PTR(-ECHILD);
+			int ret = unlazy_walk(nd);
+			if (ret)
+				return ERR_PTR(ret);
 			return NULL;
 		}
 
@@ -1569,8 +1585,9 @@ static inline int may_lookup(struct nameidata *nd)
 		int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
 		if (err != -ECHILD)
 			return err;
-		if (unlazy_walk(nd))
-			return -ECHILD;
+		err = unlazy_walk(nd);
+		if (err)
+			return err;
 	}
 	return inode_permission(nd->inode, MAY_EXEC);
 }
@@ -1591,9 +1608,11 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
 		// we need to grab link before we do unlazy.  And we can't skip
 		// unlazy even if we fail to grab the link - cleanup needs it
 		bool grabbed_link = legitimize_path(nd, link, seq);
+		int ret;
 
-		if (unlazy_walk(nd) != 0 || !grabbed_link)
-			return -ECHILD;
+		ret = unlazy_walk(nd);
+		if (ret && !grabbed_link)
+			return ret;
 
 		if (nd_alloc_stack(nd))
 			return 0;
@@ -1634,8 +1653,9 @@ static const char *pick_link(struct nameidata *nd, struct path *link,
 		touch_atime(&last->link);
 		cond_resched();
 	} else if (atime_needs_update(&last->link, inode)) {
-		if (unlikely(unlazy_walk(nd)))
-			return ERR_PTR(-ECHILD);
+		error = unlazy_walk(nd);
+		if (unlikely(error))
+			return ERR_PTR(error);
 		touch_atime(&last->link);
 	}
 
@@ -1652,8 +1672,9 @@ static const char *pick_link(struct nameidata *nd, struct path *link,
 		if (nd->flags & LOOKUP_RCU) {
 			res = get(NULL, inode, &last->done);
 			if (res == ERR_PTR(-ECHILD)) {
-				if (unlikely(unlazy_walk(nd)))
-					return ERR_PTR(-ECHILD);
+				error = unlazy_walk(nd);
+				if (unlikely(error))
+					return ERR_PTR(error);
 				res = get(link->dentry, inode, &last->done);
 			}
 		} else {
@@ -2193,8 +2214,9 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 		}
 		if (unlikely(!d_can_lookup(nd->path.dentry))) {
 			if (nd->flags & LOOKUP_RCU) {
-				if (unlazy_walk(nd))
-					return -ECHILD;
+				err = unlazy_walk(nd);
+				if (err)
+					return err;
 			}
 			return -ENOTDIR;
 		}
@@ -3394,10 +3416,14 @@ struct file *do_filp_open(int dfd, struct filename *pathname,
 
 	set_nameidata(&nd, dfd, pathname);
 	filp = path_openat(&nd, op, flags | LOOKUP_RCU);
+	/* If we fail RCU lookup, assume NONBLOCK cannot be honored */
+	if (flags & LOOKUP_NONBLOCK)
+		goto out;
 	if (unlikely(filp == ERR_PTR(-ECHILD)))
 		filp = path_openat(&nd, op, flags);
 	if (unlikely(filp == ERR_PTR(-ESTALE)))
 		filp = path_openat(&nd, op, flags | LOOKUP_REVAL);
+out:
 	restore_nameidata();
 	return filp;
 }
diff --git a/include/linux/namei.h b/include/linux/namei.h
index a4bb992623c4..c36c4e0805fc 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -46,6 +46,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT};
 #define LOOKUP_NO_XDEV		0x040000 /* No mountpoint crossing. */
 #define LOOKUP_BENEATH		0x080000 /* No escaping from starting point. */
 #define LOOKUP_IN_ROOT		0x100000 /* Treat dirfd as fs root. */
+#define LOOKUP_NONBLOCK		0x200000 /* don't block for lookup */
 /* LOOKUP_* flags which do scope-related checks based on the dirfd. */
 #define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT)
 
-- 
2.29.2




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux