[PATCH v2] fs: try an opportunistic lookup for O_CREAT opens too

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Today, when opening a file we'll typically do a fast lookup, but if
O_CREAT is set, the kernel always takes the exclusive inode lock. I
assume this was done with the expectation that O_CREAT means that we
always expect to do the create, but that's often not the case. Many
programs set O_CREAT even in scenarios where the file already exists.

This patch rearranges the pathwalk-for-open code to also attempt a
fast_lookup in certain O_CREAT cases. If a positive dentry is found, the
inode_lock can be avoided altogether, and if auditing isn't enabled, it
can stay in rcuwalk mode for the last step_into.

One notable exception that is hopefully temporary: if we're doing an
rcuwalk and auditing is enabled, skip the lookup_fast. Legitimizing the
dentry in that case is more expensive than taking the i_rwsem for now.

Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
---
Here's a revised patch that does a fast_lookup in the O_CREAT codepath
too. The main difference here is that if a positive dentry is found and
audit_dummy_context is true, then we keep the walk lazy for the last
component, which avoids having to take any locks on the parent (just
like with non-O_CREAT opens).

The testcase below runs in about 18s on v6.10 (on an 80 CPU machine).
With this patch, it runs in about 1s:

 #define _GNU_SOURCE 1
 #include <stdio.h>
 #include <unistd.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <stdlib.h>
 #include <sys/wait.h>

 #define PROCS           70
 #define LOOPS           500000

static int openloop(int tnum)
{
	char *file;
	int i, ret;

       	ret = asprintf(&file, "./testfile%d", tnum);
	if (ret < 0) {
		printf("asprintf failed for proc %d", tnum);
		return 1;
	}

	for (i = 0; i < LOOPS; ++i) {
		int fd = open(file, O_RDWR|O_CREAT, 0644);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		close(fd);
	}
	unlink(file);
	free(file);
	return 0;
}

int main(int argc, char **argv) {
	pid_t kids[PROCS];
	int i, ret = 0;

	for (i = 0; i < PROCS; ++i) {
		kids[i] = fork();
		if (kids[i] > 0)
			return openloop(i);
		if (kids[i] < 0)
			perror("fork");
	}

	for (i = 0; i < PROCS; ++i) {
		int ret2;

		if (kids[i] > 0) {
			wait(&ret2);
			if (ret2 != 0)
				ret = ret2;
		}
	}
	return ret;
}
---
Changes in v2:
- drop the lockref patch since Mateusz is working on a better approach
- add trailing_slashes helper function
- add a lookup_fast_for_open helper function
- make lookup_fast_for_open skip the lookup if auditing is enabled
- if we find a positive dentry and auditing is disabled, don't unlazy
- Link to v1: https://lore.kernel.org/r/20240802-openfast-v1-0-a1cff2a33063@xxxxxxxxxx
---
 fs/namei.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 53 insertions(+), 9 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 1e05a0f3f04d..2d716fb114c9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3518,6 +3518,47 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
 	return ERR_PTR(error);
 }
 
+static inline bool trailing_slashes(struct nameidata *nd)
+{
+	return (bool)nd->last.name[nd->last.len];
+}
+
+static struct dentry *lookup_fast_for_open(struct nameidata *nd, int open_flag)
+{
+	struct dentry *dentry;
+
+	if (open_flag & O_CREAT) {
+		/* Don't bother on an O_EXCL create */
+		if (open_flag & O_EXCL)
+			return NULL;
+
+		/*
+		 * FIXME: If auditing is enabled, then we'll have to unlazy to
+		 * use the dentry. For now, don't do this, since it shifts
+		 * contention from parent's i_rwsem to its d_lockref spinlock.
+		 * Reconsider this once dentry refcounting handles heavy
+		 * contention better.
+		 */
+		if ((nd->flags & LOOKUP_RCU) && !audit_dummy_context())
+			return NULL;
+	}
+
+	if (trailing_slashes(nd))
+		nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
+
+	dentry = lookup_fast(nd);
+
+	if (open_flag & O_CREAT) {
+		/* Discard negative dentries. Need inode_lock to do the create */
+		if (dentry && !dentry->d_inode) {
+			if (!(nd->flags & LOOKUP_RCU))
+				dput(dentry);
+			dentry = NULL;
+		}
+	}
+	return dentry;
+}
+
 static const char *open_last_lookups(struct nameidata *nd,
 		   struct file *file, const struct open_flags *op)
 {
@@ -3535,28 +3576,31 @@ static const char *open_last_lookups(struct nameidata *nd,
 		return handle_dots(nd, nd->last_type);
 	}
 
+	/* We _can_ be in RCU mode here */
+	dentry = lookup_fast_for_open(nd, open_flag);
+	if (IS_ERR(dentry))
+		return ERR_CAST(dentry);
+
 	if (!(open_flag & O_CREAT)) {
-		if (nd->last.name[nd->last.len])
-			nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
-		/* we _can_ be in RCU mode here */
-		dentry = lookup_fast(nd);
-		if (IS_ERR(dentry))
-			return ERR_CAST(dentry);
 		if (likely(dentry))
 			goto finish_lookup;
 
 		if (WARN_ON_ONCE(nd->flags & LOOKUP_RCU))
 			return ERR_PTR(-ECHILD);
 	} else {
-		/* create side of things */
 		if (nd->flags & LOOKUP_RCU) {
+			/* can stay in rcuwalk if not auditing */
+			if (dentry && audit_dummy_context())
+				goto check_slashes;
 			if (!try_to_unlazy(nd))
 				return ERR_PTR(-ECHILD);
 		}
 		audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
-		/* trailing slashes? */
-		if (unlikely(nd->last.name[nd->last.len]))
+check_slashes:
+		if (trailing_slashes(nd))
 			return ERR_PTR(-EISDIR);
+		if (dentry)
+			goto finish_lookup;
 	}
 
 	if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {

---
base-commit: 0c3836482481200ead7b416ca80c68a29cfdaabd
change-id: 20240723-openfast-ac49a7b6ade2

Best regards,
-- 
Jeff Layton <jlayton@xxxxxxxxxx>





[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux