[PATCH 16/16] vfs: only retry last component if opening stale dentry

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Miklos Szeredi <mszeredi@xxxxxxx>

NFS optimizes away d_revalidate calls for the last component of an open.  This
means that the open itself can find the dentry stale.  In that case it returns
ESTALE, which results in the complete path being looked up again with
LOOKUP_REVAL.

This is unnecessary, however, since it would be enough to retry the last
component only.  Introduce EOPENSTALE (a kernel-private errno) and allow NFS to
retry opening only the last component.

Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxx>
---
 fs/namei.c            |   34 ++++++++++++++++++++++++++++++----
 fs/nfs/file.c         |    2 +-
 fs/open.c             |   16 +++++++++-------
 include/linux/errno.h |    1 +
 4 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 66b26da..c2f7951 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2133,8 +2133,8 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
 	 * Another problem is returing the "right" error value (e.g. for an
 	 * O_EXCL open we want to return EEXIST not EROFS).
 	 */
-	if ((open_flag & (O_CREAT | O_TRUNC)) ||
-	    (open_flag & O_ACCMODE) != O_RDONLY) {
+	if (!*want_write && ((open_flag & (O_CREAT | O_TRUNC)) ||
+			     (open_flag & O_ACCMODE) != O_RDONLY)) {
 		error = mnt_want_write(nd->path.mnt);
 		if (!error) {
 			*want_write = 1;
@@ -2305,6 +2305,8 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	struct file *filp;
 	struct inode *inode;
 	int symlink_ok = 0;
+	struct path save_parent = { .dentry = NULL, .mnt = NULL };
+	bool retried = false;
 	int error;
 
 	nd->flags &= ~LOOKUP_PARENT;
@@ -2368,6 +2370,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 			goto exit;
 	}
 
+retry_lookup:
 	mutex_lock(&dir->d_inode->i_mutex);
 
 	filp = lookup_open(nd, path, od, op, &want_write);
@@ -2462,12 +2465,21 @@ finish_lookup:
 		return NULL;
 	}
 
-	path_to_nameidata(path, nd);
+	if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) {
+		path_to_nameidata(path, nd);
+	} else {
+		save_parent.dentry = nd->path.dentry;
+		save_parent.mnt = mntget(path->mnt);
+		nd->path.dentry = path->dentry;
+
+	}
 	nd->inode = inode;
 
 	error = complete_walk(nd);
-	if (error)
+	if (error) {
+		path_put(&save_parent);
 		return ERR_PTR(error);
+	}
 	error = -EISDIR;
 	if ((open_flag & O_CREAT) && S_ISDIR(inode->i_mode))
 		goto exit;
@@ -2492,6 +2504,19 @@ common:
 		goto exit;
 	od->mnt = nd->path.mnt;
 	filp = finish_open(od, nd->path.dentry, NULL);
+	if (IS_ERR(filp) && PTR_ERR(filp) == -EOPENSTALE) {
+		error = -ESTALE;
+		if (!save_parent.dentry || retried)
+			goto exit;
+		BUG_ON(save_parent.dentry != dir);
+		path_put(&nd->path);
+		nd->path = save_parent;
+		nd->inode = dir->d_inode;
+		save_parent.mnt = NULL;
+		save_parent.dentry = NULL;
+		retried = true;
+		goto retry_lookup;
+	}
 	if (IS_ERR(filp))
 		goto out;
 	error = open_check_o_direct(filp);
@@ -2510,6 +2535,7 @@ opened:
 out:
 	if (want_write)
 		mnt_drop_write(nd->path.mnt);
+	path_put(&save_parent);
 	path_put(&nd->path);
 	return filp;
 
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 4e626ec..bb1f5cb 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -935,7 +935,7 @@ out:
 
 out_drop:
 	d_drop(dentry);
-	err = -ESTALE;
+	err = -EOPENSTALE;
 	goto out_put_ctx;
 }
 
diff --git a/fs/open.c b/fs/open.c
index a18e6bc..e7298b5 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -737,7 +737,6 @@ cleanup_all:
 	f->f_path.dentry = NULL;
 	f->f_path.mnt = NULL;
 cleanup_file:
-	put_filp(f);
 	dput(dentry);
 	mntput(mnt);
 	return ERR_PTR(error);
@@ -757,15 +756,16 @@ cleanup_file:
 struct file *finish_open(struct opendata *od, struct dentry *dentry,
 			 int (*open)(struct inode *, struct file *))
 {
-	struct file *filp;
-
-	filp = od->filp;
-	od->filp = NULL;
+	struct file *res;
 
 	mntget(od->mnt);
 	dget(dentry);
 
-	return do_dentry_open(dentry, od->mnt, filp, open, current_cred());
+	res = do_dentry_open(dentry, od->mnt, od->filp, open, current_cred());
+	if (!IS_ERR(res))
+		od->filp = NULL;
+
+	return res;
 }
 EXPORT_SYMBOL(finish_open);
 
@@ -809,7 +809,9 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
 
 	f->f_flags = flags;
 	res = do_dentry_open(dentry, mnt, f, NULL, cred);
-	if (!IS_ERR(res)) {
+	if (IS_ERR(res)) {
+		put_filp(f);
+	} else {
 		int error = open_check_o_direct(f);
 		if (error) {
 			fput(res);
diff --git a/include/linux/errno.h b/include/linux/errno.h
index 4668583..b1c33a0 100644
--- a/include/linux/errno.h
+++ b/include/linux/errno.h
@@ -16,6 +16,7 @@
 #define ERESTARTNOHAND	514	/* restart if no handler.. */
 #define ENOIOCTLCMD	515	/* No ioctl command */
 #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */
+#define EOPENSTALE	517	/* open found a stale dentry */
 
 /* Defined for the NFSv3 protocol */
 #define EBADHANDLE	521	/* Illegal NFS file handle */
-- 
1.7.7

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux