Re: Concurrent `ls` takes out the thrash

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: "Benjamin Coddington" <bcodding@xxxxxxxxxx>

Ehh.. I think it's kinda ugly, but this is what I came up with.

It works well, though, and handles everything I throw at it.  Its not as
simple as Christoph's suggestion to just go back to .iterate, which would
solve this in a simpler way.

Ben


8<---------------------------------------------------------------------------------------

>From c95f29761bb1f60480863601c0becdd6cba89998 Mon Sep 17 00:00:00 2001
Message-Id: <c95f29761bb1f60480863601c0becdd6cba89998.1481213418.git.bcodding@xxxxxxxxxx>
From: Benjamin Coddington <bcodding@xxxxxxxxxx>
Date: Wed, 7 Dec 2016 16:23:30 -0500
Subject: [PATCH] NFS: Handle concurrent users of nfs_readdir()

For a concurrent 'ls -l' workload, processes can stack up in nfs_readdir()
both waiting on the next page and taking turns filling the next page from
XDR, but only one of them will have desc->plus set because setting it
clears the flag on the directory.  If a page is filled by a process that
doesn't have desc->plus set then the next pass through lookup() will cause
it to dump the entire page cache with nfs_force_use_readdirplus().  Then
the next readdir starts all over filling the pagecache.  Forward progress
happens, but only after many steps back re-filling the pagecache.

Fix this by using an additional flag on the inode to extend the lifetime
of the readdir advisement to any processes that may be sending READDIR
calls for that directory in nfs_readdir().
---
 fs/nfs/dir.c           | 25 ++++++++++++++++++-------
 include/linux/nfs_fs.h |  1 +
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7483722162fa..d3e256d723d1 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -167,8 +167,10 @@ typedef struct {
 	unsigned long	gencount;
 	unsigned int	cache_entry_index;
 	unsigned int	plus:1;
+	unsigned int	doer:1;
 	unsigned int	eof:1;
 } nfs_readdir_descriptor_t;
+static bool nfs_use_readdirplus(struct inode *, nfs_readdir_descriptor_t *desc);
 
 /*
  * The caller is responsible for calling nfs_readdir_release_array(page)
@@ -385,6 +387,7 @@ int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
 	int		error;
 
  again:
+	desc->plus = nfs_use_readdirplus(inode, desc) ? 1 : 0;
 	timestamp = jiffies;
 	gencount = nfs_inc_attr_generation_counter();
 	error = NFS_PROTO(inode)->readdir(file_dentry(file), cred, entry->cookie, pages,
@@ -393,8 +396,6 @@ int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
 		/* We requested READDIRPLUS, but the server doesn't grok it */
 		if (error == -ENOTSUPP && desc->plus) {
 			NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
-			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
-			desc->plus = 0;
 			goto again;
 		}
 		goto error;
@@ -443,14 +444,22 @@ int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
 }
 
 static
-bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx)
+bool nfs_use_readdirplus(struct inode *dir, nfs_readdir_descriptor_t *desc)
 {
+	struct nfs_inode *nfsi = NFS_I(dir);
+
 	if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
 		return false;
-	if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags))
+	if (test_bit(NFS_INO_DOING_RDPLUS, &nfsi->flags))
 		return true;
-	if (ctx->pos == 0)
+	if (desc->ctx->pos == 0)
 		return true;
+	if (test_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags) &&
+		!test_and_set_bit(NFS_INO_DOING_RDPLUS, &nfsi->flags)) {
+		clear_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
+		desc->doer = 1;
+		return true;
+	}
 	return false;
 }
 
@@ -921,7 +930,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	desc->ctx = ctx;
 	desc->dir_cookie = &dir_ctx->dir_cookie;
 	desc->decode = NFS_PROTO(inode)->decode_dirent;
-	desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0;
 
 	if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
 		res = nfs_revalidate_mapping(inode, file->f_mapping);
@@ -943,7 +951,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 			break;
 		}
 		if (res == -ETOOSMALL && desc->plus) {
-			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
+			clear_bit(NFS_INO_DOING_RDPLUS, &NFS_I(inode)->flags);
 			nfs_zap_caches(inode);
 			desc->page_index = 0;
 			desc->plus = 0;
@@ -957,6 +965,9 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 		if (res < 0)
 			break;
 	} while (!desc->eof);
+
+	if (desc->doer)
+		clear_bit(NFS_INO_DOING_RDPLUS, &NFS_I(inode)->flags);
 out:
 	if (res > 0)
 		res = 0;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index cb631973839a..e4d31fbad407 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -207,6 +207,7 @@ struct nfs_inode {
 #define NFS_INO_LAYOUTCOMMITTING (10)		/* layoutcommit inflight */
 #define NFS_INO_LAYOUTSTATS	(11)		/* layoutstats inflight */
 #define NFS_INO_ODIRECT		(12)		/* I/O setting is O_DIRECT */
+#define NFS_INO_DOING_RDPLUS	(13)		/* doing readdirplus */
 
 static inline struct nfs_inode *NFS_I(const struct inode *inode)
 {
-- 
2.5.5

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux