Re: Concurrent `ls` takes out the thrash

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: "Benjamin Coddington" <bcodding@xxxxxxxxxx>

So, I've got the following patch, and it seems to work fine for the original
workload.  However, if I use ~20 procs started 2 seconds apart, I can still
sometimes get into the state where the machine takes a very long time (5 - 8
minutes).  I wonder if I am getting into a state were vmscan is reclaiming
pages while I'm trying to fill them up.  So.. I'll do a bit more debugging
and re-post this if I feel like it is still the right approach.

Adding an int to nfs_open_dir_context puts it at 60 bytes here.  Probably
could have added a unsigned long flags and used bits for the duped stuff as
well, but it was uglier that way, so I left it.  I like how duped carries
-1, 0, and >1 information without having flag defines cluttering everywhere.

Ben

8<------------------------------------------------------------------------

>From 5b3e747a984ec966e8c742d8f4fe4b08e1c93acd Mon Sep 17 00:00:00 2001
Message-Id: <5b3e747a984ec966e8c742d8f4fe4b08e1c93acd.1481149380.git.bcodding@xxxxxxxxxx>
From: Benjamin Coddington <bcodding@xxxxxxxxxx>
Date: Wed, 7 Dec 2016 16:23:30 -0500
Subject: [PATCH] NFS: Move readdirplus flag to directory context

For a concurrent 'ls -l' workload, processes can stack up in nfs_readdir()
both waiting on the next page and taking turns filling the next page from
XDR, but only one of them will have desc->plus set because setting it
clears the flag on the directory.  If a page is filled by a process that
doesn't have desc->plus set then the next pass through lookup() will cause
it to dump the entire page cache with nfs_force_use_readdirplus().  Then
the next readdir starts all over filling the pagecache.  Forward progress
happens, but only after many steps back re-filling the pagecache.

Fix this by moving the flag to use readdirplus to each open directory
context.

Suggested-by: Trond Myklebust <trondmy@xxxxxxxxxxxxxxx>
Signed-off-by: Benjamin Coddington <bcodding@xxxxxxxxxx>
---
 fs/nfs/dir.c           | 39 ++++++++++++++++++++++++---------------
 include/linux/nfs_fs.h |  1 +
 2 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7483722162fa..818172606fed 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -78,6 +78,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir
 		ctx->dir_cookie = 0;
 		ctx->dup_cookie = 0;
 		ctx->cred = get_rpccred(cred);
+		ctx->use_readdir_plus = 0;
 		spin_lock(&dir->i_lock);
 		list_add(&ctx->list, &nfsi->open_files);
 		spin_unlock(&dir->i_lock);
@@ -443,17 +444,35 @@ int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
 }
 
 static
-bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx)
+bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx,
+			struct nfs_open_dir_context *dir_ctx)
 {
 	if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
 		return false;
-	if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags))
+	if (xchg(&dir_ctx->use_readdir_plus, 0))
 		return true;
 	if (ctx->pos == 0)
 		return true;
 	return false;
 }
 
+static
+void nfs_set_readdirplus(struct inode *dir, int force)
+{
+	struct nfs_inode *nfsi = NFS_I(dir);
+	struct nfs_open_dir_context *ctx;
+
+	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
+	    !list_empty(&nfsi->open_files)) {
+		spin_lock(&dir->i_lock);
+		list_for_each_entry(ctx, &nfsi->open_files, list)
+			ctx->use_readdir_plus = 1;
+		spin_unlock(&dir->i_lock);
+		if (force)
+			invalidate_mapping_pages(dir->i_mapping, 0, -1);
+	}
+}
+
 /*
  * This function is called by the lookup and getattr code to request the
  * use of readdirplus to accelerate any future lookups in the same
@@ -461,11 +480,7 @@ bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx)
  */
 void nfs_advise_use_readdirplus(struct inode *dir)
 {
-	struct nfs_inode *nfsi = NFS_I(dir);
-
-	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
-	    !list_empty(&nfsi->open_files))
-		set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
+	nfs_set_readdirplus(dir, 0);
 }
 
 /*
@@ -478,13 +493,7 @@ void nfs_advise_use_readdirplus(struct inode *dir)
  */
 void nfs_force_use_readdirplus(struct inode *dir)
 {
-	struct nfs_inode *nfsi = NFS_I(dir);
-
-	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
-	    !list_empty(&nfsi->open_files)) {
-		set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
-		invalidate_mapping_pages(dir->i_mapping, 0, -1);
-	}
+	nfs_set_readdirplus(dir, 1);
 }
 
 static
@@ -921,7 +930,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	desc->ctx = ctx;
 	desc->dir_cookie = &dir_ctx->dir_cookie;
 	desc->decode = NFS_PROTO(inode)->decode_dirent;
-	desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0;
+	desc->plus = nfs_use_readdirplus(inode, ctx, dir_ctx) ? 1 : 0;
 
 	if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
 		res = nfs_revalidate_mapping(inode, file->f_mapping);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index cb631973839a..fe06c1dd1737 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -89,6 +89,7 @@ struct nfs_open_dir_context {
 	__u64 dir_cookie;
 	__u64 dup_cookie;
 	signed char duped;
+	int use_readdir_plus;
 };
 
 /*
-- 
2.5.5

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux