[PATCH 29/35] nfs: in-commit pages accounting and wait queue

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When running 10+ concurrent dd's, I observed very bumpy commit submission
(partly because the dd's were started at the same time, and hence reached
4MB of to-commit pages at the same time). Basically we rely on the server
to complete and return write/commit requests, and want both to progress
smoothly without consuming too many pages. The write request wait queue
is not enough, as it is mainly network bound. So add another wait queue
for commit requests. Only async writes need to sleep on this queue.

cc: Trond Myklebust <Trond.Myklebust@xxxxxxxxxx>
Signed-off-by: Wu Fengguang <fengguang.wu@xxxxxxxxx>
---
 fs/nfs/client.c           |    1 
 fs/nfs/write.c            |   51 ++++++++++++++++++++++++++++++------
 include/linux/nfs_fs_sb.h |    2 +
 3 files changed, 46 insertions(+), 8 deletions(-)

--- linux-next.orig/fs/nfs/write.c	2010-12-13 21:46:21.000000000 +0800
+++ linux-next/fs/nfs/write.c	2010-12-13 21:46:21.000000000 +0800
@@ -516,7 +516,7 @@ nfs_mark_request_commit(struct nfs_page 
 }
 
 static int
-nfs_clear_request_commit(struct nfs_page *req)
+nfs_clear_request_commit(struct inode *inode, struct nfs_page *req)
 {
 	struct page *page = req->wb_page;
 
@@ -554,7 +554,7 @@ nfs_mark_request_commit(struct nfs_page 
 }
 
 static inline int
-nfs_clear_request_commit(struct nfs_page *req)
+nfs_clear_request_commit(struct inode *inode, struct nfs_page *req)
 {
 	return 0;
 }
@@ -599,8 +599,10 @@ nfs_scan_commit(struct inode *inode, str
 		return 0;
 
 	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
-	if (ret > 0)
+	if (ret > 0) {
 		nfsi->ncommit -= ret;
+		atomic_long_add(ret, &NFS_SERVER(inode)->in_commit);
+	}
 	if (nfs_need_commit(NFS_I(inode)))
 		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 	return ret;
@@ -668,7 +670,7 @@ static struct nfs_page *nfs_try_to_updat
 		spin_lock(&inode->i_lock);
 	}
 
-	if (nfs_clear_request_commit(req) &&
+	if (nfs_clear_request_commit(inode, req) &&
 			radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
 				req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL)
 		NFS_I(inode)->ncommit--;
@@ -1271,6 +1273,34 @@ int nfs_writeback_done(struct rpc_task *
 
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+static void nfs_commit_wait(struct nfs_server *nfss)
+{
+	long limit = nfs_congestion_kb >> (PAGE_SHIFT - 10);
+	DEFINE_WAIT(wait);
+
+	if (atomic_long_read(&nfss->in_commit) < limit)
+		return;
+
+	for (;;) {
+		prepare_to_wait(&nfss->in_commit_wait, &wait,
+				TASK_UNINTERRUPTIBLE);
+		if (atomic_long_read(&nfss->in_commit) < limit)
+			break;
+
+		io_schedule();
+	}
+	finish_wait(&nfss->in_commit_wait, &wait);
+}
+
+static void nfs_commit_wakeup(struct nfs_server *nfss)
+{
+	long limit = nfs_congestion_kb >> (PAGE_SHIFT - 10);
+
+	if (atomic_long_read(&nfss->in_commit) < limit - limit / 8 &&
+	    waitqueue_active(&nfss->in_commit_wait))
+		wake_up(&nfss->in_commit_wait);
+}
+
 static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
 {
 	if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
@@ -1376,6 +1406,7 @@ nfs_commit_list(struct inode *inode, str
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
 		nfs_mark_request_commit(req);
+		atomic_long_dec(&NFS_SERVER(inode)->in_commit);
 		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
 				BDI_RECLAIMABLE);
@@ -1409,7 +1440,8 @@ static void nfs_commit_release(void *cal
 	while (!list_empty(&data->pages)) {
 		req = nfs_list_entry(data->pages.next);
 		nfs_list_remove_request(req);
-		nfs_clear_request_commit(req);
+		nfs_clear_request_commit(data->inode, req);
+		atomic_long_dec(&NFS_SERVER(data->inode)->in_commit);
 
 		dprintk("NFS:       commit (%s/%lld %d@%lld)",
 			req->wb_context->path.dentry->d_inode->i_sb->s_id,
@@ -1438,6 +1470,7 @@ static void nfs_commit_release(void *cal
 		nfs_clear_page_tag_locked(req);
 	}
 	nfs_commit_clear_lock(NFS_I(data->inode));
+	nfs_commit_wakeup(NFS_SERVER(data->inode));
 	nfs_commitdata_release(calldata);
 }
 
@@ -1452,11 +1485,13 @@ static const struct rpc_call_ops nfs_com
 int nfs_commit_inode(struct inode *inode, int how)
 {
 	LIST_HEAD(head);
-	int may_wait = how & FLUSH_SYNC;
+	int sync = how & FLUSH_SYNC;
 	int res = 0;
 
-	if (!nfs_commit_set_lock(NFS_I(inode), may_wait))
+	if (!nfs_commit_set_lock(NFS_I(inode), sync))
 		goto out_mark_dirty;
+	if (!sync)
+		nfs_commit_wait(NFS_SERVER(inode));
 	spin_lock(&inode->i_lock);
 	res = nfs_scan_commit(inode, &head, 0, 0);
 	spin_unlock(&inode->i_lock);
@@ -1464,7 +1499,7 @@ int nfs_commit_inode(struct inode *inode
 		int error = nfs_commit_list(inode, &head, how);
 		if (error < 0)
 			return error;
-		if (may_wait)
+		if (sync)
 			wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
 					nfs_wait_bit_killable,
 					TASK_KILLABLE);
--- linux-next.orig/include/linux/nfs_fs_sb.h	2010-12-13 21:46:21.000000000 +0800
+++ linux-next/include/linux/nfs_fs_sb.h	2010-12-13 21:46:21.000000000 +0800
@@ -107,6 +107,8 @@ struct nfs_server {
 	struct backing_dev_info	backing_dev_info;
 	atomic_long_t		writeback;	/* number of writeback pages */
 	wait_queue_head_t	writeback_wait[2];
+	atomic_long_t		in_commit;	/* number of in-commit pages */
+	wait_queue_head_t	in_commit_wait;
 	int			flags;		/* various flags */
 	unsigned int		caps;		/* server capabilities */
 	unsigned int		rsize;		/* read size */
--- linux-next.orig/fs/nfs/client.c	2010-12-13 21:46:21.000000000 +0800
+++ linux-next/fs/nfs/client.c	2010-12-13 21:46:21.000000000 +0800
@@ -1008,6 +1008,7 @@ static struct nfs_server *nfs_alloc_serv
 	atomic_set(&server->active, 0);
 	init_waitqueue_head(&server->writeback_wait[BLK_RW_SYNC]);
 	init_waitqueue_head(&server->writeback_wait[BLK_RW_ASYNC]);
+	init_waitqueue_head(&server->in_commit_wait);
 
 	server->io_stats = nfs_alloc_iostats();
 	if (!server->io_stats) {


--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux