[PATCH, RFC] vm: Add an tuning knob for vm.max_writeback_pages

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



MAX_WRITEBACK_PAGES was hard-coded to 1024 because of a concern of not
holding I_SYNC for too long.  But this shouldn't be a concern since
I_LOCK and I_SYNC have been separated.  So make it be a tunable and
change the default to be 32768.

This change is helpful for ext4 since it means we write out large file
in bigger chunks than just 4 megabytes at a time, so that when we have
multiple large files in the page cache waiting for writeback, the
files don't end up getting interleaved.  There shouldn't be any downside.

http://bugzilla.kernel.org/show_bug.cgi?id=13930

Signed-off-by: "Theodore Ts'o" <tytso@xxxxxxx>
---
 include/linux/writeback.h |    1 +
 kernel/sysctl.c           |    8 ++++++++
 mm/page-writeback.c       |   27 ++++++++++++++-------------
 3 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 3224820..c1e6c08 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -110,6 +110,7 @@ extern int vm_dirty_ratio;
 extern unsigned long vm_dirty_bytes;
 extern unsigned int dirty_writeback_interval;
 extern unsigned int dirty_expire_interval;
+extern unsigned int max_writeback_pages;
 extern int vm_highmem_is_dirtyable;
 extern int block_dump;
 extern int laptop_mode;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 58be760..06d1c4c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1104,6 +1104,14 @@ static struct ctl_table vm_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "max_writeback_pages",
+		.data		= &max_writeback_pages,
+		.maxlen		= sizeof(max_writeback_pages),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
 		.ctl_name	= VM_NR_PDFLUSH_THREADS,
 		.procname	= "nr_pdflush_threads",
 		.data		= &nr_pdflush_threads,
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 81627eb..eac8026 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -36,15 +36,6 @@
 #include <linux/pagevec.h>
 
 /*
- * The maximum number of pages to writeout in a single bdflush/kupdate
- * operation.  We do this so we don't hold I_SYNC against an inode for
- * enormous amounts of time, which would block a userspace task which has
- * been forced to throttle against that inode.  Also, the code reevaluates
- * the dirty each time it has written this many pages.
- */
-#define MAX_WRITEBACK_PAGES	1024
-
-/*
  * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
  * will look to see if it needs to force writeback or throttling.
  */
@@ -64,6 +55,16 @@ static inline long sync_writeback_pages(void)
 /* The following parameters are exported via /proc/sys/vm */
 
 /*
+ * The maximum number of pages to writeout in a single bdflush/kupdate
+ * operation.  We used to limit this to 1024 pages to avoid holding
+ * I_SYNC against an inode for a long period of times, but since
+ * I_SYNC has been separated out from I_LOCK, the only time a process
+ * waits for I_SYNC is when it is calling fsync() or otherwise forcing
+ * out the inode.
+ */
+unsigned int max_writeback_pages = 32768;
+
+/*
  * Start background writeback (via pdflush) at this percentage
  */
 int dirty_background_ratio = 10;
@@ -708,10 +709,10 @@ static void background_writeout(unsigned long _min_pages)
 			break;
 		wbc.more_io = 0;
 		wbc.encountered_congestion = 0;
-		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
+		wbc.nr_to_write = max_writeback_pages;
 		wbc.pages_skipped = 0;
 		writeback_inodes(&wbc);
-		min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+		min_pages -= max_writeback_pages - wbc.nr_to_write;
 		if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
 			/* Wrote less than expected */
 			if (wbc.encountered_congestion || wbc.more_io)
@@ -783,7 +784,7 @@ static void wb_kupdate(unsigned long arg)
 	while (nr_to_write > 0) {
 		wbc.more_io = 0;
 		wbc.encountered_congestion = 0;
-		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
+		wbc.nr_to_write = max_writeback_pages;
 		writeback_inodes(&wbc);
 		if (wbc.nr_to_write > 0) {
 			if (wbc.encountered_congestion || wbc.more_io)
@@ -791,7 +792,7 @@ static void wb_kupdate(unsigned long arg)
 			else
 				break;	/* All the old data is written */
 		}
-		nr_to_write -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+		nr_to_write -= max_writeback_pages - wbc.nr_to_write;
 	}
 	if (time_before(next_jif, jiffies + HZ))
 		next_jif = jiffies + HZ;
-- 
1.6.3.2.1.gb9f7d.dirty

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux