nfs_congestion_kb controls the maximum allowed number of writeback and in-commit pages. It is not reasonable for them to outnumber dirty and to-commit pages, so each of them should take no more than 1/4 of the dirty threshold. However, nfs_init_writepagecache() is called on fresh boot, when dirty_thresh is much higher than the real dirty limit will be once user space has consumed a lot of memory, so use 1/8 instead. Feng: fix deadlock by preventing (nfs_congestion_kb == 0) CC: Trond Myklebust <Trond.Myklebust@xxxxxxxxxx> Signed-off-by: Feng Tang <feng.tang@xxxxxxxxx> Signed-off-by: Wu Fengguang <fengguang.wu@xxxxxxxxx> --- fs/nfs/write.c | 36 +++++++++++++++++------------------- mm/page-writeback.c | 6 ++++++ 2 files changed, 23 insertions(+), 19 deletions(-) --- linux-next.orig/fs/nfs/write.c 2011-10-20 23:45:59.000000000 +0800 +++ linux-next/fs/nfs/write.c 2011-10-20 23:53:16.000000000 +0800 @@ -1782,6 +1782,22 @@ out: } #endif +void nfs_update_congestion_thresh(void) +{ + unsigned long background_thresh; + unsigned long dirty_thresh; + + /* + * Limit to 1/8 dirty threshold, so that writeback+in_commit pages + * won't overnumber dirty+to_commit pages. + */ + global_dirty_limits(&background_thresh, &dirty_thresh); + dirty_thresh <<= PAGE_SHIFT - 10; + dirty_thresh += 1024; + + nfs_congestion_kb = dirty_thresh / 8; +} + int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", @@ -1801,25 +1817,7 @@ int __init nfs_init_writepagecache(void) if (nfs_commit_mempool == NULL) return -ENOMEM; - /* - * NFS congestion size, scale with available memory. - * - * 64MB: 8192k - * 128MB: 11585k - * 256MB: 16384k - * 512MB: 23170k - * 1GB: 32768k - * 2GB: 46340k - * 4GB: 65536k - * 8GB: 92681k - * 16GB: 131072k - * - * This allows larger machines to have larger/more transfers. 
- * Limit the default to 256M - */ - nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); - if (nfs_congestion_kb > 256*1024) - nfs_congestion_kb = 256*1024; + nfs_update_congestion_thresh(); return 0; } --- linux-next.orig/mm/page-writeback.c 2011-10-20 23:45:23.000000000 +0800 +++ linux-next/mm/page-writeback.c 2011-10-20 23:48:07.000000000 +0800 @@ -207,6 +207,10 @@ static int calc_period_shift(void) return 2 + ilog2(dirty_total - 1); } +void __weak nfs_update_congestion_thresh(void) +{ +} + /* * update the period when the dirty threshold changes. */ @@ -217,6 +221,7 @@ static void update_completion_period(voi prop_change_shift(&vm_dirties, shift); writeback_set_ratelimit(); + nfs_update_congestion_thresh(); } int dirty_background_ratio_handler(struct ctl_table *table, int write, @@ -447,6 +452,7 @@ unsigned long bdi_dirty_limit(struct bac return bdi_dirty; } +EXPORT_SYMBOL_GPL(global_dirty_limits); /* * Dirty position control. -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html