On Fri, Nov 19, 2010 at 12:22:22AM +0800, Wu Fengguang wrote: > On Fri, Nov 19, 2010 at 12:06:52AM +0800, Wu Fengguang wrote: > > On Fri, Nov 19, 2010 at 12:02:01AM +0800, Peter Zijlstra wrote: > > > On Thu, 2010-11-18 at 23:44 +0800, Wu Fengguang wrote: > > > > + pause = HZ * pages_dirtied / (bw + 1); > > > > > > Shouldn't that be using something like div64_u64 ? > > > > OK, but a dumb question: gcc cannot handle this implicitly? > > Hmm, I'm tempting to do > > pause = HZ * pages_dirtied / ((unsigned long)bw + 1); > > because here "bw" is guaranteed to be smaller than ULONG_MAX. Here it is :) --- Subject: writeback: prevent bandwidth calculation overflow Date: Thu Nov 18 12:55:42 CST 2010 On a 32-bit kernel, bdi->write_bandwidth can express at most 4GB/s. However, the current calculation code can overflow when disk bandwidth reaches 800MB/s. Fix it by using "long long" and div64_u64() in the calculations. Also change its unit from bytes/second to pages/second, which allows up to 16TB/s of bandwidth on a 32-bit kernel. 
CC: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx> Acked-by: Rik van Riel <riel@xxxxxxxxxx> Signed-off-by: Wu Fengguang <fengguang.wu@xxxxxxxxx> --- mm/backing-dev.c | 4 ++-- mm/page-writeback.c | 11 +++++------ 2 files changed, 7 insertions(+), 8 deletions(-) --- linux-next.orig/mm/page-writeback.c 2010-11-18 12:42:58.000000000 +0800 +++ linux-next/mm/page-writeback.c 2010-11-19 00:22:39.000000000 +0800 @@ -494,7 +494,7 @@ void bdi_update_write_bandwidth(struct b unsigned long written; unsigned long elapsed; unsigned long bw; - unsigned long w; + unsigned long long w; if (*bw_written == 0) goto snapshot; @@ -513,7 +513,7 @@ void bdi_update_write_bandwidth(struct b goto snapshot; written = percpu_counter_read(&bdi->bdi_stat[BDI_WRITTEN]) - *bw_written; - bw = (HZ * PAGE_CACHE_SIZE * written + elapsed/2) / elapsed; + bw = (HZ * written + elapsed/2) / elapsed; w = min(elapsed / unit_time, 128UL); bdi->write_bandwidth = (bdi->write_bandwidth * (1024-w) + bw * w) >> 10; bdi->write_bandwidth_update_time = jiffies; @@ -539,7 +539,7 @@ static void balance_dirty_pages(struct a unsigned long dirty_thresh; unsigned long bdi_thresh; unsigned long task_thresh; - unsigned long bw; + unsigned long long bw; unsigned long pause = 0; bool dirty_exceeded = false; struct backing_dev_info *bdi = mapping->backing_dev_info; @@ -602,8 +602,7 @@ static void balance_dirty_pages(struct a * of dirty pages have been cleaned during our pause time. 
*/ if (nr_dirty < dirty_thresh && - bdi_prev_dirty - bdi_dirty > - bdi->write_bandwidth >> (PAGE_CACHE_SHIFT + 2)) + bdi_prev_dirty - bdi_dirty > bdi->write_bandwidth / 4) break; bdi_prev_dirty = bdi_dirty; @@ -626,7 +625,7 @@ static void balance_dirty_pages(struct a bw = bw * (task_thresh - bdi_dirty); bw = bw / (bdi_thresh / TASK_SOFT_DIRTY_LIMIT + 1); - pause = HZ * (pages_dirtied << PAGE_CACHE_SHIFT) / (bw + 1); + pause = HZ * pages_dirtied / ((unsigned long)bw + 1); pause = clamp_val(pause, 1, HZ/10); pause: --- linux-next.orig/mm/backing-dev.c 2010-11-18 14:24:45.000000000 +0800 +++ linux-next/mm/backing-dev.c 2010-11-18 14:27:00.000000000 +0800 @@ -103,7 +103,7 @@ static int bdi_debug_stats_show(struct s (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)), K(bdi_thresh), K(dirty_thresh), K(background_thresh), (unsigned long) K(bdi_stat(bdi, BDI_WRITTEN)), - (unsigned long) bdi->write_bandwidth >> 10, + (unsigned long) K(bdi->write_bandwidth), nr_dirty, nr_io, nr_more_io, !list_empty(&bdi->bdi_list), bdi->state); #undef K @@ -662,7 +662,7 @@ int bdi_init(struct backing_dev_info *bd goto err; } - bdi->write_bandwidth = 100 << 20; + bdi->write_bandwidth = (100 << 20) / PAGE_CACHE_SIZE; bdi->dirty_exceeded = 0; err = prop_local_init_percpu(&bdi->completions); -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html