[PATCH 4/6] blk-cgroup: buffered write IO controller - bandwidth limit

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



A bare per-cgroup buffered write IO controller.

Basically, when there are N dd tasks running in the blkcg,
blkcg->dirty_ratelimit will be balanced around

	blkcg->buffered_write_bps / N

and each blkcg task will be throttled under

	blkcg->dirty_ratelimit
or 
	min(blkcg->dirty_ratelimit, bdi->dirty_ratelimit)
when there are other dirtier tasks in the system.

CC: Vivek Goyal <vgoyal@xxxxxxxxxx>
CC: Andrea Righi <arighi@xxxxxxxxxxx>
Signed-off-by: Wu Fengguang <fengguang.wu@xxxxxxxxx>
---
 include/linux/blk-cgroup.h |   20 +++++++++++
 mm/page-writeback.c        |   59 +++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)

--- linux-next.orig/mm/page-writeback.c	2012-03-28 15:36:16.414093131 +0800
+++ linux-next/mm/page-writeback.c	2012-03-28 15:40:25.446088022 +0800
@@ -1145,6 +1145,54 @@ static long bdi_min_pause(struct backing
 	return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
 }
 
+#ifdef CONFIG_BLK_DEV_THROTTLING
+static void blkcg_update_dirty_ratelimit(struct blkio_cgroup *blkcg,
+					 unsigned long dirtied,
+					 unsigned long elapsed)
+{
+	unsigned long long bps = blkcg_buffered_write_bps(blkcg);
+	unsigned long long ratelimit;
+	unsigned long dirty_rate;
+
+	dirty_rate = (dirtied - blkcg->dirtied_stamp) * HZ;
+	dirty_rate /= elapsed;
+
+	ratelimit = blkcg->dirty_ratelimit;
+	ratelimit *= div_u64(bps, dirty_rate + 1);
+	ratelimit = min(ratelimit, bps);
+	ratelimit >>= PAGE_SHIFT;
+
+	blkcg->dirty_ratelimit = (blkcg->dirty_ratelimit + ratelimit) / 2 + 1;
+}
+
+void blkcg_update_bandwidth(struct blkio_cgroup *blkcg)
+{
+	unsigned long now = jiffies;
+	unsigned long dirtied;
+	unsigned long elapsed;
+
+	if (!blkcg)
+		return;
+	if (!spin_trylock(&blkcg->lock))
+		return;
+
+	elapsed = now - blkcg->bw_time_stamp;
+	dirtied = percpu_counter_read(&blkcg->nr_dirtied);
+
+	if (elapsed > MAX_PAUSE * 2)
+		goto snapshot;
+	if (elapsed <= MAX_PAUSE)
+		goto unlock;
+
+	blkcg_update_dirty_ratelimit(blkcg, dirtied, elapsed);
+snapshot:
+	blkcg->dirtied_stamp = dirtied;
+	blkcg->bw_time_stamp = now;
+unlock:
+	spin_unlock(&blkcg->lock);
+}
+#endif
+
 /*
  * balance_dirty_pages() must be called by processes which are generating dirty
  * data.  It looks at the number of dirty pages in the machine and will force
@@ -1174,6 +1222,7 @@ static void balance_dirty_pages(struct a
 	unsigned long pos_ratio;
 	struct backing_dev_info *bdi = mapping->backing_dev_info;
 	unsigned long start_time = jiffies;
+	struct blkio_cgroup *blkcg = task_blkio_cgroup(current);
 
 	for (;;) {
 		unsigned long now = jiffies;
@@ -1198,6 +1247,8 @@ static void balance_dirty_pages(struct a
 		freerun = dirty_freerun_ceiling(dirty_thresh,
 						background_thresh);
 		if (nr_dirty <= freerun) {
+			if (blkcg_buffered_write_bps(blkcg))
+				goto blkcg_bps;
 			current->dirty_paused_when = now;
 			current->nr_dirtied = 0;
 			current->nr_dirtied_pause =
@@ -1263,6 +1314,14 @@ static void balance_dirty_pages(struct a
 			task_ratelimit = (u64)task_ratelimit *
 				blkcg_weight(blkcg) / BLKIO_WEIGHT_DEFAULT;
 
+		if (blkcg_buffered_write_bps(blkcg) &&
+		    task_ratelimit > blkcg_dirty_ratelimit(blkcg)) {
+blkcg_bps:
+			blkcg_update_bandwidth(blkcg);
+			dirty_ratelimit = blkcg_dirty_ratelimit(blkcg);
+			task_ratelimit = dirty_ratelimit;
+		}
+
 		max_pause = bdi_max_pause(bdi, bdi_dirty);
 		min_pause = bdi_min_pause(bdi, max_pause,
 					  task_ratelimit, dirty_ratelimit,
--- linux-next.orig/include/linux/blk-cgroup.h	2012-03-28 15:36:16.414093131 +0800
+++ linux-next/include/linux/blk-cgroup.h	2012-03-28 15:39:46.730088815 +0800
@@ -122,6 +122,10 @@ struct blkio_cgroup {
 	struct hlist_head blkg_list;
 	struct list_head policy_list; /* list of blkio_policy_node */
 	struct percpu_counter nr_dirtied;
+	unsigned long bw_time_stamp;
+	unsigned long dirtied_stamp;
+	unsigned long dirty_ratelimit;
+	unsigned long long buffered_write_bps;
 };
 
 struct blkio_group_stats {
@@ -217,6 +221,14 @@ static inline unsigned int blkcg_weight(
 {
 	return blkcg->weight;
 }
+static inline uint64_t blkcg_buffered_write_bps(struct blkio_cgroup *blkcg)
+{
+	return blkcg->buffered_write_bps;
+}
+static inline unsigned long blkcg_dirty_ratelimit(struct blkio_cgroup *blkcg)
+{
+	return blkcg->dirty_ratelimit;
+}
 
 typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
 
@@ -272,6 +284,14 @@ static inline unsigned int blkcg_weight(
 {
 	return BLKIO_WEIGHT_DEFAULT;
 }
+static inline uint64_t blkcg_buffered_write_bps(struct blkio_cgroup *blkcg)
+{
+	return 0;
+}
+static inline unsigned long blkcg_dirty_ratelimit(struct blkio_cgroup *blkcg)
+{
+	return 0;
+}
 
 #endif
 


--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux