Re: [PATCH 1/2] bcache: introduce ioprio-based bypass/writeback hints

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, 28 Sep 2016, Kai Krakow wrote:

> Am Mon, 26 Sep 2016 16:17:22 -0700
> schrieb Eric Wheeler <git@xxxxxxxxxxxxxxxxxx>:
> 
> > Add support to bcache hinting functions and sysfs to hint by the
> > ioprio of 'current' which can be configured with `ionice`.
> > 
> > Cache hinting is configurable by writing 'class,level' pairs to sysfs.
> > These are the defaults:
> > 	echo 2,7 > /sys/block/bcache0/bcache/ioprio_bypass
> > 	echo 2,0 > /sys/block/bcache0/bcache/ioprio_writeback
> > 
> > (-p) IO Class	 (-n) Class level	Action
> > -----------------------------------------------------
> > (1) Realtime	  0-7				Writeback
> > (2) Best-effort     0 				Writeback
> > (2) Best-effort   1-6				Original bcache logic
> > (2) Best-effort     7				Bypass cache
> > (3) Idle          n/a				Bypass cache
> > 
> > See `man ionice` for more ioprio detail.
> 
> This patch fails to apply on 4.7.5... Is it possible to adapt for that
> version?
>

Try this one. It's my 4.1 backport which applies cleanly to 4.7, too.
 
commit 03d581518c28b9361cfde85922919baa1e430c54
Author: Eric Wheeler <git@xxxxxxxxxxxxxxxxxx>
Date:   Mon Sep 26 16:17:22 2016 -0700

    bcache: introduce ioprio-based bypass/writeback hints
    
    Add support to bcache hinting functions and sysfs to hint by the ioprio of
    'current' which can be configured with `ionice`.
    
    Cache hinting is configurable by writing 'class,level' pairs to sysfs.
    These are the defaults:
    	echo 2,7 > /sys/block/bcache0/bcache/ioprio_bypass
    	echo 2,0 > /sys/block/bcache0/bcache/ioprio_writeback
    
    (-c) IO Class	 (-n) Class level	Action
    -----------------------------------------------------
    (1) Realtime	  0-7				Writeback
    (2) Best-effort     0 				Writeback
    (2) Best-effort   1-6				Original bcache logic
    (2) Best-effort     7				Bypass cache
    (3) Idle          n/a				Bypass cache
    
    See `man ionice` for more ioprio detail.
    
    Signed-off-by: Eric Wheeler <bcache@xxxxxxxxxxxxxxxxxx>
    
    Conflicts:
    	drivers/md/bcache/writeback.h

diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 04f7bc2..ff1026f 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -382,6 +382,9 @@ struct cached_dev {
 	unsigned		writeback_rate_update_seconds;
 	unsigned		writeback_rate_d_term;
 	unsigned		writeback_rate_p_term_inverse;
+
+	unsigned short		ioprio_writeback;
+	unsigned short		ioprio_bypass;
 };
 
 enum alloc_reserve {
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index ab43fad..7b36da3 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -369,6 +369,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
 	unsigned sectors, congested = bch_get_congested(c);
 	struct task_struct *task = current;
 	struct io *i;
+	struct io_context *ioc;
 
 	if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
 	    c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
@@ -380,6 +381,18 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
 	     (bio->bi_rw & REQ_WRITE)))
 		goto skip;
 
+	/* If process ioprio is lower-or-equal to dc->ioprio_bypass, then
+	 * hint for bypass. Note that a lower-priority IO class+value
+	 * has a greater numeric value. */
+	ioc = get_task_io_context(current, GFP_ATOMIC, NUMA_NO_NODE);
+	if (ioc
+		&& ioprio_valid(ioc->ioprio)
+		&& ioprio_valid(dc->ioprio_writeback)
+		&& ioc->ioprio >= dc->ioprio_bypass) {
+		put_io_context(ioc);
+		return true;
+	}
+
 	if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
 	    bio_sectors(bio) & (c->sb.block_size - 1)) {
 		pr_debug("skipping unaligned io");
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index b3ff57d..83ac010 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -106,6 +106,9 @@ rw_attribute(btree_shrinker_disabled);
 rw_attribute(copy_gc_enabled);
 rw_attribute(size);
 
+rw_attribute(ioprio_writeback);
+rw_attribute(ioprio_bypass);
+
 SHOW(__bch_cached_dev)
 {
 	struct cached_dev *dc = container_of(kobj, struct cached_dev,
@@ -182,6 +185,17 @@ SHOW(__bch_cached_dev)
 		return strlen(buf);
 	}
 
+	if (attr == &sysfs_ioprio_bypass)
+		return snprintf(buf, PAGE_SIZE-1, "%d,%ld\n",
+			IOPRIO_PRIO_CLASS(dc->ioprio_bypass),
+			IOPRIO_PRIO_DATA(dc->ioprio_bypass));
+
+	if (attr == &sysfs_ioprio_writeback)
+		return snprintf(buf, PAGE_SIZE-1, "%d,%ld\n",
+			IOPRIO_PRIO_CLASS(dc->ioprio_writeback),
+			IOPRIO_PRIO_DATA(dc->ioprio_writeback));
+
+
 #undef var
 	return 0;
 }
@@ -194,6 +208,8 @@ STORE(__cached_dev)
 	unsigned v = size;
 	struct cache_set *c;
 	struct kobj_uevent_env *env;
+	unsigned ioprio_class = 0; /* invalid initial ioprio values */
+	unsigned ioprio_value = IOPRIO_BE_NR+1;
 
 #define d_strtoul(var)		sysfs_strtoul(var, dc->var)
 #define d_strtoul_nonzero(var)	sysfs_strtoul_clamp(var, dc->var, 1, INT_MAX)
@@ -282,6 +298,33 @@ STORE(__cached_dev)
 	if (attr == &sysfs_stop)
 		bcache_device_stop(&dc->disk);
 
+	if (attr == &sysfs_ioprio_writeback) {
+		if (sscanf(buf, "%u,%u", &ioprio_class, &ioprio_value) != 2
+			|| ioprio_class > IOPRIO_CLASS_IDLE
+			|| ioprio_class < IOPRIO_CLASS_RT
+			|| ioprio_value >= IOPRIO_BE_NR)
+			pr_err("bcache: ioprio_writeback invalid ioprio class: %u data value: %u\n",
+				ioprio_class, ioprio_value);
+		else if (ioprio_class == IOPRIO_CLASS_IDLE)
+			dc->ioprio_writeback = IOPRIO_PRIO_VALUE(ioprio_class, 0);
+		else
+			dc->ioprio_writeback = IOPRIO_PRIO_VALUE(ioprio_class, ioprio_value);
+	}
+
+	if (attr == &sysfs_ioprio_bypass)
+	{
+		if (sscanf(buf, "%u,%u", &ioprio_class, &ioprio_value) != 2
+			|| ioprio_class > IOPRIO_CLASS_IDLE
+			|| ioprio_class < IOPRIO_CLASS_RT
+			|| ioprio_value >= IOPRIO_BE_NR)
+			pr_err("bcache: ioprio_bypass invalid ioprio class: %u data value: %u\n",
+				ioprio_class, ioprio_value);
+		else if (ioprio_class == IOPRIO_CLASS_IDLE)
+			dc->ioprio_writeback = IOPRIO_PRIO_VALUE(ioprio_class, 0);
+		else
+			dc->ioprio_writeback = IOPRIO_PRIO_VALUE(ioprio_class, ioprio_value);
+	}
+
 	return size;
 }
 
@@ -334,6 +377,8 @@ static struct attribute *bch_cached_dev_files[] = {
 	&sysfs_verify,
 	&sysfs_bypass_torture_test,
 #endif
+	&sysfs_ioprio_bypass,
+	&sysfs_ioprio_writeback,
 	NULL
 };
 KTYPE(bch_cached_dev);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index f1986bc..cc2c7ae 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -494,6 +494,9 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
 	dc->writeback_rate_d_term	= 30;
 	dc->writeback_rate_p_term_inverse = 6000;
 
+	dc->ioprio_writeback = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
+	dc->ioprio_bypass    = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NR-1);
+
 	INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
 }
 
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 0a9dab1..defd168 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -43,6 +43,7 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
 				    unsigned cache_mode, bool would_skip)
 {
 	unsigned in_use = dc->disk.c->gc_stats.in_use;
+	struct io_context *ioc;
 
 	if (cache_mode != CACHE_MODE_WRITEBACK ||
 	    test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
@@ -57,6 +58,18 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
 	if (would_skip)
 		return false;
 
+	/* If process ioprio is higher-or-equal to dc->ioprio_writeback, then
+	 * hint for writeback. Note that a higher-priority IO class+value
+	 * has a lesser numeric value. */
+	ioc = get_task_io_context(current, GFP_ATOMIC, NUMA_NO_NODE);
+	if (ioc
+		&& ioprio_valid(ioc->ioprio)
+		&& ioprio_valid(dc->ioprio_writeback)
+		&& ioc->ioprio <= dc->ioprio_writeback) {
+		put_io_context(ioc);
+		return true;
+	}
+
 	return bio->bi_rw & REQ_SYNC ||
 		in_use <= CUTOFF_WRITEBACK;
 }

--
To unsubscribe from this list: send the line "unsubscribe linux-bcache" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux ARM Kernel]     [Linux Filesystem Development]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Security]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux