Re: [for-416 PATCH v2] bcache: writeback: collapse contiguous IO better

From: Tang Junhui <tang.junhui@xxxxxxxxxx>

LGTM. I would like it even more if MAX_WRITEBACKS_IN_PASS were defined as a
somewhat larger value than 5.
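
For instance (the value here is purely illustrative, not a tested
recommendation), something along the lines of:

    #define MAX_WRITEBACKS_IN_PASS  16      /* hypothetical larger value */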

Reviewed-by: Tang Junhui <tang.junhui@xxxxxxxxxx>

>Previously, there was some logic that attempted to immediately issue
>writeback of backing-contiguous blocks when the writeback rate was
>fast.
>
>The previous logic did not have any limits on the aggregate size it
>would issue, nor the number of keys it would combine at once.  It
>would also discard the chance to do a contiguous write when the
>writeback rate was low-- e.g. at "background" writeback of target
>rate = 8, it would not combine two adjacent 4k writes and would
>instead seek the disk twice.
>
>This patch imposes limits and explicitly understands the size of
>contiguous I/O during issue.  It also will combine contiguous I/O
>in all circumstances, not just when writeback is requested to be
>relatively fast.
>
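(Review-side note, for readers cross-checking the numbers in the writeback.h
hunk below: the limits end up as MAX_WRITEBACKS_IN_PASS = 5 keys and
MAX_WRITESIZE_IN_PASS = 5000 sectors, i.e. roughly 5000 * 512 B ~= 2.44 MiB
of contiguous dirty data gathered per pass before the delay is recomputed.)
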
>It is a win on its own, but also lays the groundwork for skipping writes
>to short keys to make the I/O more sequential/contiguous.  It also gets
>ready to start using blk_*_plug, and to allow issuing of non-contig
>I/O in parallel if requested by the user (to make use of disk
>throughput benefits available from higher queue depths).
>
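As a side note on the blk_*_plug mention above (this sketch is not part of
the patch; it is only the generic block-layer usage pattern): batched bio
submission is normally bracketed with blk_start_plug()/blk_finish_plug() so
adjacent requests can be merged before they reach the device queue:

    #include <linux/blkdev.h>

    struct blk_plug plug;

    blk_start_plug(&plug);
    /* submit the batch of contiguous writeback bios here */
    blk_finish_plug(&plug);
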
>This patch fixes a previous version where the contiguous information
>was not calculated properly, thanks to suggestions by Tang Junhui.
>
>Signed-off-by: Michael Lyle <mlyle@xxxxxxxx>
>---
> drivers/md/bcache/bcache.h    |   6 ---
> drivers/md/bcache/writeback.c | 120 +++++++++++++++++++++++++++++-------------
> drivers/md/bcache/writeback.h |   3 ++
> 3 files changed, 85 insertions(+), 44 deletions(-)
>
>diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
>index 843877e017e1..1784e50eb857 100644
>--- a/drivers/md/bcache/bcache.h
>+++ b/drivers/md/bcache/bcache.h
>@@ -323,12 +323,6 @@ struct cached_dev {
>     struct bch_ratelimit    writeback_rate;
>     struct delayed_work    writeback_rate_update;
> 
>-    /*
>-     * Internal to the writeback code, so read_dirty() can keep track of
>-     * where it's at.
>-     */
>-    sector_t        last_read;
>-
>     /* Limit number of writeback bios in flight */
>     struct semaphore    in_flight;
>     struct task_struct    *writeback_thread;
>diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
>index f3d680c907ae..6975003454d9 100644
>--- a/drivers/md/bcache/writeback.c
>+++ b/drivers/md/bcache/writeback.c
>@@ -251,7 +251,9 @@ static void read_dirty_submit(struct closure *cl)
> static void read_dirty(struct cached_dev *dc)
> {
>     unsigned delay = 0;
>-    struct keybuf_key *w;
>+    struct keybuf_key *next, *keys[MAX_WRITEBACKS_IN_PASS], *w;
>+    size_t size;
>+    int nk, i;
>     struct dirty_io *io;
>     struct closure cl;
> 
>@@ -262,45 +264,87 @@ static void read_dirty(struct cached_dev *dc)
>      * mempools.
>      */
> 
>-    while (!kthread_should_stop()) {
>-
>-        w = bch_keybuf_next(&dc->writeback_keys);
>-        if (!w)
>-            break;
>-
>-        BUG_ON(ptr_stale(dc->disk.c, &w->key, 0));
>-
>-        if (KEY_START(&w->key) != dc->last_read ||
>-            jiffies_to_msecs(delay) > 50)
>-            while (!kthread_should_stop() && delay)
>-                delay = schedule_timeout_interruptible(delay);
>-
>-        dc->last_read    = KEY_OFFSET(&w->key);
>-
>-        io = kzalloc(sizeof(struct dirty_io) + sizeof(struct bio_vec)
>-                 * DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS),
>-                 GFP_KERNEL);
>-        if (!io)
>-            goto err;
>-
>-        w->private    = io;
>-        io->dc        = dc;
>-
>-        dirty_init(w);
>-        bio_set_op_attrs(&io->bio, REQ_OP_READ, 0);
>-        io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0);
>-        bio_set_dev(&io->bio, PTR_CACHE(dc->disk.c, &w->key, 0)->bdev);
>-        io->bio.bi_end_io    = read_dirty_endio;
>-
>-        if (bio_alloc_pages(&io->bio, GFP_KERNEL))
>-            goto err_free;
>-
>-        trace_bcache_writeback(&w->key);
>+    next = bch_keybuf_next(&dc->writeback_keys);
>+
>+    while (!kthread_should_stop() && next) {
>+        size = 0;
>+        nk = 0;
>+
>+        do {
>+            BUG_ON(ptr_stale(dc->disk.c, &next->key, 0));
>+
>+            /*
>+             * Don't combine too many operations, even if they
>+             * are all small.
>+             */
>+            if (nk >= MAX_WRITEBACKS_IN_PASS)
>+                break;
>+
>+            /*
>+             * If the current operation is very large, don't
>+             * further combine operations.
>+             */
>+            if (size >= MAX_WRITESIZE_IN_PASS)
>+                break;
>+
>+            /*
>+             * Operations are only eligible to be combined
>+             * if they are contiguous.
>+             *
>+             * TODO: add a heuristic willing to fire a
>+             * certain amount of non-contiguous IO per pass,
>+             * so that we can benefit from backing device
>+             * command queueing.
>+             */
>+            if ((nk != 0) && bkey_cmp(&keys[nk-1]->key,
>+                        &START_KEY(&next->key)))
>+                break;
>+
>+            size += KEY_SIZE(&next->key);
>+            keys[nk++] = next;
>+        } while ((next = bch_keybuf_next(&dc->writeback_keys)));
>+
>+        /* Now we have gathered a set of 1..5 keys to write back. */
>+        for (i = 0; i < nk; i++) {
>+            w = keys[i];
>+
>+            io = kzalloc(sizeof(struct dirty_io) +
>+                     sizeof(struct bio_vec) *
>+                     DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS),
>+                     GFP_KERNEL);
>+            if (!io)
>+                goto err;
>+
>+            w->private    = io;
>+            io->dc        = dc;
>+
>+            dirty_init(w);
>+            bio_set_op_attrs(&io->bio, REQ_OP_READ, 0);
>+            io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0);
>+            bio_set_dev(&io->bio,
>+                    PTR_CACHE(dc->disk.c, &w->key, 0)->bdev);
>+            io->bio.bi_end_io    = read_dirty_endio;
>+
>+            if (bio_alloc_pages(&io->bio, GFP_KERNEL))
>+                goto err_free;
>+
>+            trace_bcache_writeback(&w->key);
>+
>+            down(&dc->in_flight);
>+
>+            /* We've acquired a semaphore for the maximum
>+             * simultaneous number of writebacks; from here
>+             * everything happens asynchronously.
>+             */
>+            closure_call(&io->cl, read_dirty_submit, NULL, &cl);
>+        }
> 
>-        down(&dc->in_flight);
>-        closure_call(&io->cl, read_dirty_submit, NULL, &cl);
>+        delay = writeback_delay(dc, size);
> 
>-        delay = writeback_delay(dc, KEY_SIZE(&w->key));
>+        while (!kthread_should_stop() && delay) {
>+            schedule_timeout_interruptible(delay);
>+            delay = writeback_delay(dc, 0);
>+        }
>     }
> 
>     if (0) {
>diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
>index a9e3ffb4b03c..6d26927267f8 100644
>--- a/drivers/md/bcache/writeback.h
>+++ b/drivers/md/bcache/writeback.h
>@@ -5,6 +5,9 @@
> #define CUTOFF_WRITEBACK    40
> #define CUTOFF_WRITEBACK_SYNC    70
> 
>+#define MAX_WRITEBACKS_IN_PASS  5
>+#define MAX_WRITESIZE_IN_PASS   5000    /* *512b */
>+
> static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
> {
>     uint64_t i, ret = 0;
>-- 
>2.14.1


Thanks,
Tang




