From: Coly Li <colyli@xxxxxxx> commit 249a5f6da57c28a903c75d81505d58ec8c10030d upstream. This reverts commit c4dc2497d50d9c6fb16aa0d07b6a14f3b2adb1e0. This patch enlarges a race between normal btree flush code path and flush_btree_write(), which causes deadlock when journal space is exhausted. Reverts this patch makes the race window from 128 btree nodes to only 1 btree nodes. Fixes: c4dc2497d50d ("bcache: fix high CPU occupancy during journal") Signed-off-by: Coly Li <colyli@xxxxxxx> Cc: stable@xxxxxxxxxxxxxxx Cc: Tang Junhui <tang.junhui.linux@xxxxxxxxx> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- drivers/md/bcache/bcache.h | 2 - drivers/md/bcache/journal.c | 47 ++++++++++++++------------------------------ drivers/md/bcache/util.h | 2 - 3 files changed, 15 insertions(+), 36 deletions(-) --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -708,8 +708,6 @@ struct cache_set { #define BUCKET_HASH_BITS 12 struct hlist_head bucket_hash[1 << BUCKET_HASH_BITS]; - - DECLARE_HEAP(struct btree *, flush_btree); }; struct bbio { --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -390,12 +390,6 @@ err: } /* Journalling */ -#define journal_max_cmp(l, r) \ - (fifo_idx(&c->journal.pin, btree_current_write(l)->journal) < \ - fifo_idx(&(c)->journal.pin, btree_current_write(r)->journal)) -#define journal_min_cmp(l, r) \ - (fifo_idx(&c->journal.pin, btree_current_write(l)->journal) > \ - fifo_idx(&(c)->journal.pin, btree_current_write(r)->journal)) static void btree_flush_write(struct cache_set *c) { @@ -403,35 +397,25 @@ static void btree_flush_write(struct cac * Try to find the btree node with that references the oldest journal * entry, best is our current candidate and is locked if non NULL: */ - struct btree *b; - int i; + struct btree *b, *best; + unsigned int i; atomic_long_inc(&c->flush_write); - retry: - spin_lock(&c->journal.lock); - if (heap_empty(&c->flush_btree)) { - for_each_cached_btree(b, c, i) - if (btree_current_write(b)->journal) { - if (!heap_full(&c->flush_btree)) - heap_add(&c->flush_btree, b, - journal_max_cmp); - else if (journal_max_cmp(b, - heap_peek(&c->flush_btree))) { - c->flush_btree.data[0] = b; - heap_sift(&c->flush_btree, 0, - journal_max_cmp); - } - } - - for (i = c->flush_btree.used / 2 - 1; i >= 0; --i) - heap_sift(&c->flush_btree, i, journal_min_cmp); - } + best = NULL; - b = NULL; - heap_pop(&c->flush_btree, b, journal_min_cmp); - spin_unlock(&c->journal.lock); + for_each_cached_btree(b, c, i) + if (btree_current_write(b)->journal) { + if (!best) + best = b; + else if (journal_pin_cmp(c, + btree_current_write(best)->journal, + btree_current_write(b)->journal)) { + best = b; + } + } + b = best; if (b) { mutex_lock(&b->write_lock); if (!btree_current_write(b)->journal) { @@ -873,8 +857,7 @@ int bch_journal_alloc(struct cache_set * j->w[0].c = c; j->w[1].c = c; - if (!(init_heap(&c->flush_btree, 128, GFP_KERNEL)) || - !(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) || + if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) || !(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)) || !(j->w[1].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS))) return -ENOMEM; --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -113,8 +113,6 @@ do { \ #define heap_full(h) ((h)->used == (h)->size) -#define heap_empty(h) ((h)->used == 0) - #define DECLARE_FIFO(type, name) \ struct { \ size_t front, back, size, mask; \