This patch implements two methods to store bcache journal to, 1) __journal_write_unlocked() for block interface device The latency method to compose bio and issue the jset bio to cache device (e.g. SSD). c->journal.key.ptr[0] indicates the LBA on cache device to store the journal jset. 2) __journal_nvdimm_write_unlocked() for memory interface NVDIMM Use memory interface to access NVDIMM pages and store the jset by memcpy_flushcache(). c->journal.key.ptr[0] indicates the linear address from the NVDIMM pages to store the journal jset. For lagency configuration without NVDIMM meta device, journal I/O is handled by __journal_write_unlocked() with existing code logic. If the NVDIMM meta device is used (by bcache-tools), the journal I/O will be handled by __journal_nvdimm_write_unlocked() and go into the NVDIMM pages. And when NVDIMM meta device is used, sb.d[] stores the linear addresses from NVDIMM pages (no more bucket index), in journal_reclaim() the journaling location in c->journal.key.ptr[0] should also be updated by linear address from NVDIMM pages (no more LBA combined by sectors offset and bucket index). Signed-off-by: Coly Li <colyli@xxxxxxx> Cc: Jianpeng Ma <jianpeng.ma@xxxxxxxxx> Cc: Qiaowei Ren <qiaowei.ren@xxxxxxxxx> --- drivers/md/bcache/journal.c | 111 ++++++++++++++++++++++++------------ 1 file changed, 75 insertions(+), 36 deletions(-) diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 1f16d8e497cf..b242fcb47ce2 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -596,6 +596,8 @@ static void do_journal_discard(struct cache *ca) return; } + BUG_ON(bch_has_feature_nvdimm_meta(&ca->sb)); + switch (atomic_read(&ja->discard_in_flight)) { case DISCARD_IN_FLIGHT: return; @@ -661,9 +663,13 @@ static void journal_reclaim(struct cache_set *c) goto out; ja->cur_idx = next; - k->ptr[0] = MAKE_PTR(0, - bucket_to_sector(c, ca->sb.d[ja->cur_idx]), - ca->sb.nr_this_dev); + if (!bch_has_feature_nvdimm_meta(&ca->sb)) + k->ptr[0] = MAKE_PTR(0, + bucket_to_sector(c, ca->sb.d[ja->cur_idx]), + ca->sb.nr_this_dev); + else + k->ptr[0] = ca->sb.d[ja->cur_idx]; + atomic_long_inc(&c->reclaimed_journal_buckets); bkey_init(k); @@ -729,46 +735,21 @@ static void journal_write_unlock(struct closure *cl) spin_unlock(&c->journal.lock); } -static void journal_write_unlocked(struct closure *cl) + +static void __journal_write_unlocked(struct cache_set *c) __releases(c->journal.lock) { - struct cache_set *c = container_of(cl, struct cache_set, journal.io); - struct cache *ca = c->cache; - struct journal_write *w = c->journal.cur; struct bkey *k = &c->journal.key; - unsigned int i, sectors = set_blocks(w->data, block_bytes(ca)) * - ca->sb.block_size; - + struct journal_write *w = c->journal.cur; + struct closure *cl = &c->journal.io; + struct cache *ca = c->cache; struct bio *bio; struct bio_list list; + unsigned int i, sectors = set_blocks(w->data, block_bytes(ca)) * + ca->sb.block_size; bio_list_init(&list); - if (!w->need_write) { - closure_return_with_destructor(cl, journal_write_unlock); - return; - } else if (journal_full(&c->journal)) { - journal_reclaim(c); - spin_unlock(&c->journal.lock); - - btree_flush_write(c); - continue_at(cl, journal_write, bch_journal_wq); - return; - } - - c->journal.blocks_free -= set_blocks(w->data, block_bytes(ca)); - - w->data->btree_level = c->root->level; - - bkey_copy(&w->data->btree_root, &c->root->key); - bkey_copy(&w->data->uuid_bucket, &c->uuid_bucket); - - w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0]; - w->data->magic = jset_magic(&ca->sb); - w->data->version = BCACHE_JSET_VERSION; - w->data->last_seq = last_seq(&c->journal); - w->data->csum = csum_set(w->data); - for (i = 0; i < KEY_PTRS(k); i++) { ca = PTR_CACHE(c, k, i); bio = &ca->journal.bio; @@ -793,7 +774,6 @@ static void journal_write_unlocked(struct closure *cl) ca->journal.seq[ca->journal.cur_idx] = w->data->seq; } - /* If KEY_PTRS(k) == 0, this jset gets lost in air */ BUG_ON(i == 0); @@ -805,6 +785,65 @@ static void journal_write_unlocked(struct closure *cl) while ((bio = bio_list_pop(&list))) closure_bio_submit(c, bio, cl); +} + +static void __journal_nvdimm_write_unlocked(struct cache_set *c) + __releases(c->journal.lock) +{ + struct journal_write *w = c->journal.cur; + struct cache *ca = c->cache; + unsigned int sectors; + + sectors = set_blocks(w->data, block_bytes(ca)) * ca->sb.block_size; + atomic_long_add(sectors, &ca->meta_sectors_written); + + memcpy_flushcache((void *)c->journal.key.ptr[0], w->data, sectors << 9); + + c->journal.key.ptr[0] += sectors << 9; + ca->journal.seq[ca->journal.cur_idx] = w->data->seq; + + atomic_dec_bug(&fifo_back(&c->journal.pin)); + bch_journal_next(&c->journal); + journal_reclaim(c); + + spin_unlock(&c->journal.lock); +} + +static void journal_write_unlocked(struct closure *cl) +{ + struct cache_set *c = container_of(cl, struct cache_set, journal.io); + struct cache *ca = c->cache; + struct journal_write *w = c->journal.cur; + + if (!w->need_write) { + closure_return_with_destructor(cl, journal_write_unlock); + return; + } else if (journal_full(&c->journal)) { + journal_reclaim(c); + spin_unlock(&c->journal.lock); + + btree_flush_write(c); + continue_at(cl, journal_write, bch_journal_wq); + return; + } + + c->journal.blocks_free -= set_blocks(w->data, block_bytes(ca)); + + w->data->btree_level = c->root->level; + + bkey_copy(&w->data->btree_root, &c->root->key); + bkey_copy(&w->data->uuid_bucket, &c->uuid_bucket); + + w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0]; + w->data->magic = jset_magic(&ca->sb); + w->data->version = BCACHE_JSET_VERSION; + w->data->last_seq = last_seq(&c->journal); + w->data->csum = csum_set(w->data); + + if (!bch_has_feature_nvdimm_meta(&ca->sb)) + __journal_write_unlocked(c); + else + __journal_nvdimm_write_unlocked(c); continue_at(cl, journal_write_done, NULL); } -- 2.26.2