This patch implements two methods to read the jset from media for journal replay:

- __jnl_rd_bkt() for block device
  This is the legacy method to read the jset via the block device interface.
- __jnl_rd_nvm_bkt() for NVDIMM
  This is the method to read the jset from the NVDIMM memory interface, i.e. memcpy() from NVDIMM pages to DRAM pages.

If BCH_FEATURE_INCOMPAT_NVDIMM_META is set in the incompat feature set, then while the cache set is running, journal_read_bucket() will read the journal content from NVDIMM by __jnl_rd_nvm_bkt(). The linear addresses of the NVDIMM pages to read the jset from are stored in sb.d[SB_JOURNAL_BUCKETS], which were initialized and maintained in previous runs of the cache set.

One thing that should be noticed is that when bch_journal_read() is called, the linear addresses of the NVDIMM pages are not loaded and initialized yet; it is necessary to call __bch_journal_nvdimm_init() before reading the jset from the NVDIMM pages.

The code comment added in journal_read_bucket() addresses an issue noticed by the kernel test robot and Dan Carpenter: it explains why it is safe to only check the !bch_has_feature_nvdimm_meta() condition in the if() statement when CONFIG_BCACHE_NVM_PAGES is not configured. This avoids confusion from the bogus warning message produced by the static checking tool.
Signed-off-by: Coly Li <colyli@xxxxxxx> Reported-by: kernel test robot <lkp@xxxxxxxxx> Reported-by: Dan Carpenter <dan.carpenter@xxxxxxxxxx> Cc: Christoph Hellwig <hch@xxxxxx> Cc: Dan Williams <dan.j.williams@xxxxxxxxx> Cc: Hannes Reinecke <hare@xxxxxxx> Cc: Jens Axboe <axboe@xxxxxxxxx> Cc: Jianpeng Ma <jianpeng.ma@xxxxxxxxx> Cc: Qiaowei Ren <qiaowei.ren@xxxxxxxxx> --- drivers/md/bcache/journal.c | 88 ++++++++++++++++++++++++++++++------- 1 file changed, 71 insertions(+), 17 deletions(-) diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 8cd0c4dc9137..987306b4db20 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -34,18 +34,60 @@ static void journal_read_endio(struct bio *bio) closure_put(cl); } +static struct jset *__jnl_rd_bkt(struct cache *ca, unsigned int bkt_idx, + unsigned int len, unsigned int offset, + struct closure *cl) +{ + sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bkt_idx]); + struct bio *bio = &ca->journal.bio; + struct jset *data = ca->set->journal.w[0].data; + + bio_reset(bio); + bio->bi_iter.bi_sector = bucket + offset; + bio_set_dev(bio, ca->bdev); + bio->bi_iter.bi_size = len << 9; + + bio->bi_end_io = journal_read_endio; + bio->bi_private = cl; + bio_set_op_attrs(bio, REQ_OP_READ, 0); + bch_bio_map(bio, data); + + closure_bio_submit(ca->set, bio, cl); + closure_sync(cl); + + /* Indeed journal.w[0].data */ + return data; +} + +#if defined(CONFIG_BCACHE_NVM_PAGES) + +static struct jset *__jnl_rd_nvm_bkt(struct cache *ca, unsigned int bkt_idx, + unsigned int len, unsigned int offset) +{ + void *jset_addr; + struct jset *data; + + jset_addr = bch_nvmpg_offset_to_ptr(ca->sb.d[bkt_idx]) + (offset << 9); + data = ca->set->journal.w[0].data; + + memcpy(data, jset_addr, len << 9); + + /* Indeed journal.w[0].data */ + return data; +} + +#endif /* CONFIG_BCACHE_NVM_PAGES */ + static int journal_read_bucket(struct cache *ca, struct list_head *list, unsigned int bucket_index) { struct 
journal_device *ja = &ca->journal; - struct bio *bio = &ja->bio; struct journal_replay *i; - struct jset *j, *data = ca->set->journal.w[0].data; + struct jset *j; struct closure cl; unsigned int len, left, offset = 0; int ret = 0; - sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bucket_index]); closure_init_stack(&cl); @@ -55,26 +97,27 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list, reread: left = ca->sb.bucket_size - offset; len = min_t(unsigned int, left, PAGE_SECTORS << JSET_BITS); - bio_reset(bio); - bio->bi_iter.bi_sector = bucket + offset; - bio_set_dev(bio, ca->bdev); - bio->bi_iter.bi_size = len << 9; - - bio->bi_end_io = journal_read_endio; - bio->bi_private = &cl; - bio_set_op_attrs(bio, REQ_OP_READ, 0); - bch_bio_map(bio, data); - - closure_bio_submit(ca->set, bio, &cl); - closure_sync(&cl); + if (!bch_has_feature_nvdimm_meta(&ca->sb)) + j = __jnl_rd_bkt(ca, bucket_index, len, offset, &cl); + /* + * If CONFIG_BCACHE_NVM_PAGES is not defined, the feature bit + * BCH_FEATURE_INCOMPAT_NVDIMM_META won't in incompatible + * support feature set, a cache device format with feature bit + * BCH_FEATURE_INCOMPAT_NVDIMM_META will fail much earlier in + * read_super() by bch_has_unknown_incompat_features(). + * Therefore when CONFIG_BCACHE_NVM_PAGES is not define, it is + * safe to ignore the bch_has_feature_nvdimm_meta() condition. + */ +#if defined(CONFIG_BCACHE_NVM_PAGES) + else + j = __jnl_rd_nvm_bkt(ca, bucket_index, len, offset); +#endif /* This function could be simpler now since we no longer write * journal entries that overlap bucket boundaries; this means * the start of a bucket will always have a valid journal entry * if it has any journal entries at all. 
*/ - - j = data; while (len) { struct list_head *where; size_t blocks, bytes = set_bytes(j); @@ -170,6 +213,8 @@ reread: left = ca->sb.bucket_size - offset; return ret; } +static int __bch_journal_nvdimm_init(struct cache *ca); + int bch_journal_read(struct cache_set *c, struct list_head *list) { #define read_bucket(b) \ @@ -188,6 +233,15 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) unsigned int i, l, r, m; uint64_t seq; + /* + * Linear addresses of NVDIMM pages for journaling is not + * initialized yet, do it before read jset from NVDIMM pages. + */ + if (bch_has_feature_nvdimm_meta(&ca->sb)) { + if (__bch_journal_nvdimm_init(ca) < 0) + return -ENXIO; + } + bitmap_zero(bitmap, SB_JOURNAL_BUCKETS); pr_debug("%u journal buckets\n", ca->sb.njournal_buckets); -- 2.26.2