> On 17 Aug 2018, at 13.14, Matias Bjørling <mb@xxxxxxxxxxx> wrote:
>
> pblk implements two data paths for recovering line state: one for 1.2
> and another for 2.0. Instead of having pblk implement these, combine
> them in the core to reduce complexity and make them available to other
> targets.
>
> The new interface will adhere to the 2.0 chunk definition, including
> managing open chunks with an active write pointer. To provide this
> interface, a 1.2 device recovers the state of its chunks by manually
> detecting whether a chunk is free/open/closed/offline, and, if open,
> scanning the flash pages sequentially to find the next writable page.
> This process takes on average ~10 seconds on a device with 64 dies,
> 1024 blocks and 60us read access time. The process can be parallelized,
> but that is left out for maintenance simplicity, as the 1.2
> specification is deprecated. For 2.0 devices, the logic is maintained
> internally in the drive and retrieved through the 2.0 interface.
>
> Signed-off-by: Matias Bjørling <mb@xxxxxxxxxxx>
> ---
>  drivers/lightnvm/core.c      | 310 +++++++++++++++++++++++++++++++++++--------
>  drivers/lightnvm/pblk-core.c |   6 +-
>  drivers/lightnvm/pblk-init.c | 116 +---------------
>  drivers/lightnvm/pblk.h      |   2 +-
>  drivers/nvme/host/lightnvm.c |   4 +-
>  include/linux/lightnvm.h     |  15 +--
>  6 files changed, 266 insertions(+), 187 deletions(-)
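(Sanity-checking the ~10s figure, assuming the 1024 blocks are per die:
64 dies x 1024 blocks = 65,536 chunks, and sensing the first and last
page of each chunk serially is 2 x 65,536 reads x 60us ≈ 7.9s of raw
read time, before any open chunks are additionally scanned page by
page, so ~10s on average adds up.)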
> diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
> index 964352720a03..003fc073f496 100644
> --- a/drivers/lightnvm/core.c
> +++ b/drivers/lightnvm/core.c
> @@ -717,46 +717,6 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev,
>  	nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
>  }
>
> -int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct nvm_chk_meta *meta,
> -		       struct ppa_addr ppa, int nchks)
> -{
> -	struct nvm_dev *dev = tgt_dev->parent;
> -
> -	nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1);
> -
> -	return dev->ops->get_chk_meta(tgt_dev->parent, meta,
> -						(sector_t)ppa.ppa, nchks);
> -}
> -EXPORT_SYMBOL(nvm_get_chunk_meta);
> -
> -int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
> -		       int nr_ppas, int type)
> -{
> -	struct nvm_dev *dev = tgt_dev->parent;
> -	struct nvm_rq rqd;
> -	int ret;
> -
> -	if (nr_ppas > NVM_MAX_VLBA) {
> -		pr_err("nvm: unable to update all blocks atomically\n");
> -		return -EINVAL;
> -	}
> -
> -	memset(&rqd, 0, sizeof(struct nvm_rq));
> -
> -	nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas);
> -	nvm_rq_tgt_to_dev(tgt_dev, &rqd);
> -
> -	ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type);
> -	nvm_free_rqd_ppalist(tgt_dev, &rqd);
> -	if (ret) {
> -		pr_err("nvm: failed bb mark\n");
> -		return -EINVAL;
> -	}
> -
> -	return 0;
> -}
> -EXPORT_SYMBOL(nvm_set_tgt_bb_tbl);
> -
>  static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd)
>  {
>  	int flags = 0;
> @@ -830,27 +790,159 @@ void nvm_end_io(struct nvm_rq *rqd)
>  }
>  EXPORT_SYMBOL(nvm_end_io);
>
> +static int nvm_submit_io_sync_raw(struct nvm_dev *dev, struct nvm_rq *rqd)
> +{
> +	if (!dev->ops->submit_io_sync)
> +		return -ENODEV;
> +
> +	rqd->flags = nvm_set_flags(&dev->geo, rqd);
> +
> +	return dev->ops->submit_io_sync(dev, rqd);
> +}
> +
> +static int nvm_bb_chunk_sense(struct nvm_dev *dev, struct ppa_addr ppa)
> +{
> +	struct nvm_rq rqd = { NULL };
> +	struct bio bio;
> +	struct bio_vec bio_vec;
> +	struct page *page;
> +	int ret;
> +
> +	page = alloc_page(GFP_KERNEL);
> +	if (!page)
> +		return -ENOMEM;
> +
> +	bio_init(&bio, &bio_vec, 1);
> +	bio_add_page(&bio, page, PAGE_SIZE, 0);
> +	bio_set_op_attrs(&bio, REQ_OP_READ, 0);
> +
> +	rqd.bio = &bio;
> +	rqd.opcode = NVM_OP_PREAD;
> +	rqd.is_seq = 1;
> +	rqd.nr_ppas = 1;
> +	rqd.ppa_addr = generic_to_dev_addr(dev, ppa);
> +
> +	ret = nvm_submit_io_sync_raw(dev, &rqd);
> +	__free_page(page);
> +
> +	if (ret)
> +		return ret;
> +
> +	return rqd.error;
> +}
> +
>  /*
> - * folds a bad block list from its plane representation to its virtual
> - * block representation. The fold is done in place and reduced size is
> - * returned.
> + * Scans a 1.2 chunk's first and last page to determine its state.
> + * If the chunk is found to be open, also scan it to update the write
> + * pointer.
> + */
> +static int nvm_bb_scan_chunk(struct nvm_dev *dev, struct ppa_addr ppa,
> +			     struct nvm_chk_meta *meta)
> +{
> +	struct nvm_geo *geo = &dev->geo;
> +	int ret, pg, pl;
> +
> +	/* sense first page */
> +	ret = nvm_bb_chunk_sense(dev, ppa);
> +	if (ret < 0) /* io error */
> +		return ret;
> +	else if (ret == 0) /* valid data */
> +		meta->state = NVM_CHK_ST_OPEN;
> +	else if (ret > 0) {
> +		/*
> +		 * An empty page means the chunk is free. Media errors
> +		 * (CRC/ECC) mean the chunk holds data and is treated as
> +		 * open; anything else is an actual io error.
> +		 */
> +		switch (ret) {
> +		case NVM_RSP_ERR_EMPTYPAGE:
> +			meta->state = NVM_CHK_ST_FREE;
> +			return 0;
> +		case NVM_RSP_ERR_FAILCRC:
> +		case NVM_RSP_ERR_FAILECC:
> +		case NVM_RSP_WARN_HIGHECC:
> +			meta->state = NVM_CHK_ST_OPEN;
> +			goto scan;
> +		default:
> +			return -ret; /* other io error */
> +		}
> +	}
> +
> +	/* sense last page */
> +	ppa.g.pg = geo->num_pg - 1;
> +	ppa.g.pl = geo->num_pln - 1;
> +
> +	ret = nvm_bb_chunk_sense(dev, ppa);
> +	if (ret < 0) /* io error */
> +		return ret;
> +	else if (ret == 0) { /* chunk fully written */
> +		meta->state = NVM_CHK_ST_CLOSED;
> +		meta->wp = geo->clba;
> +		return 0;
> +	} else if (ret > 0) {
> +		switch (ret) {
> +		case NVM_RSP_ERR_EMPTYPAGE:
> +		case NVM_RSP_ERR_FAILCRC:
> +		case NVM_RSP_ERR_FAILECC:
> +		case NVM_RSP_WARN_HIGHECC:
> +			meta->state = NVM_CHK_ST_OPEN;
> +			break;
> +		default:
> +			return -ret; /* other io error */
> +		}
> +	}
> +
> +scan:
> +	/*
> +	 * Chunk is open; scan it sequentially to update the write pointer.
> +	 * We assume that targets write data across all planes before
> +	 * moving to the next page.
> +	 */
> +	for (pg = 0; pg < geo->num_pg; pg++) {
> +		for (pl = 0; pl < geo->num_pln; pl++) {
> +			ppa.g.pg = pg;
> +			ppa.g.pl = pl;
> +
> +			ret = nvm_bb_chunk_sense(dev, ppa);
> +			if (ret < 0) /* io error */
> +				return ret;
> +			else if (ret == 0) {
> +				meta->wp += geo->ws_min;
> +			} else if (ret > 0) {
> +				switch (ret) {
> +				case NVM_RSP_ERR_EMPTYPAGE:
> +					return 0;
> +				case NVM_RSP_ERR_FAILCRC:
> +				case NVM_RSP_ERR_FAILECC:
> +				case NVM_RSP_WARN_HIGHECC:
> +					meta->wp += geo->ws_min;
> +					break;
> +				default:
> +					return -ret; /* other io error */
> +				}
> +			}
> +		}
> +	}
> +
> +	return 0;
> +}
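As a side note for readers: with this in core, 1.2 chunk state lands in
the same struct nvm_chk_meta that 2.0 reports natively, so a target-side
consumer would look roughly like the below (hypothetical sketch, not
part of this patch; function name is made up, and it assumes chks[] was
filled through the core's chunk-meta path):

        static void tgt_recover_chunk_states(struct nvm_chk_meta *chks,
                                             int nchks)
        {
                int i;

                for (i = 0; i < nchks; i++) {
                        struct nvm_chk_meta *m = &chks[i];

                        switch (m->state) {
                        case NVM_CHK_ST_FREE:
                                /* erased; available for new writes */
                                break;
                        case NVM_CHK_ST_OPEN:
                                /* partially written: resume at slba + wp */
                                break;
                        case NVM_CHK_ST_CLOSED:
                                /* fully written: wp == cnlb */
                                break;
                        default:
                                /* offline/bad: exclude from use */
                                break;
                        }
                }
        }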
> +
> +/*
> + * folds a bad block list from its plane representation to its
> + * chunk representation.
>   *
> - * If any of the planes status are bad or grown bad block, the virtual block
> - * is marked bad. If not bad, the first plane state acts as the block state.
> + * If any of the plane states are bad or grown bad, the chunk is marked
> + * offline. If not bad, the first plane state acts as the chunk state.
>   */
> -int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks)
> +static int nvm_bb_to_chunk(struct nvm_dev *dev, struct ppa_addr ppa,
> +			   u8 *blks, int nr_blks, struct nvm_chk_meta *meta)
>  {
>  	struct nvm_geo *geo = &dev->geo;
> -	int blk, offset, pl, blktype;
> -
> -	if (nr_blks != geo->num_chk * geo->pln_mode)
> -		return -EINVAL;
> +	int ret, blk, pl, offset, blktype;
>
>  	for (blk = 0; blk < geo->num_chk; blk++) {
>  		offset = blk * geo->pln_mode;
>  		blktype = blks[offset];
>
> -		/* Bad blocks on any planes take precedence over other types */
>  		for (pl = 0; pl < geo->pln_mode; pl++) {
>  			if (blks[offset + pl] &
>  					(NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) {
> @@ -859,23 +951,125 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks)
>  			}
>  		}
>
> -		blks[blk] = blktype;
> +		meta->wp = 0;
> +		meta->type = NVM_CHK_TP_W_SEQ;
> +		meta->wi = 0;
> +		meta->slba = generic_to_dev_addr(dev, ppa).ppa;
> +		meta->cnlb = dev->geo.clba;
> +
> +		if (blktype == NVM_BLK_T_FREE) {
> +			ppa.a.blk = blk;

This line should be moved above the meta->slba assignment; otherwise,
the slba address does not take the block into account. Also, since this
is 1.2-specific, can you use g instead of a?
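I.e., something along these lines (untested sketch, hoisting the
assignment out of the if and switching to the 1.2 view):

		/* set the block before slba is derived from ppa */
		ppa.g.blk = blk;

		meta->wp = 0;
		meta->type = NVM_CHK_TP_W_SEQ;
		meta->wi = 0;
		meta->slba = generic_to_dev_addr(dev, ppa).ppa;
		meta->cnlb = dev->geo.clba;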
I'll send a patch later today implementing chunk metadata retrieval on
erase, which relies on this. I'll put it in a small commit in the series
fixing this; feel free to merge it into your patch.

Javier