On Mon, Mar 4, 2019 at 1:56 PM Igor Konopko <igor.j.konopko@xxxxxxxxx> wrote:
>
>
>
> On 04.03.2019 11:05, Hans Holmberg wrote:
> > On Mon, Mar 4, 2019 at 9:46 AM Javier González <javier@xxxxxxxxxxx> wrote:
> >>
> >>> On 27 Feb 2019, at 18.14, Igor Konopko <igor.j.konopko@xxxxxxxxx> wrote:
> >>>
> >>> When we create a pblk instance with the factory
> >>> flag, there is a possibility that some chunks
> >>> are in the open state, which does not allow us
> >>> to issue erase requests to them directly. Such
> >>> chunks should be filled with empty data in
> >>> order to reach the closed state. Without that we
> >>> risk that some erase operations will be
> >>> rejected by the drive due to an improper chunk
> >>> state.
> >>>
> >>> This patch implements chunk-closing logic in pblk
> >>> for that case, when creating an instance with the
> >>> factory flag, in order to fix that issue.
> >>>
> >>> Signed-off-by: Igor Konopko <igor.j.konopko@xxxxxxxxx>
> >>> ---
> >>>  drivers/lightnvm/pblk-core.c | 114 +++++++++++++++++++++++++++++++++++
> >>>  drivers/lightnvm/pblk-init.c |  14 ++++-
> >>>  drivers/lightnvm/pblk.h      |   2 +
> >>>  3 files changed, 128 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
> >>> index fa4dc05608ff..d3c45393f093 100644
> >>> --- a/drivers/lightnvm/pblk-core.c
> >>> +++ b/drivers/lightnvm/pblk-core.c
> >>> @@ -161,6 +161,120 @@ struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk,
> >>>  	return meta + ch_off + lun_off + chk_off;
> >>>  }
> >>>
> >>> +static void pblk_close_chunk(struct pblk *pblk, struct ppa_addr ppa, int count)
> >>> +{
> >>> +	struct nvm_tgt_dev *dev = pblk->dev;
> >>> +	struct nvm_geo *geo = &dev->geo;
> >>> +	struct bio *bio;
> >>> +	struct ppa_addr *ppa_list;
> >>> +	struct nvm_rq rqd;
> >>> +	void *meta_list, *data;
> >>> +	dma_addr_t dma_meta_list, dma_ppa_list;
> >>> +	int i, rq_ppas, rq_len, ret;
> >>> +
> >>> +	meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list);
> >>> +	if (!meta_list)
> >>> +		return;
> >>> +
> >>> +	ppa_list = meta_list + pblk_dma_meta_size(pblk);
> >>> +	dma_ppa_list = dma_meta_list + pblk_dma_meta_size(pblk);
> >>> +
> >>> +	rq_ppas = pblk_calc_secs(pblk, count, 0, false);
> >>> +	if (!rq_ppas)
> >>> +		rq_ppas = pblk->min_write_pgs;
> >>> +	rq_len = rq_ppas * geo->csecs;
> >>> +
> >>> +	data = kzalloc(rq_len, GFP_KERNEL);
> >>> +	if (!data)
> >>> +		goto free_meta_list;
> >>> +
> >>> +	memset(&rqd, 0, sizeof(struct nvm_rq));
> >>> +	rqd.opcode = NVM_OP_PWRITE;
> >>> +	rqd.nr_ppas = rq_ppas;
> >>> +	rqd.meta_list = meta_list;
> >>> +	rqd.ppa_list = ppa_list;
> >>> +	rqd.dma_ppa_list = dma_ppa_list;
> >>> +	rqd.dma_meta_list = dma_meta_list;
> >>> +
> >>> +next_rq:
> >>> +	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
> >>> +	if (IS_ERR(bio))
> >>> +		goto out_next;
> >>> +
> >>> +	bio->bi_iter.bi_sector = 0; /* artificial bio */
> >>> +	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
> >>> +
> >>> +	rqd.bio = bio;
> >>> +	for (i = 0; i < rqd.nr_ppas; i++) {
> >>> +		rqd.ppa_list[i] = ppa;
> >>> +		rqd.ppa_list[i].m.sec += i;
> >>> +		pblk_get_meta(pblk, meta_list, i)->lba =
> >>> +					cpu_to_le64(ADDR_EMPTY);
> >>> +	}
> >>> +
> >>> +	ret = nvm_submit_io_sync(dev, &rqd);
> >>> +	if (ret) {
> >>> +		bio_put(bio);
> >>> +		goto out_next;
> >>> +	}
> >>> +
> >>> +	if (rqd.error)
> >>> +		goto free_data;
> >>> +
> >>> +out_next:
> >>> +	count -= rqd.nr_ppas;
> >>> +	ppa.m.sec += rqd.nr_ppas;
> >>> +	if (count > 0)
> >>> +		goto next_rq;
> >>> +
> >>> +free_data:
> >>> +	kfree(data);
> >>> +free_meta_list:
> >>> +	nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list);
> >>> +}
> >>> +
> >>> +void pblk_close_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *meta)
> >>> +{
> >>> +	struct nvm_tgt_dev *dev = pblk->dev;
> >>> +	struct nvm_geo *geo = &dev->geo;
> >>> +	struct nvm_chk_meta *chunk_meta;
> >>> +	struct ppa_addr ppa;
> >>> +	int i, j, k, count;
> >>> +
> >>> +	for (i = 0; i < geo->num_chk; i++) {
> >>> +		for (j = 0; j < geo->num_lun; j++) {
> >>> +			for (k = 0; k < geo->num_ch; k++) {
> >>> +				ppa.ppa = 0;
> >>> +				ppa.m.grp = k;
> >>> +				ppa.m.pu = j;
> >>> +				ppa.m.chk = i;
> >>> +
> >>> +				chunk_meta = pblk_chunk_get_off(pblk,
> >>> +								meta, ppa);
> >>> +				if (chunk_meta->state == NVM_CHK_ST_OPEN) {
> >>> +					ppa.m.sec = chunk_meta->wp;
> >>> +					count = geo->clba - chunk_meta->wp;
> >>> +					pblk_close_chunk(pblk, ppa, count);
> >>> +				}
> >>> +			}
> >>> +		}
> >>> +	}
> >>> +}
> >>> +
> >>> +bool pblk_are_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *meta)
> >>> +{
> >>> +	struct nvm_tgt_dev *dev = pblk->dev;
> >>> +	struct nvm_geo *geo = &dev->geo;
> >>> +	int i;
> >>> +
> >>> +	for (i = 0; i < geo->all_luns; i++) {
> >>> +		if (meta[i].state == NVM_CHK_ST_OPEN)
> >>> +			return true;
> >>> +	}
> >>> +
> >>> +	return false;
> >>> +}
> >>> +
> >>>  void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
> >>>  			   u64 paddr)
> >>>  {
> >>> diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
> >>> index 9913a4514eb6..83abe6960b46 100644
> >>> --- a/drivers/lightnvm/pblk-init.c
> >>> +++ b/drivers/lightnvm/pblk-init.c
> >>> @@ -1028,13 +1028,14 @@ static int pblk_line_meta_init(struct pblk *pblk)
> >>>  	return 0;
> >>>  }
> >>>
> >>> -static int pblk_lines_init(struct pblk *pblk)
> >>> +static int pblk_lines_init(struct pblk *pblk, bool factory_init)
> >>>  {
> >>>  	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
> >>>  	struct pblk_line *line;
> >>>  	void *chunk_meta;
> >>>  	int nr_free_chks = 0;
> >>>  	int i, ret;
> >>> +	bool retry = false;
> >>>
> >>>  	ret = pblk_line_meta_init(pblk);
> >>>  	if (ret)
> >>> @@ -1048,12 +1049,21 @@ static int pblk_lines_init(struct pblk *pblk)
> >>>  	if (ret)
> >>>  		goto fail_free_meta;
> >>>
> >>> +get_chunk_meta:
> >>>  	chunk_meta = pblk_get_chunk_meta(pblk);
> >>>  	if (IS_ERR(chunk_meta)) {
> >>>  		ret = PTR_ERR(chunk_meta);
> >>>  		goto fail_free_luns;
> >>>  	}
> >>>
> >>> +	if (factory_init && !retry &&
> >>> +			pblk_are_opened_chunks(pblk, chunk_meta)) {
> >>> +		pblk_close_opened_chunks(pblk, chunk_meta);
> >>> +		retry = true;
> >>> +		vfree(chunk_meta);
> >>> +		goto get_chunk_meta;
> >>> +	}
> >>> +
> >>>  	pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
> >>>  			      GFP_KERNEL);
> >>>  	if (!pblk->lines) {
> >>> @@ -1244,7 +1254,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
> >>>  		goto fail;
> >>>  	}
> >>>
> >>> -	ret = pblk_lines_init(pblk);
> >>> +	ret = pblk_lines_init(pblk, flags & NVM_TARGET_FACTORY);
> >>>  	if (ret) {
> >>>  		pblk_err(pblk, "could not initialize lines\n");
> >>>  		goto fail_free_core;
> >>> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
> >>> index b266563508e6..b248642c4dfb 100644
> >>> --- a/drivers/lightnvm/pblk.h
> >>> +++ b/drivers/lightnvm/pblk.h
> >>> @@ -793,6 +793,8 @@ struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk);
> >>>  struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk,
> >>>  					struct nvm_chk_meta *lp,
> >>>  					struct ppa_addr ppa);
> >>> +void pblk_close_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *_meta);
> >>> +bool pblk_are_opened_chunks(struct pblk *pblk, struct nvm_chk_meta *_meta);
> >>>  void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
> >>>  void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
> >>>  int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
> >>> --
> >>> 2.17.1
> >>
> >> I know that the OCSSD 2.0 spec does not allow a transition from open to
> >> free, but to me this is a spec bug, as there is no underlying issue in
> >> reading an open block. Note that all controllers I know of support this,
> >> and the upcoming Denali spec fixes this too.
> >
> > + Klaus, whom I discussed this with.
> > Yeah, I think that "early reset" is a nice feature. It would be nice
> > to extend the OCSSD spec with a new capabilities bit indicating
> > whether this is supported or not.
> > Matias: what do you think?
> >
> >>
> >> Besides, the factory flag is intended to start a pblk instance
> >> immediately, without having to pay the price of padding any past device
> >> state. If you still want to do this, I think it belongs in a user-space tool.
> >>
> >
> > Hear, hear!
> >
> > Serially padding any open chunks during a -f create call would be a
> > terrible user experience.
> > Let's say that padding a chunk takes one second: in a worst-case
> > scenario on an example disk (64 LUNs with 1000 chunks each), we would
> > be stuck in a syscall for 1*64*1000 seconds ~ 17 hours.
> >
>
> You are both right - this can be time-consuming, so we can drop the
> "padding" part and let the user do that.
>
> What about changing this patch to at least print a warning to dmesg
> when there are open chunks in such a case?

Sounds good to me.

>
> > A tool like dm-zoned's dmzadm would be the right approach; see
> > Documentation/device-mapper/dm-zoned.txt.
> > All new pblk instances would then have to be pre-formatted with "pblkadm".
> >
> > A new physical storage format containing a superblock would also be a
> > good idea.
> >
> > / Hans
> >
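
For reference, a minimal sketch of the warning-only variant discussed above, in place of the pad-and-retry block this patch adds to pblk_lines_init(). It reuses pblk_are_opened_chunks() from the patch and pblk's existing pblk_warn() logging macro; the message text and exact placement are illustrative only:

	/*
	 * Sketch: on factory init, warn instead of padding when the device
	 * reports open chunks, and leave closing them to a user-space tool.
	 */
	if (factory_init && pblk_are_opened_chunks(pblk, chunk_meta))
		pblk_warn(pblk, "factory init: open chunks found, erases may be rejected\n");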
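
If the warning should also say how many chunks are affected, a hypothetical helper (not part of the patch) could count them. It assumes the layout returned by pblk_get_chunk_meta(): one nvm_chk_meta entry per chunk, geo->all_luns * geo->num_chk entries in total:

static int pblk_count_opened_chunks(struct pblk *pblk,
				    struct nvm_chk_meta *meta)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	int i, count = 0;

	/* one metadata entry per chunk, across all channels and LUNs */
	for (i = 0; i < geo->all_luns * geo->num_chk; i++)
		if (meta[i].state == NVM_CHK_ST_OPEN)
			count++;

	return count;
}

Reporting the count would also let the user estimate how long a padding pass in a user-space tool would take.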