> On 26 Jun 2018, at 20.47, Heiner Litz <hlitz@xxxxxxxx> wrote:
>
>> On Fri, Jun 22, 2018 at 11:17 AM Matias Bjørling <mb@xxxxxxxxxxx> wrote:
>>
>>> On 06/18/2018 07:56 PM, Heiner Litz wrote:
>>> In the read path, partial reads are currently performed synchronously
>>> which affects performance for workloads that generate many partial
>>> reads. This patch adds an asynchronous partial read path as well as
>>> the required partial read ctx.
>>>
>>> Signed-off-by: Heiner Litz <hlitz@xxxxxxxx>
>>> ---
>>>  drivers/lightnvm/pblk-read.c | 183 ++++++++++++++++++++++++++++---------------
>>>  drivers/lightnvm/pblk.h      |  10 +++
>>>  2 files changed, 130 insertions(+), 63 deletions(-)
>>>
>>> diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
>>> index 6e93c48..828df98 100644
>>> --- a/drivers/lightnvm/pblk-read.c
>>> +++ b/drivers/lightnvm/pblk-read.c
>>> @@ -231,74 +231,36 @@ static void pblk_end_io_read(struct nvm_rq *rqd)
>>>          __pblk_end_io_read(pblk, rqd, true);
>>>  }
>>>
>>> -static int pblk_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
>>> -                             struct bio *orig_bio, unsigned int bio_init_idx,
>>> -                             unsigned long *read_bitmap)
>>> +static void pblk_end_partial_read(struct nvm_rq *rqd)
>>>  {
>>> -        struct pblk_sec_meta *meta_list = rqd->meta_list;
>>> -        struct bio *new_bio;
>>> +        struct pblk *pblk = rqd->private;
>>> +        struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
>>> +        struct pblk_pr_ctx *pr_ctx = r_ctx->private;
>>> +        struct bio *new_bio = rqd->bio;
>>> +        struct bio *bio = pr_ctx->orig_bio;
>>>          struct bio_vec src_bv, dst_bv;
>>> -        void *ppa_ptr = NULL;
>>> -        void *src_p, *dst_p;
>>> -        dma_addr_t dma_ppa_list = 0;
>>> -        __le64 *lba_list_mem, *lba_list_media;
>>> -        int nr_secs = rqd->nr_ppas;
>>> +        struct pblk_sec_meta *meta_list = rqd->meta_list;
>>> +        int bio_init_idx = pr_ctx->bio_init_idx;
>>> +        unsigned long *read_bitmap = &pr_ctx->bitmap;
>>> +        int nr_secs = pr_ctx->orig_nr_secs;
>>>          int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
>>> -        int i, ret, hole;
>>> -
>>> -        /* Re-use allocated memory for intermediate lbas */
>>> -        lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size);
>>> -        lba_list_media = (((void *)rqd->ppa_list) + 2 * pblk_dma_ppa_size);
>>> -
>>> -        new_bio = bio_alloc(GFP_KERNEL, nr_holes);
>>> -
>>> -        if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
>>> -                goto fail_add_pages;
>>> -
>>> -        if (nr_holes != new_bio->bi_vcnt) {
>>> -                pr_err("pblk: malformed bio\n");
>>> -                goto fail;
>>> -        }
>>> -
>>> -        for (i = 0; i < nr_secs; i++)
>>> -                lba_list_mem[i] = meta_list[i].lba;
>>> -
>>> -        new_bio->bi_iter.bi_sector = 0; /* internal bio */
>>> -        bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
>>> -
>>> -        rqd->bio = new_bio;
>>> -        rqd->nr_ppas = nr_holes;
>>> -        rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
>>> -
>>> -        if (unlikely(nr_holes == 1)) {
>>> -                ppa_ptr = rqd->ppa_list;
>>> -                dma_ppa_list = rqd->dma_ppa_list;
>>> -                rqd->ppa_addr = rqd->ppa_list[0];
>>> -        }
>>> -
>>> -        ret = pblk_submit_io_sync(pblk, rqd);
>>> -        if (ret) {
>>> -                bio_put(rqd->bio);
>>> -                pr_err("pblk: sync read IO submission failed\n");
>>> -                goto fail;
>>> -        }
>>> -
>>> -        if (rqd->error) {
>>> -                atomic_long_inc(&pblk->read_failed);
>>> -#ifdef CONFIG_NVM_PBLK_DEBUG
>>> -                pblk_print_failed_rqd(pblk, rqd, rqd->error);
>>> -#endif
>>> -        }
>>> +        __le64 *lba_list_mem, *lba_list_media;
>>> +        void *src_p, *dst_p;
>>> +        int hole, i;
>>>
>>>          if (unlikely(nr_holes == 1)) {
>>>                  struct ppa_addr ppa;
>>>
>>>                  ppa = rqd->ppa_addr;
>>> -                rqd->ppa_list = ppa_ptr;
>>> -                rqd->dma_ppa_list = dma_ppa_list;
>>> +                rqd->ppa_list = pr_ctx->ppa_ptr;
>>> +                rqd->dma_ppa_list = pr_ctx->dma_ppa_list;
>>>                  rqd->ppa_list[0] = ppa;
>>>          }
>>>
>>> +        /* Re-use allocated memory for intermediate lbas */
>>> +        lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size);
>>> +        lba_list_media = (((void *)rqd->ppa_list) + 2 * pblk_dma_ppa_size);
>>> +
>>>          for (i = 0; i < nr_secs; i++) {
>>>                  lba_list_media[i] = meta_list[i].lba;
>>>                  meta_list[i].lba = lba_list_mem[i];
>>> @@ -316,7 +278,7 @@ static int pblk_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
>>>                  meta_list[hole].lba = lba_list_media[i];
>>>
>>>                  src_bv = new_bio->bi_io_vec[i++];
>>> -                dst_bv = orig_bio->bi_io_vec[bio_init_idx + hole];
>>> +                dst_bv = bio->bi_io_vec[bio_init_idx + hole];
>>>
>>>                  src_p = kmap_atomic(src_bv.bv_page);
>>>                  dst_p = kmap_atomic(dst_bv.bv_page);
>>> @@ -334,19 +296,107 @@ static int pblk_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
>>>          } while (hole < nr_secs);
>>>
>>>          bio_put(new_bio);
>>> +        kfree(pr_ctx);
>>>
>>>          /* restore original request */
>>>          rqd->bio = NULL;
>>>          rqd->nr_ppas = nr_secs;
>>>
>>> +        bio_endio(bio);
>>>          __pblk_end_io_read(pblk, rqd, false);
>>> -        return NVM_IO_DONE;
>>> +}
>>>
>>> -fail:
>>> -        /* Free allocated pages in new bio */
>>> +static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
>>> +                                   unsigned int bio_init_idx,
>>> +                                   unsigned long *read_bitmap,
>>> +                                   int nr_holes)
>>> +{
>>> +        struct pblk_sec_meta *meta_list = rqd->meta_list;
>>> +        struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
>>> +        struct pblk_pr_ctx *pr_ctx;
>>> +        struct bio *new_bio, *bio = r_ctx->private;
>>> +        __le64 *lba_list_mem;
>>> +        int nr_secs = rqd->nr_ppas;
>>> +        int i;
>>> +
>>> +        /* Re-use allocated memory for intermediate lbas */
>>> +        lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size);
>>> +
>>> +        new_bio = bio_alloc(GFP_KERNEL, nr_holes);
>>
>>
>> new_bio can return NULL.
>
> see Jens's email

Yes, this is the same as in the original partial path

>
>>
>>> +
>>> +        if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
>>> +                goto fail;
>>
>> goto bio_put?
>
> do you only want the label to be changed? sure

To be consistent with the rest of pblk, the label should be fail_bio_put.
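For concreteness, a sketch of how the error path could read with that naming (hypothetical labels following pblk's fail_* convention, not part of the posted patch; fail_free_pages is the companion label discussed further below):

        if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
                goto fail_bio_put;

        ...

fail_free_pages:
        /* only reached on paths where pblk_bio_add_pages() succeeded */
        pblk_bio_free_pages(pblk, new_bio, 0, new_bio->bi_vcnt);
fail_bio_put:
        bio_put(new_bio);

        return -ENOMEM;

The label names then encode the unwind order: pages are freed only on paths where they were actually added.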
>
>>> +
>>> +        if (nr_holes != new_bio->bi_vcnt) {
>>> +                pr_err("pblk: malformed bio\n");
>>
>> I don't think there is a need for an error message here. In which case
>> would this happen?
>
> This is taken over from the original partial read path. Maybe Javier
> knows why it was put in there in the first place.

This should not happen and it’s a pblk internal error. I’m ok with
making it a WARN_ONCE()
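For illustration, a minimal sketch of that variant (not part of the posted patch; the label assumes the fail_free_pages rename discussed below):

        if (WARN_ONCE(nr_holes != new_bio->bi_vcnt,
                      "pblk: malformed bio\n"))
                goto fail_free_pages;

WARN_ONCE() prints the warning and a stack trace only on the first hit, but evaluates to the condition on every call, so the error path is still taken each time.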
>
>>
>>> +                goto fail_pages;
>>> +        }
>>
>> goto free_pages?

Same as above: fail_free_pages.

>
> sure I can change the label
>
>>
>>> +
>>> +        pr_ctx = kmalloc(sizeof(struct pblk_pr_ctx), GFP_KERNEL);
>>> +        if (!pr_ctx)
>>> +                goto fail_pages;
>>> +
>>> +        for (i = 0; i < nr_secs; i++)
>>> +                lba_list_mem[i] = meta_list[i].lba;
>>> +
>>> +        new_bio->bi_iter.bi_sector = 0; /* internal bio */
>>> +        bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
>>> +
>>> +        rqd->bio = new_bio;
>>> +        rqd->nr_ppas = nr_holes;
>>> +        rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
>>> +
>>> +        pr_ctx->ppa_ptr = NULL;
>>> +        pr_ctx->orig_bio = bio;
>>> +        pr_ctx->bitmap = *read_bitmap;
>>> +        pr_ctx->bio_init_idx = bio_init_idx;
>>> +        pr_ctx->orig_nr_secs = nr_secs;
>>> +        r_ctx->private = pr_ctx;
>>> +
>>> +        if (unlikely(nr_holes == 1)) {
>>> +                pr_ctx->ppa_ptr = rqd->ppa_list;
>>> +                pr_ctx->dma_ppa_list = rqd->dma_ppa_list;
>>> +                rqd->ppa_addr = rqd->ppa_list[0];
>>> +        }
>>> +        return 0;
>>> +
>>> +fail_pages:
>>>          pblk_bio_free_pages(pblk, new_bio, 0, new_bio->bi_vcnt);
>>> -fail_add_pages:
>>> +fail:
>>> +        bio_put(new_bio);
>>> +
>>> +        return -ENOMEM;
>>> +}
>>> +
>>> +static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
>>> +                                 unsigned int bio_init_idx,
>>> +                                 unsigned long *read_bitmap, int nr_secs)
>>> +{
>>> +        int nr_holes;
>>> +        int ret;
>>> +
>>> +        nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
>>> +
>>> +        if (pblk_setup_partial_read(pblk, rqd, bio_init_idx, read_bitmap,
>>> +                                    nr_holes))
>>> +                return NVM_IO_ERR;
>>> +
>>> +        rqd->end_io = pblk_end_partial_read;
>>> +
>>> +        ret = pblk_submit_io(pblk, rqd);
>>> +        if (ret) {
>>> +                bio_put(rqd->bio);
>>> +                pr_err("pblk: partial read IO submission failed\n");
>>> +                goto err;
>>> +        }
>>> +
>>> +        return NVM_IO_OK;
>>> +
>>> +err:
>>>          pr_err("pblk: failed to perform partial read\n");
>>> +
>>> +        /* Free allocated pages in new bio */
>>> +        pblk_bio_free_pages(pblk, rqd->bio, 0, rqd->bio->bi_vcnt);
>>>          __pblk_end_io_read(pblk, rqd, false);
>>>          return NVM_IO_ERR;
>>>  }
>>> @@ -480,8 +530,15 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
>>>          /* The read bio request could be partially filled by the write buffer,
>>>           * but there are some holes that need to be read from the drive.
>>>           */
>>> -        return pblk_partial_read(pblk, rqd, bio, bio_init_idx, &read_bitmap);
>>> +        ret = pblk_partial_read_bio(pblk, rqd, bio_init_idx, &read_bitmap,
>>> +                                    nr_secs);
>>> +        if (ret)
>>> +                goto fail_meta_free;
>>> +
>>> +        return NVM_IO_OK;
>>>
>>> +fail_meta_free:
>>> +        nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
>>>  fail_rqd_free:
>>>          pblk_free_rqd(pblk, rqd, PBLK_READ);
>>>          return ret;
>>> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
>>> index c072955..1c7ac06 100644
>>> --- a/drivers/lightnvm/pblk.h
>>> +++ b/drivers/lightnvm/pblk.h
>>> @@ -119,6 +119,16 @@ struct pblk_g_ctx {
>>>          u64 lba;
>>>  };
>>>
>>> +/* partial read context */
>>> +struct pblk_pr_ctx {
>>> +        struct bio *orig_bio;
>>> +        unsigned long bitmap;
>>
>> Guarantees that this is at least 32bit, but this should be at least 64
>> bit right?
>>
>>> +        unsigned int orig_nr_secs;
>>> +        unsigned int bio_init_idx;
>>> +        void *ppa_ptr;
>>> +        dma_addr_t dma_ppa_list;
>>> +};
>>> +
>>>  /* Pad context */
>>>  struct pblk_pad_rq {
>>>          struct pblk *pblk;
>>>
>>
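Re the bitmap width question above: unsigned long is only 32 bits on 32-bit architectures, while a request can span up to 64 sectors. A width-safe sketch (illustrative only, assuming NVM_MAX_VLBA from lightnvm.h is the right bound for sectors per request):

/* partial read context */
struct pblk_pr_ctx {
        struct bio *orig_bio;
        DECLARE_BITMAP(bitmap, NVM_MAX_VLBA);   /* 64 bits on all archs */
        unsigned int orig_nr_secs;
        unsigned int bio_init_idx;
        void *ppa_ptr;
        dma_addr_t dma_ppa_list;
};

The assignment pr_ctx->bitmap = *read_bitmap in pblk_setup_partial_read() would then become bitmap_copy(pr_ctx->bitmap, read_bitmap, NVM_MAX_VLBA), and the completion path could use pr_ctx->bitmap directly as the bitmap base instead of taking its address.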