On 03/16/2018 07:37 AM, Alex Elder wrote: > From: Ilya Dryomov <idryomov@xxxxxxxxx> > > In preparation for rbd "fancy" striping which requires bio_vec arrays, > wire up BVECS data type and kill off PAGES data type. There is nothing > wrong with using page vectors for copyup requests -- it's just less > iterator boilerplate code to write for the new striping framework. > > Signed-off-by: Ilya Dryomov <idryomov@xxxxxxxxx> Other than my repetitive comments about your macro use, this looks good to me. You should define setup_copyup_bvecs() before it's needed in its source file, to avoid the need for a separate declaration. I'm hoping you'll be reworking the code to avoid the confusing macros. But aside from that: Reviewed-by: Alex Elder <elder@xxxxxxxxxx> > --- > drivers/block/rbd.c | 155 ++++++++++++++++++++++++++-------------------------- > 1 file changed, 77 insertions(+), 78 deletions(-) > > diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c > index 722422e62b36..5ca190b15e1d 100644 > --- a/drivers/block/rbd.c > +++ b/drivers/block/rbd.c > @@ -219,7 +219,7 @@ typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *); > enum obj_request_type { > OBJ_REQUEST_NODATA = 1, > OBJ_REQUEST_BIO, /* pointer into provided bio (list) */ > - OBJ_REQUEST_PAGES, > + OBJ_REQUEST_BVECS, /* pointer into provided bio_vec array */ > }; > > enum obj_operation_type { > @@ -272,12 +272,12 @@ struct rbd_obj_request { > union { > struct ceph_bio_iter bio_pos; > struct { > - struct page **pages; > - u32 page_count; > + struct ceph_bvec_iter bvec_pos; > + u32 bvec_count; > }; > }; > - struct page **copyup_pages; > - u32 copyup_page_count; > + struct bio_vec *copyup_bvecs; > + u32 copyup_bvec_count; > > struct ceph_osd_request *osd_req; > > @@ -1272,36 +1272,14 @@ static void zero_bios(struct ceph_bio_iter *bio_pos, u32 off, u32 bytes) > })); > } > > -/* > - * similar to zero_bio_chain(), zeros data defined by a page array, > - * starting at the given byte offset from the start of the array and > - * continuing up to the given end offset. The pages array is > - * assumed to be big enough to hold all bytes up to the end. > - */ > -static void zero_pages(struct page **pages, u64 offset, u64 end) > +static void zero_bvecs(struct ceph_bvec_iter *bvec_pos, u32 off, u32 bytes) > { > - struct page **page = &pages[offset >> PAGE_SHIFT]; > + struct ceph_bvec_iter it = *bvec_pos; > > - rbd_assert(end > offset); > - rbd_assert(end - offset <= (u64)SIZE_MAX); > - while (offset < end) { > - size_t page_offset; > - size_t length; > - unsigned long flags; > - void *kaddr; > - > - page_offset = offset & ~PAGE_MASK; > - length = min_t(size_t, PAGE_SIZE - page_offset, end - offset); > - local_irq_save(flags); > - kaddr = kmap_atomic(*page); > - memset(kaddr + page_offset, 0, length); > - flush_dcache_page(*page); > - kunmap_atomic(kaddr); > - local_irq_restore(flags); > - > - offset += length; > - page++; > - } > + ceph_bvec_iter_advance(&it, off); > + ceph_bvec_iter_advance_step(&it, bytes, ({ > + zero_bvec(&bv); > + })); > } > > /* > @@ -1461,7 +1439,7 @@ static bool obj_request_type_valid(enum obj_request_type type) > switch (type) { > case OBJ_REQUEST_NODATA: > case OBJ_REQUEST_BIO: > - case OBJ_REQUEST_PAGES: > + case OBJ_REQUEST_BVECS: > return true; > default: > return false; > @@ -1611,14 +1589,15 @@ rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request) > if (obj_request->type == OBJ_REQUEST_BIO) > zero_bios(&obj_request->bio_pos, 0, length); > else > - zero_pages(obj_request->pages, 0, length); > + zero_bvecs(&obj_request->bvec_pos, 0, length); > obj_request->result = 0; > } else if (xferred < length && !obj_request->result) { > if (obj_request->type == OBJ_REQUEST_BIO) > zero_bios(&obj_request->bio_pos, xferred, > length - xferred); > else > - zero_pages(obj_request->pages, xferred, length); > + zero_bvecs(&obj_request->bvec_pos, xferred, > + length - xferred); > } > obj_request->xferred = length; > obj_request_done_set(obj_request); > @@ -1913,6 +1892,7 @@ rbd_obj_request_create(enum obj_request_type type) > static void rbd_obj_request_destroy(struct kref *kref) > { > struct rbd_obj_request *obj_request; > + u32 i; > > obj_request = container_of(kref, struct rbd_obj_request, kref); > > @@ -1924,22 +1904,22 @@ static void rbd_obj_request_destroy(struct kref *kref) > if (obj_request->osd_req) > rbd_osd_req_destroy(obj_request->osd_req); > > - rbd_assert(obj_request_type_valid(obj_request->type)); > switch (obj_request->type) { > case OBJ_REQUEST_NODATA: > case OBJ_REQUEST_BIO: > + case OBJ_REQUEST_BVECS: > break; /* Nothing to do */ > - case OBJ_REQUEST_PAGES: > - /* img_data requests don't own their page array */ > - if (obj_request->pages && > - !obj_request_img_data_test(obj_request)) > - ceph_release_page_vector(obj_request->pages, > - obj_request->page_count); > - break; > + default: > + rbd_assert(0); > } > > - ceph_release_page_vector(obj_request->copyup_pages, > - obj_request->copyup_page_count); > + if (obj_request->copyup_bvecs) { > + for (i = 0; i < obj_request->copyup_bvec_count; i++) { > + if (obj_request->copyup_bvecs[i].bv_page) > + __free_page(obj_request->copyup_bvecs[i].bv_page); > + } > + kfree(obj_request->copyup_bvecs); > + } > > kmem_cache_free(rbd_obj_request_cache, obj_request); > } > @@ -2260,10 +2240,9 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request, > if (obj_request->type == OBJ_REQUEST_BIO) > osd_req_op_extent_osd_data_bio(osd_request, num_ops, > &obj_request->bio_pos, length); > - else if (obj_request->type == OBJ_REQUEST_PAGES) > - osd_req_op_extent_osd_data_pages(osd_request, num_ops, > - obj_request->pages, length, > - offset & ~PAGE_MASK, false, false); > + else if (obj_request->type == OBJ_REQUEST_BVECS) > + osd_req_op_extent_osd_data_bvecs(osd_request, num_ops, > + &obj_request->bvec_pos); > > /* Discards are also writes */ > if (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD) > @@ -2288,7 +2267,7 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, > struct rbd_obj_request *obj_request = NULL; > struct rbd_obj_request *next_obj_request; > struct ceph_bio_iter bio_it; > - struct page **pages = NULL; > + struct ceph_bvec_iter bvec_it; > enum obj_operation_type op_type; > u64 img_offset; > u64 resid; > @@ -2305,8 +2284,8 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, > bio_it = *(struct ceph_bio_iter *)data_desc; > rbd_assert(img_offset == > bio_it.iter.bi_sector << SECTOR_SHIFT); > - } else if (type == OBJ_REQUEST_PAGES) { > - pages = data_desc; > + } else if (type == OBJ_REQUEST_BVECS) { > + bvec_it = *(struct ceph_bvec_iter *)data_desc; > } > > while (resid) { > @@ -2332,15 +2311,10 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, > if (type == OBJ_REQUEST_BIO) { > obj_request->bio_pos = bio_it; > ceph_bio_iter_advance(&bio_it, length); > - } else if (type == OBJ_REQUEST_PAGES) { > - unsigned int page_count; > - > - obj_request->pages = pages; > - page_count = (u32)calc_pages_for(offset, length); > - obj_request->page_count = page_count; > - if ((offset + length) & ~PAGE_MASK) > - page_count--; /* more on last page */ > - pages += page_count; > + } else if (type == OBJ_REQUEST_BVECS) { > + obj_request->bvec_pos = bvec_it; > + ceph_bvec_iter_shorten(&obj_request->bvec_pos, length); > + ceph_bvec_iter_advance(&bvec_it, length); > } > > osd_req = rbd_osd_req_create(rbd_dev, op_type, > @@ -2452,8 +2426,8 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request) > /* Initialize the copyup op */ > > osd_req_op_cls_init(osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup"); > - osd_req_op_cls_request_data_pages(osd_req, 0, orig_request->copyup_pages, > - parent_length, 0, false, false); > + osd_req_op_cls_request_data_bvecs(osd_req, 0, orig_request->copyup_bvecs, > + parent_length); > > /* Add the other op(s) */ > > @@ -2469,6 +2443,8 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request) > rbd_obj_request_error(orig_request, img_result); > } > > +static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap); > + Move the definition of this new function here, rather than having to insert this declaration. > /* > * Read from the parent image the range of data that covers the > * entire target of the given object request. This is used for > @@ -2487,10 +2463,9 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request) > { > struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev; > struct rbd_img_request *parent_request = NULL; > + struct ceph_bvec_iter bvec_it = { 0 }; > u64 img_offset; > u64 length; > - struct page **pages = NULL; > - u32 page_count; > int result; > > rbd_assert(rbd_dev->parent != NULL); > @@ -2516,16 +2491,9 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request) > * Allocate a page array big enough to receive the data read > * from the parent. > */ > - page_count = (u32)calc_pages_for(0, length); > - pages = ceph_alloc_page_vector(page_count, GFP_NOIO); > - if (IS_ERR(pages)) { > - result = PTR_ERR(pages); > + result = setup_copyup_bvecs(obj_request, length); > + if (result) > goto out_err; > - } > - > - rbd_assert(!obj_request->copyup_pages); > - obj_request->copyup_pages = pages; > - obj_request->copyup_page_count = page_count; > > result = -ENOMEM; > parent_request = rbd_parent_request_create(obj_request, > @@ -2533,7 +2501,10 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request) > if (!parent_request) > goto out_err; > > - result = rbd_img_request_fill(parent_request, OBJ_REQUEST_PAGES, pages); > + bvec_it.bvecs = obj_request->copyup_bvecs; > + bvec_it.iter.bi_size = length; > + result = rbd_img_request_fill(parent_request, OBJ_REQUEST_BVECS, > + &bvec_it); > if (result) > goto out_err; > > @@ -2751,6 +2722,34 @@ static int rbd_img_request_submit(struct rbd_img_request *img_request) > return ret; > } > > +static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap) > +{ > + u32 i; > + > + rbd_assert(!obj_req->copyup_bvecs); > + obj_req->copyup_bvec_count = calc_pages_for(0, obj_overlap); > + obj_req->copyup_bvecs = kcalloc(obj_req->copyup_bvec_count, > + sizeof(*obj_req->copyup_bvecs), > + GFP_NOIO); > + if (!obj_req->copyup_bvecs) > + return -ENOMEM; > + > + for (i = 0; i < obj_req->copyup_bvec_count; i++) { > + unsigned int len = min(obj_overlap, (u64)PAGE_SIZE); > + > + obj_req->copyup_bvecs[i].bv_page = alloc_page(GFP_NOIO); > + if (!obj_req->copyup_bvecs[i].bv_page) > + return -ENOMEM; > + > + obj_req->copyup_bvecs[i].bv_offset = 0; > + obj_req->copyup_bvecs[i].bv_len = len; > + obj_overlap -= len; > + } > + > + rbd_assert(!obj_overlap); > + return 0; > +} > + > static void rbd_img_parent_read_callback(struct rbd_img_request *img_request) > { > struct rbd_obj_request *obj_request; > @@ -2832,8 +2831,8 @@ static void rbd_img_parent_read(struct rbd_obj_request *obj_request) > result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO, > &obj_request->bio_pos); > else > - result = rbd_img_request_fill(img_request, OBJ_REQUEST_PAGES, > - obj_request->pages); > + result = rbd_img_request_fill(img_request, OBJ_REQUEST_BVECS, > + &obj_request->bvec_pos); > if (result) > goto out_err; > > -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html