On Thu, 2025-03-13 at 23:32 +0000, David Howells wrote: > Make rbd_obj_read_sync() allocate and use a ceph_databuf object to convey > the data into the operation. This has some space preallocated and this is > allocated by alloc_pages() and accessed with kmap_local rather than being > kmalloc'd. This allows MSG_SPLICE_PAGES to be used. > > Signed-off-by: David Howells <dhowells@xxxxxxxxxx> > cc: Viacheslav Dubeyko <slava@xxxxxxxxxxx> > cc: Alex Markuze <amarkuze@xxxxxxxxxx> > cc: Ilya Dryomov <idryomov@xxxxxxxxx> > cc: ceph-devel@xxxxxxxxxxxxxxx > cc: linux-fsdevel@xxxxxxxxxxxxxxx > --- > drivers/block/rbd.c | 45 ++++++++++++++++++++------------------------- > 1 file changed, 20 insertions(+), 25 deletions(-) > > diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c > index faafd7ff43d6..bb953634c7cb 100644 > --- a/drivers/block/rbd.c > +++ b/drivers/block/rbd.c > @@ -4822,13 +4822,10 @@ static void rbd_free_disk(struct rbd_device *rbd_dev) > static int rbd_obj_read_sync(struct rbd_device *rbd_dev, > struct ceph_object_id *oid, > struct ceph_object_locator *oloc, > - void *buf, int buf_len) > - > + struct ceph_databuf *dbuf, int len) > { > struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; > struct ceph_osd_request *req; > - struct page **pages; > - int num_pages = calc_pages_for(0, buf_len); > int ret; > > req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL); > @@ -4839,15 +4836,8 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev, > ceph_oloc_copy(&req->r_base_oloc, oloc); > req->r_flags = CEPH_OSD_FLAG_READ; > > - pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); > - if (IS_ERR(pages)) { > - ret = PTR_ERR(pages); > - goto out_req; > - } > - > - osd_req_op_extent_init(req, 0, CEPH_OSD_OP_READ, 0, buf_len, 0, 0); > - osd_req_op_extent_osd_data_pages(req, 0, pages, buf_len, 0, false, > - true); > + osd_req_op_extent_init(req, 0, CEPH_OSD_OP_READ, 0, len, 0, 0); > + osd_req_op_extent_osd_databuf(req, 0, dbuf); > > ret = ceph_osdc_alloc_messages(req, GFP_KERNEL); > if (ret) > @@ -4855,9 +4845,6 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev, > > ceph_osdc_start_request(osdc, req); > ret = ceph_osdc_wait_request(osdc, req); > - if (ret >= 0) > - ceph_copy_from_page_vector(pages, buf, 0, ret); > - > out_req: > ceph_osdc_put_request(req); > return ret; > @@ -4872,12 +4859,18 @@ static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev, > struct rbd_image_header *header, > bool first_time) > { > - struct rbd_image_header_ondisk *ondisk = NULL; > + struct rbd_image_header_ondisk *ondisk; > + struct ceph_databuf *dbuf = NULL; > u32 snap_count = 0; > u64 names_size = 0; > u32 want_count; > int ret; > > + dbuf = ceph_databuf_req_alloc(1, sizeof(*ondisk), GFP_KERNEL); I am slightly worried about such using of ondisk variable. We have garbage as a value of ondisk pointer on this step yet. And pointer dereferencing could look confusing here. Also, potentially, compiler and static analysis tools could complain. I don't see a problem here but anyway I am feeling worried. :) Thanks, Slava. > + if (!dbuf) > + return -ENOMEM; > + ondisk = kmap_ceph_databuf_page(dbuf, 0); > + > /* > * The complete header will include an array of its 64-bit > * snapshot ids, followed by the names of those snapshots as > @@ -4888,17 +4881,18 @@ static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev, > do { > size_t size; > > - kfree(ondisk); > - > size = sizeof (*ondisk); > size += snap_count * sizeof (struct rbd_image_snap_ondisk); > size += names_size; > - ondisk = kmalloc(size, GFP_KERNEL); > - if (!ondisk) > - return -ENOMEM; > + > + ret = -ENOMEM; > + if (size > dbuf->limit && > + ceph_databuf_reserve(dbuf, size - dbuf->limit, > + GFP_KERNEL) < 0) > + goto out; > > ret = rbd_obj_read_sync(rbd_dev, &rbd_dev->header_oid, > - &rbd_dev->header_oloc, ondisk, size); > + &rbd_dev->header_oloc, dbuf, size); > if (ret < 0) > goto out; > if ((size_t)ret < size) { > @@ -4907,6 +4901,7 @@ static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev, > size, ret); > goto out; > } > + > if (!rbd_dev_ondisk_valid(ondisk)) { > ret = -ENXIO; > rbd_warn(rbd_dev, "invalid header"); > @@ -4920,8 +4915,8 @@ static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev, > > ret = rbd_header_from_disk(header, ondisk, first_time); > out: > - kfree(ondisk); > - > + kunmap_local(ondisk); > + ceph_databuf_release(dbuf); > return ret; > } > > >