Convert the reply buffer of ceph_osdc_notify() to ceph_databuf rather than an array of pages. Signed-off-by: David Howells <dhowells@xxxxxxxxxx> cc: Viacheslav Dubeyko <slava@xxxxxxxxxxx> cc: Alex Markuze <amarkuze@xxxxxxxxxx> cc: Ilya Dryomov <idryomov@xxxxxxxxx> cc: ceph-devel@xxxxxxxxxxxxxxx cc: linux-fsdevel@xxxxxxxxxxxxxxx --- drivers/block/rbd.c | 36 +++++++++++++++++---------- include/linux/ceph/databuf.h | 16 ++++++++++++ include/linux/ceph/osd_client.h | 7 ++---- net/ceph/osd_client.c | 44 +++++++++++---------------------- 4 files changed, 55 insertions(+), 48 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index eea12c7ab2a0..a2674077edea 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3585,8 +3585,7 @@ static void rbd_unlock(struct rbd_device *rbd_dev) static int __rbd_notify_op_lock(struct rbd_device *rbd_dev, enum rbd_notify_op notify_op, - struct page ***preply_pages, - size_t *preply_len) + struct ceph_databuf *reply) { struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; struct rbd_client_id cid = rbd_get_cid(rbd_dev); @@ -3604,13 +3603,13 @@ static int __rbd_notify_op_lock(struct rbd_device *rbd_dev, return ceph_osdc_notify(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, buf, buf_size, - RBD_NOTIFY_TIMEOUT, preply_pages, preply_len); + RBD_NOTIFY_TIMEOUT, reply); } static void rbd_notify_op_lock(struct rbd_device *rbd_dev, enum rbd_notify_op notify_op) { - __rbd_notify_op_lock(rbd_dev, notify_op, NULL, NULL); + __rbd_notify_op_lock(rbd_dev, notify_op, NULL); } static void rbd_notify_acquired_lock(struct work_struct *work) @@ -3631,23 +3630,29 @@ static void rbd_notify_released_lock(struct work_struct *work) static int rbd_request_lock(struct rbd_device *rbd_dev) { - struct page **reply_pages; - size_t reply_len; + struct ceph_databuf *reply; bool lock_owner_responded = false; int ret; dout("%s rbd_dev %p\n", __func__, rbd_dev); - ret = __rbd_notify_op_lock(rbd_dev, RBD_NOTIFY_OP_REQUEST_LOCK, - &reply_pages, &reply_len); + /* The actual reply pages will be allocated in the read path and then + * pasted in in handle_watch_notify(). + */ + reply = ceph_databuf_reply_alloc(0, 0, GFP_KERNEL); + if (!reply) + return -ENOMEM; + + ret = __rbd_notify_op_lock(rbd_dev, RBD_NOTIFY_OP_REQUEST_LOCK, reply); if (ret && ret != -ETIMEDOUT) { rbd_warn(rbd_dev, "failed to request lock: %d", ret); goto out; } - if (reply_len > 0 && reply_len <= PAGE_SIZE) { - void *p = page_address(reply_pages[0]); - void *const end = p + reply_len; + if (ceph_databuf_len(reply) > 0 && ceph_databuf_len(reply) <= PAGE_SIZE) { + void *s = kmap_ceph_databuf_page(reply, 0); + void *p = s; + void *const end = p + ceph_databuf_len(reply); u32 n; ceph_decode_32_safe(&p, end, n, e_inval); /* num_acks */ @@ -3659,10 +3664,12 @@ static int rbd_request_lock(struct rbd_device *rbd_dev) p += 8 + 8; /* skip gid and cookie */ ceph_decode_32_safe(&p, end, len, e_inval); - if (!len) + if (!len) { continue; + } if (lock_owner_responded) { + kunmap_local(s); rbd_warn(rbd_dev, "duplicate lock owners detected"); ret = -EIO; @@ -3673,6 +3680,7 @@ static int rbd_request_lock(struct rbd_device *rbd_dev) ret = ceph_start_decoding(&p, end, 1, "ResponseMessage", &struct_v, &len); if (ret) { + kunmap_local(s); rbd_warn(rbd_dev, "failed to decode ResponseMessage: %d", ret); @@ -3681,6 +3689,8 @@ static int rbd_request_lock(struct rbd_device *rbd_dev) ret = ceph_decode_32(&p); } + + kunmap_local(s); } if (!lock_owner_responded) { @@ -3689,7 +3699,7 @@ static int rbd_request_lock(struct rbd_device *rbd_dev) } out: - ceph_release_page_vector(reply_pages, calc_pages_for(0, reply_len)); + ceph_databuf_release(reply); return ret; e_inval: diff --git a/include/linux/ceph/databuf.h b/include/linux/ceph/databuf.h index 54b76d0c91a0..25154b3d08fa 100644 --- a/include/linux/ceph/databuf.h +++ b/include/linux/ceph/databuf.h @@ -150,4 +150,20 @@ static inline bool ceph_databuf_is_all_zero(struct ceph_databuf *dbuf, size_t co ceph_databuf_scan_for_nonzero) == count; } +static inline void ceph_databuf_transfer(struct ceph_databuf *to, + struct ceph_databuf *from) +{ + BUG_ON(to->nr_bvec || to->bvec); + to->bvec = from->bvec; + to->nr_bvec = from->nr_bvec; + to->max_bvec = from->max_bvec; + to->limit = from->limit; + to->iter = from->iter; + + from->bvec = NULL; + from->nr_bvec = from->max_bvec = 0; + from->limit = 0; + iov_iter_discard(&from->iter, ITER_DEST, 0); +} + #endif /* __FS_CEPH_DATABUF_H */ diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 5a1ee66ca216..7eff589711cc 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -333,9 +333,7 @@ struct ceph_osd_linger_request { struct ceph_databuf *request_pl; struct ceph_databuf *notify_id_buf; - - struct page ***preply_pages; - size_t *preply_len; + struct ceph_databuf *reply; }; struct ceph_watch_item { @@ -589,8 +587,7 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, void *payload, u32 payload_len, u32 timeout, - struct page ***preply_pages, - size_t *preply_len); + struct ceph_databuf *reply); int ceph_osdc_list_watchers(struct ceph_osd_client *osdc, struct ceph_object_id *oid, struct ceph_object_locator *oloc, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 1a0cb2cdcc52..92aaa5ed9145 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -4523,17 +4523,11 @@ static void handle_watch_notify(struct ceph_osd_client *osdc, dout("lreq %p notify_id %llu != %llu, ignoring\n", lreq, lreq->notify_id, notify_id); } else if (!completion_done(&lreq->notify_finish_wait)) { - struct ceph_msg_data *data = - msg->num_data_items ? &msg->data[0] : NULL; - - if (data) { - if (lreq->preply_pages) { - WARN_ON(data->type != - CEPH_MSG_DATA_PAGES); - *lreq->preply_pages = data->pages; - *lreq->preply_len = data->length; - data->own_pages = false; - } + if (msg->num_data_items && lreq->reply) { + struct ceph_msg_data *data = &msg->data[0]; + + WARN_ON(data->type != CEPH_MSG_DATA_DATABUF); + ceph_databuf_transfer(lreq->reply, data->dbuf); } lreq->notify_finish_error = return_code; complete_all(&lreq->notify_finish_wait); @@ -4823,10 +4817,7 @@ EXPORT_SYMBOL(ceph_osdc_notify_ack); /* * @timeout: in seconds * - * @preply_{pages,len} are initialized both on success and error. - * The caller is responsible for: - * - * ceph_release_page_vector(reply_pages, calc_pages_for(0, reply_len)) + * @reply should be an empty ceph_databuf. */ int ceph_osdc_notify(struct ceph_osd_client *osdc, struct ceph_object_id *oid, @@ -4834,8 +4825,7 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, void *payload, u32 payload_len, u32 timeout, - struct page ***preply_pages, - size_t *preply_len) + struct ceph_databuf *reply) { struct ceph_osd_linger_request *lreq; void *p; @@ -4845,10 +4835,6 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, return -EIO; WARN_ON(!timeout); - if (preply_pages) { - *preply_pages = NULL; - *preply_len = 0; - } lreq = linger_alloc(osdc); if (!lreq) @@ -4875,8 +4861,7 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, goto out_put_lreq; } - lreq->preply_pages = preply_pages; - lreq->preply_len = preply_len; + lreq->reply = reply; ceph_oid_copy(&lreq->t.base_oid, oid); ceph_oloc_copy(&lreq->t.base_oloc, oloc); @@ -5383,7 +5368,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, return m; } -static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr) +static struct ceph_msg *alloc_msg_with_data_buffer(struct ceph_msg_header *hdr) { struct ceph_msg *m; int type = le16_to_cpu(hdr->type); @@ -5395,16 +5380,15 @@ static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr) return NULL; if (data_len) { - struct page **pages; + struct ceph_databuf *dbuf; - pages = ceph_alloc_page_vector(calc_pages_for(0, data_len), - GFP_NOIO); - if (IS_ERR(pages)) { + dbuf = ceph_databuf_reply_alloc(0, data_len, GFP_NOIO); + if (!dbuf) { ceph_msg_put(m); return NULL; } - ceph_msg_data_add_pages(m, pages, data_len, 0, true); + ceph_msg_data_add_databuf(m, dbuf); } return m; @@ -5422,7 +5406,7 @@ static struct ceph_msg *osd_alloc_msg(struct ceph_connection *con, case CEPH_MSG_OSD_MAP: case CEPH_MSG_OSD_BACKOFF: case CEPH_MSG_WATCH_NOTIFY: - return alloc_msg_with_page_vector(hdr); + return alloc_msg_with_data_buffer(hdr); case CEPH_MSG_OSD_OPREPLY: return get_reply(con, hdr, skip); default: