We'll need a generic mechanism for processing only the parts of an
egress RPC message that are _not_ a READ payload. This will be used
in subsequent patches.

It is split out as a separate patch so that the logic of the new
mechanism can be reviewed on its own, keeping the patches that make
use of it smaller.

Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx>
---
 include/linux/sunrpc/svc_rdma.h       |    4 ++
 net/sunrpc/xprtrdma/svc_rdma_sendto.c |   72 +++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 37e4c597dc71..93642a889535 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -195,6 +195,10 @@ extern struct svc_rdma_send_ctxt *
 		svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma);
 extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
 				   struct svc_rdma_send_ctxt *ctxt);
+extern int svc_rdma_skip_payloads(const struct xdr_buf *xdr,
+				  const struct svc_rdma_recv_ctxt *rctxt,
+				  int (*actor)(const struct xdr_buf *, void *),
+				  void *data);
 extern int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr);
 extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
 				  struct svc_rdma_send_ctxt *sctxt,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 9fe7b0d1e335..85c91d0debb4 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -515,6 +515,78 @@ svc_rdma_encode_reply_chunk(const struct svc_rdma_recv_ctxt *rctxt,
 	return svc_rdma_encode_write_chunk(sctxt, &payload);
 }
 
+static inline int
+xdr_buf_process_region(const struct xdr_buf *xdr,
+		       unsigned int offset, unsigned int length,
+		       int (*actor)(const struct xdr_buf *, void *),
+		       void *data)
+{
+	struct xdr_buf subbuf;
+
+	if (!length)
+		return 0;
+	if (xdr_buf_subsegment(xdr, &subbuf, offset, length))
+		return -EMSGSIZE;
+	return actor(&subbuf, data);
+}
+
+/**
+ * svc_rdma_skip_payloads - Call an actor for non-payload regions of @xdr
+ * @xdr: xdr_buf to process
+ * @rctxt: Write and Reply chunks provided by client
+ * @actor: function to invoke on each non-payload region of @xdr
+ * @data: pointer to arguments for @actor
+ *
+ * This mechanism must ignore not only READ payloads that were already
+ * sent via RDMA Write, but also the XDR padding for those payloads
+ * that the upper layer has added.
+ *
+ * Assumptions:
+ *	The xdr->len and rp_ fields are aligned to 4-byte multiples.
+ *
+ * Returns:
+ *	On success, zero,
+ *	%-EMSGSIZE on XDR buffer overflow, or
+ *	The return value of @actor
+ */
+int svc_rdma_skip_payloads(const struct xdr_buf *xdr,
+			   const struct svc_rdma_recv_ctxt *rctxt,
+			   int (*actor)(const struct xdr_buf *, void *),
+			   void *data)
+{
+	const unsigned int num_payloads = rctxt ? rctxt->rc_cur_payload : 0;
+	unsigned int offset, length;
+	int i, ret;
+
+	if (likely(!num_payloads))
+		return actor(xdr, data);
+
+	/* Before the first READ payload */
+	offset = 0;
+	length = rctxt->rc_read_payloads[0].rp_offset;
+	ret = xdr_buf_process_region(xdr, offset, length, actor, data);
+	if (ret < 0)
+		return ret;
+
+	/* Any middle READ payloads */
+	for (i = 0; i + 1 < num_payloads; i++) {
+		offset = xdr_align_size(offset + length + rctxt->rc_read_payloads[i].rp_length);
+		length = rctxt->rc_read_payloads[i + 1].rp_offset - offset;
+		ret = xdr_buf_process_region(xdr, offset, length, actor, data);
+		if (ret < 0)
+			return ret;
+	}
+
+	/* After the last READ payload */
+	offset = xdr_align_size(offset + length + rctxt->rc_read_payloads[i].rp_length);
+	length = xdr->len - offset;
+	ret = xdr_buf_process_region(xdr, offset, length, actor, data);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
 static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
 				 struct svc_rdma_send_ctxt *ctxt,
 				 struct page *page,
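
For illustration, here is a minimal sketch of how a subsequent patch
might drive this mechanism. The names count_inline_bytes() and
svc_rdma_count_inline() are hypothetical and do not appear in this
series; the actor simply totals the bytes that will still go over the
wire inline after the READ payloads (already pushed via RDMA Write)
have been skipped.

	/* Hypothetical actor: accumulate the length of one non-payload
	 * region. @data points at the running byte total.
	 */
	static int count_inline_bytes(const struct xdr_buf *subbuf, void *data)
	{
		unsigned int *total = data;

		*total += subbuf->len;
		return 0;
	}

	/* Hypothetical caller: count the Send bytes that remain once
	 * the READ payloads are removed from the Reply message.
	 */
	static unsigned int svc_rdma_count_inline(const struct xdr_buf *xdr,
						  const struct svc_rdma_recv_ctxt *rctxt)
	{
		unsigned int total = 0;

		/* On failure, conservatively treat the whole message
		 * as inline. */
		if (svc_rdma_skip_payloads(xdr, rctxt, count_inline_bytes,
					   &total))
			return xdr->len;
		return total;
	}

Note that because xdr_buf_process_region() returns early for
zero-length regions, the actor is never invoked for empty gaps
between adjacent payloads.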