rxe_mr_copy() currently supports copying between an mr and a contiguous region of kernel memory. Rename rxe_mr_copy() to rxe_copy_mr_data(). Extend the operations to support copying between an mr and an skb fragment list. Fixup calls to rxe_mr_copy() to support the new API. Add two APIs rxe_add_frag() and rxe_num_mr_frags() to add a fragment to an skb and count the total number of fragments needed. Signed-off-by: Bob Pearson <rpearsonhpe@xxxxxxxxx> --- drivers/infiniband/sw/rxe/rxe_loc.h | 10 +- drivers/infiniband/sw/rxe/rxe_mr.c | 170 +++++++++++++++++++++++--- drivers/infiniband/sw/rxe/rxe_resp.c | 14 ++- drivers/infiniband/sw/rxe/rxe_verbs.h | 2 + 4 files changed, 173 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 532026cdd49e..77661e0ccab7 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -62,11 +62,15 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr); int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, int access, struct rxe_mr *mr); int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr); -int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length); -int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, - unsigned int length, enum rxe_mr_copy_op op); +int rxe_add_frag(struct sk_buff *skb, struct rxe_mr *mr, struct page *page, + unsigned int length, unsigned int offset); +int rxe_num_mr_frags(struct rxe_mr *mr, u64 iova, unsigned int length); +int rxe_copy_mr_data(struct sk_buff *skb, struct rxe_mr *mr, u64 iova, + void *addr, unsigned int skb_offset, + unsigned int length, enum rxe_mr_copy_op op); int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma, void *addr, int length, enum rxe_mr_copy_op op); +int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length); int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int 
rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode, diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 812c85cad463..2667e8129a1d 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -242,7 +242,79 @@ int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sgl, return ib_sg_to_pages(ibmr, sgl, sg_nents, sg_offset, rxe_set_page); } -static int rxe_mr_copy_xarray(struct rxe_mr *mr, u64 iova, void *addr, +/** + * rxe_add_frag() - Add a frag to a nonlinear packet + * @skb: The packet buffer + * @page: The page + * @mr: The memory region + * @length: Length of fragment + * @offset: Offset of fragment in page + * + * Caller must verify that the fragment is contained in the page. + * Caller should verify that the number of fragments does not + * exceed MAX_SKB_FRAGS + * + * Returns: 0 on success else a negative errno + */ +int rxe_add_frag(struct sk_buff *skb, struct rxe_mr *mr, struct page *page, + unsigned int length, unsigned int offset) +{ + int nr_frags = skb_shinfo(skb)->nr_frags; + skb_frag_t *frag = &skb_shinfo(skb)->frags[nr_frags]; + + if (nr_frags >= MAX_SKB_FRAGS) { + rxe_dbg_mr(mr, "ran out of frags"); + return -EINVAL; + } + + frag->bv_len = length; + frag->bv_offset = offset; + frag->bv_page = page; + /* because kfree_skb will call put_page() */ + get_page(page); + skb_shinfo(skb)->nr_frags++; + + skb->data_len += length; + skb->len += length; + + return 0; +} + +/** + * rxe_num_mr_frags() - Compute the number of skb frags needed to copy + * length bytes from an mr to an skb frag list. 
+ * @mr: mr to copy data from + * @iova: iova in memory region as starting point + * @length: number of bytes to transfer + * + * Returns: the number of frags needed + */ +int rxe_num_mr_frags(struct rxe_mr *mr, u64 iova, unsigned int length) +{ + unsigned int page_size; + unsigned int page_offset; + unsigned int bytes; + int num_frags = 0; + + if (mr->ibmr.type == IB_MR_TYPE_DMA) + page_size = PAGE_SIZE; + else + page_size = mr_page_size(mr); + page_offset = iova & (page_size - 1); + + while (length) { + bytes = min_t(unsigned int, length, + page_size - page_offset); + length -= bytes; + page_offset = 0; + num_frags++; + } + + return num_frags; +} + +static int rxe_mr_copy_xarray(struct sk_buff *skb, struct rxe_mr *mr, + u64 iova, void *addr, unsigned int skb_offset, unsigned int length, enum rxe_mr_copy_op op) { unsigned int page_offset = rxe_mr_iova_to_page_offset(mr, iova); @@ -250,6 +322,7 @@ static int rxe_mr_copy_xarray(struct rxe_mr *mr, u64 iova, void *addr, unsigned int bytes; struct page *page; void *va; + int err = 0; while (length) { page = xa_load(&mr->page_list, index); @@ -258,12 +331,32 @@ static int rxe_mr_copy_xarray(struct rxe_mr *mr, u64 iova, void *addr, bytes = min_t(unsigned int, length, mr_page_size(mr) - page_offset); - va = kmap_local_page(page); - if (op == RXE_COPY_FROM_MR) + switch (op) { + case RXE_COPY_FROM_MR: + va = kmap_local_page(page); memcpy(addr, va + page_offset, bytes); - else + kunmap_local(va); + break; + case RXE_COPY_TO_MR: + va = kmap_local_page(page); memcpy(va + page_offset, addr, bytes); - kunmap_local(va); + kunmap_local(va); + break; + case RXE_FRAG_TO_MR: + va = kmap_local_page(page); + err = skb_copy_bits(skb, skb_offset, + va + page_offset, bytes); + kunmap_local(va); + skb_offset += bytes; + break; + case RXE_FRAG_FROM_MR: + err = rxe_add_frag(skb, mr, page, bytes, + page_offset); + break; + } + + if (err) + return err; page_offset = 0; addr += bytes; @@ -274,13 +367,15 @@ static int rxe_mr_copy_xarray(struct 
rxe_mr *mr, u64 iova, void *addr, return 0; } -static void rxe_mr_copy_dma(struct rxe_mr *mr, u64 dma_addr, void *addr, - unsigned int length, enum rxe_mr_copy_op op) +static int rxe_mr_copy_dma(struct sk_buff *skb, struct rxe_mr *mr, + u64 dma_addr, void *addr, unsigned int skb_offset, + unsigned int length, enum rxe_mr_copy_op op) { unsigned int page_offset = dma_addr & (PAGE_SIZE - 1); unsigned int bytes; struct page *page; u8 *va; + int err = 0; while (length) { page = ib_virt_dma_to_page(dma_addr); @@ -288,10 +383,32 @@ static void rxe_mr_copy_dma(struct rxe_mr *mr, u64 dma_addr, void *addr, PAGE_SIZE - page_offset); va = kmap_local_page(page); - if (op == RXE_COPY_FROM_MR) + switch (op) { + case RXE_COPY_FROM_MR: + va = kmap_local_page(page); memcpy(addr, va + page_offset, bytes); - else + kunmap_local(va); + break; + case RXE_COPY_TO_MR: + va = kmap_local_page(page); memcpy(va + page_offset, addr, bytes); + kunmap_local(va); + break; + case RXE_FRAG_TO_MR: + va = kmap_local_page(page); + err = skb_copy_bits(skb, skb_offset, + va + page_offset, bytes); + kunmap_local(va); + skb_offset += bytes; + break; + case RXE_FRAG_FROM_MR: + err = rxe_add_frag(skb, mr, page, bytes, + page_offset); + break; + } + + if (err) + return err; kunmap_local(va); page_offset = 0; @@ -299,10 +416,31 @@ static void rxe_mr_copy_dma(struct rxe_mr *mr, u64 dma_addr, void *addr, addr += bytes; length -= bytes; } + + return 0; } -int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, - unsigned int length, enum rxe_mr_copy_op op) +/** + * rxe_copy_mr_data() - transfer data between an MR and a packet + * @skb: the packet buffer + * @mr: the MR + * @iova: the address in the MR + * @addr: the address in the packet (TO/FROM MR only) + * @length: the length to transfer + * @op: copy operation (TO MR, FROM MR or FRAG MR) + * + * Copy data from a range (addr, addr+length-1) in a packet + * to or from a range in an MR object at (iova, iova+length-1). 
+ * Or, build a frag list referencing the MR range. + * + * Caller must verify that the access permissions support the + * operation. + * + * Returns: 0 on success or an error + */ +int rxe_copy_mr_data(struct sk_buff *skb, struct rxe_mr *mr, u64 iova, + void *addr, unsigned int skb_offset, + unsigned int length, enum rxe_mr_copy_op op) { int err; @@ -313,8 +451,8 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, return -EINVAL; if (mr->ibmr.type == IB_MR_TYPE_DMA) { - rxe_mr_copy_dma(mr, iova, addr, length, op); - return 0; + return rxe_mr_copy_dma(skb, mr, iova, addr, skb_offset, + length, op); } err = mr_check_range(mr, iova, length); @@ -323,7 +461,8 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, return err; } - return rxe_mr_copy_xarray(mr, iova, addr, length, op); + return rxe_mr_copy_xarray(skb, mr, iova, addr, skb_offset, + length, op); } /* copy data in or out of a wqe, i.e. sg list @@ -395,7 +534,8 @@ int copy_data( if (bytes > 0) { iova = sge->addr + offset; - err = rxe_mr_copy(mr, iova, addr, bytes, op); + err = rxe_copy_mr_data(NULL, mr, iova, addr, + 0, bytes, op); if (err) goto err2; diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 596615c515ad..87d61a462ff5 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -576,12 +576,15 @@ static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr, static enum resp_states write_data_in(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { + struct sk_buff *skb = PKT_TO_SKB(pkt); enum resp_states rc = RESPST_NONE; - int err; int data_len = payload_size(pkt); + int err; + int skb_offset = 0; - err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset, - payload_addr(pkt), data_len, RXE_COPY_TO_MR); + err = rxe_copy_mr_data(skb, qp->resp.mr, qp->resp.va + qp->resp.offset, + payload_addr(pkt), skb_offset, data_len, + RXE_COPY_TO_MR); if (err) { rc = RESPST_ERR_RKEY_VIOLATION; goto out; @@ -876,6 
+879,7 @@ static enum resp_states read_reply(struct rxe_qp *qp, int err; struct resp_res *res = qp->resp.res; struct rxe_mr *mr; + unsigned int skb_offset = 0; u8 *pad_addr; if (!res) { @@ -927,8 +931,8 @@ static enum resp_states read_reply(struct rxe_qp *qp, goto err_out; } - err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt), - payload, RXE_COPY_FROM_MR); + err = rxe_copy_mr_data(skb, mr, res->read.va, payload_addr(&ack_pkt), + skb_offset, payload, RXE_COPY_FROM_MR); if (err) { kfree_skb(skb); state = RESPST_ERR_RKEY_VIOLATION; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index d9c44bd30da4..89cf50b938c2 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -278,6 +278,8 @@ enum rxe_mr_state { enum rxe_mr_copy_op { RXE_COPY_TO_MR, RXE_COPY_FROM_MR, + RXE_FRAG_TO_MR, + RXE_FRAG_FROM_MR, }; enum rxe_mr_lookup_type { -- 2.39.2