> Subject: Re: [PATCH for-next 1/3] RDMA/irdma: Fix inline for multiple SGE's > > On 11/4/2022 7:47 PM, Shiraz Saleem wrote: > > From: Mustafa Ismail <mustafa.ismail@xxxxxxxxx> > > > > Currently, inline send and inline write assume a single SGE and only > > copy data from the first one. Add support for multiple SGE's. > > > > Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb > > APIs") > > Signed-off-by: Mustafa Ismail <mustafa.ismail@xxxxxxxxx> > > Signed-off-by: Shiraz Saleem <shiraz.saleem@xxxxxxxxx> > > --- > > drivers/infiniband/hw/irdma/uk.c | 148 +++++++++++++++++++++++------------- > > drivers/infiniband/hw/irdma/user.h | 18 +---- > > drivers/infiniband/hw/irdma/verbs.c | 54 +++++-------- > > 3 files changed, 115 insertions(+), 105 deletions(-) > > > > diff --git a/drivers/infiniband/hw/irdma/uk.c > > b/drivers/infiniband/hw/irdma/uk.c > > index a6e5d35..4424224 100644 > > --- a/drivers/infiniband/hw/irdma/uk.c > > +++ b/drivers/infiniband/hw/irdma/uk.c > > @@ -566,21 +566,36 @@ static void irdma_set_mw_bind_wqe_gen_1(__le64 > > *wqe, > > > > /** > > * irdma_copy_inline_data_gen_1 - Copy inline data to wqe > > - * @dest: pointer to wqe > > - * @src: pointer to inline data > > - * @len: length of inline data to copy > > + * @wqe: pointer to wqe > > + * @sge_list: table of pointers to inline data > > + * @num_sges: Total inline data length > > * @polarity: compatibility parameter > > */ > > -static void irdma_copy_inline_data_gen_1(u8 *dest, u8 *src, u32 len, > > - u8 polarity) > > +static void irdma_copy_inline_data_gen_1(u8 *wqe, struct ib_sge *sge_list, > > + u32 num_sges, u8 polarity) > > { > > - if (len <= 16) { > > - memcpy(dest, src, len); > > - } else { > > - memcpy(dest, src, 16); > > - src += 16; > > - dest = dest + 32; > > - memcpy(dest, src, len - 16); > > + u32 quanta_bytes_remaining = 16; > > + int i; > > + > > + for (i = 0; i < num_sges; i++) { > > We've been bitten by low provider sge limits in fs/cifs/smbdirect.c, so we now > request only a handful. Looks like this will improve that for irdma. > > But I am a little surprised to see no bounds checking on num_sges or total bytes > here, or in irdma_post_send(). So it seems that a malicious or buggy consumer > can cause significant damage. Is that protected elsewhere? Hi Tom - We have protection for these in irdma_uk_inline_send and irdma_uk_inline_rdma_write. Shiraz > > > + u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].addr; > > + > > + while (sge_list[i].length) { > > + u32 bytes_copied; > > + > > + bytes_copied = min(sge_list[i].length, > quanta_bytes_remaining); > > + memcpy(wqe, cur_sge, bytes_copied); > > + wqe += bytes_copied; > > + cur_sge += bytes_copied; > > + quanta_bytes_remaining -= bytes_copied; > > + sge_list[i].length -= bytes_copied; > > + > > + if (!quanta_bytes_remaining) { > > + /* Remaining inline bytes reside after the hdr */ > > + wqe += 16; > > + quanta_bytes_remaining = 32; > > + } > > + } > > } > > } > > > > @@ -612,35 +627,50 @@ static void irdma_set_mw_bind_wqe(__le64 *wqe, > > > > /** > > * irdma_copy_inline_data - Copy inline data to wqe > > - * @dest: pointer to wqe > > - * @src: pointer to inline data > > - * @len: length of inline data to copy > > + * @wqe: pointer to wqe > > + * @sge_list: table of pointers to inline data > > + * @num_sges: number of SGE's > > * @polarity: polarity of wqe valid bit > > */ > > -static void irdma_copy_inline_data(u8 *dest, u8 *src, u32 len, u8 > > polarity) > > +static void irdma_copy_inline_data(u8 *wqe, struct ib_sge *sge_list, u32 > num_sges, > > + u8 polarity) > > { > > u8 inline_valid = polarity << IRDMA_INLINE_VALID_S; > > - u32 copy_size; > > - > > - dest += 8; > > - if (len <= 8) { > > - memcpy(dest, src, len); > > - return; > > - } > > - > > - *((u64 *)dest) = *((u64 *)src); > > - len -= 8; > > - src += 8; > > - dest += 24; /* point to additional 32 byte quanta */ > > - > > - while (len) { > > - copy_size = len < 31 ? len : 31; > > - memcpy(dest, src, copy_size); > > - *(dest + 31) = inline_valid; > > - len -= copy_size; > > - dest += 32; > > - src += copy_size; > > + u32 quanta_bytes_remaining = 8; > > + bool first_quanta = true; > > + int i; > > + > > + wqe += 8; > > + > > + for (i = 0; i < num_sges; i++) { > > + u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].addr; > > + > > + while (sge_list[i].length) { > > + u32 bytes_copied; > > + > > + bytes_copied = min(sge_list[i].length, > quanta_bytes_remaining); > > + memcpy(wqe, cur_sge, bytes_copied); > > + wqe += bytes_copied; > > + cur_sge += bytes_copied; > > + quanta_bytes_remaining -= bytes_copied; > > + sge_list[i].length -= bytes_copied; > > + > > + if (!quanta_bytes_remaining) { > > + quanta_bytes_remaining = 31; > > + > > + /* Remaining inline bytes reside after the hdr */ > > + if (first_quanta) { > > + first_quanta = false; > > + wqe += 16; > > + } else { > > + *wqe = inline_valid; > > + wqe++; > > + } > > + } > > + } > > } > > + if (!first_quanta && quanta_bytes_remaining < 31) > > + *(wqe + quanta_bytes_remaining) = inline_valid; > > } > > > > /** > > @@ -679,20 +709,27 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk > *qp, > > struct irdma_post_sq_info *info, bool post_sq) > > { > > __le64 *wqe; > > - struct irdma_inline_rdma_write *op_info; > > + struct irdma_rdma_write *op_info; > > u64 hdr = 0; > > u32 wqe_idx; > > bool read_fence = false; > > + u32 i, total_size = 0; > > u16 quanta; > > > > info->push_wqe = qp->push_db ? true : false; > > - op_info = &info->op.inline_rdma_write; > > + op_info = &info->op.rdma_write; > > > > - if (op_info->len > qp->max_inline_data) > > + if (unlikely(qp->max_sq_frag_cnt < op_info->num_lo_sges)) > > return -EINVAL; > > > > - quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len); > > - wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len, > > + for (i = 0; i < op_info->num_lo_sges; i++) > > + total_size += op_info->lo_sg_list[i].length; > > + > > + if (unlikely(total_size > qp->max_inline_data)) > > + return -EINVAL; > > + > > + quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size); > > + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, > > info); > > if (!wqe) > > return -ENOMEM; > > @@ -705,7 +742,7 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk > > *qp, > > > > hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) | > > FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | > > - FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, op_info->len) | > > + FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) | > > FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt ? 1 : 0) | > > FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) | > > FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid ? > 1 : > > 0) | @@ -719,8 +756,8 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk > *qp, > > set_64bit_val(wqe, 0, > > FIELD_PREP(IRDMAQPSQ_IMMDATA, info- > >imm_data)); > > > > - qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->data, op_info->len, > > - qp->swqe_polarity); > > + qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->lo_sg_list, > > + op_info->num_lo_sges, qp- > >swqe_polarity); > > dma_wmb(); /* make sure WQE is populated before valid bit is set */ > > > > set_64bit_val(wqe, 24, hdr); > > @@ -745,20 +782,27 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp, > > struct irdma_post_sq_info *info, bool post_sq) > > { > > __le64 *wqe; > > - struct irdma_post_inline_send *op_info; > > + struct irdma_post_send *op_info; > > u64 hdr; > > u32 wqe_idx; > > bool read_fence = false; > > + u32 i, total_size = 0; > > u16 quanta; > > > > info->push_wqe = qp->push_db ? true : false; > > - op_info = &info->op.inline_send; > > + op_info = &info->op.send; > > > > - if (op_info->len > qp->max_inline_data) > > + if (unlikely(qp->max_sq_frag_cnt < op_info->num_sges)) > > return -EINVAL; > > > > - quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len); > > - wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len, > > + for (i = 0; i < op_info->num_sges; i++) > > + total_size += op_info->sg_list[i].length; > > + > > + if (unlikely(total_size > qp->max_inline_data)) > > + return -EINVAL; > > + > > + quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size); > > + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, > > info); > > if (!wqe) > > return -ENOMEM; > > @@ -773,7 +817,7 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp, > > hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, info->stag_to_inv) | > > FIELD_PREP(IRDMAQPSQ_AHID, op_info->ah_id) | > > FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | > > - FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, op_info->len) | > > + FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) | > > FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, > > (info->imm_data_valid ? 1 : 0)) | > > FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) | > > @@ -789,8 +833,8 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp, > > if (info->imm_data_valid) > > set_64bit_val(wqe, 0, > > FIELD_PREP(IRDMAQPSQ_IMMDATA, info- > >imm_data)); > > - qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->data, op_info->len, > > - qp->swqe_polarity); > > + qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->sg_list, > > + op_info->num_sges, qp->swqe_polarity); > > > > dma_wmb(); /* make sure WQE is populated before valid bit is set */ > > > > diff --git a/drivers/infiniband/hw/irdma/user.h > > b/drivers/infiniband/hw/irdma/user.h > > index 2ef6192..f5d3a7c 100644 > > --- a/drivers/infiniband/hw/irdma/user.h > > +++ b/drivers/infiniband/hw/irdma/user.h > > @@ -173,14 +173,6 @@ struct irdma_post_send { > > u32 ah_id; > > }; > > > > -struct irdma_post_inline_send { > > - void *data; > > - u32 len; > > - u32 qkey; > > - u32 dest_qp; > > - u32 ah_id; > > -}; > > - > > struct irdma_post_rq_info { > > u64 wr_id; > > struct ib_sge *sg_list; > > @@ -193,12 +185,6 @@ struct irdma_rdma_write { > > struct ib_sge rem_addr; > > }; > > > > -struct irdma_inline_rdma_write { > > - void *data; > > - u32 len; > > - struct ib_sge rem_addr; > > -}; > > - > > struct irdma_rdma_read { > > struct ib_sge *lo_sg_list; > > u32 num_lo_sges; > > @@ -241,8 +227,6 @@ struct irdma_post_sq_info { > > struct irdma_rdma_read rdma_read; > > struct irdma_bind_window bind_window; > > struct irdma_inv_local_stag inv_local_stag; > > - struct irdma_inline_rdma_write inline_rdma_write; > > - struct irdma_post_inline_send inline_send; > > } op; > > }; > > > > @@ -291,7 +275,7 @@ int irdma_uk_stag_local_invalidate(struct irdma_qp_uk > *qp, > > bool post_sq); > > > > struct irdma_wqe_uk_ops { > > - void (*iw_copy_inline_data)(u8 *dest, u8 *src, u32 len, u8 polarity); > > + void (*iw_copy_inline_data)(u8 *dest, struct ib_sge *sge_list, u32 > > +num_sges, u8 polarity); > > u16 (*iw_inline_data_size_to_quanta)(u32 data_size); > > void (*iw_set_fragment)(__le64 *wqe, u32 offset, struct ib_sge *sge, > > u8 valid); > > diff --git a/drivers/infiniband/hw/irdma/verbs.c > > b/drivers/infiniband/hw/irdma/verbs.c > > index a22afbb..b2006a0 100644 > > --- a/drivers/infiniband/hw/irdma/verbs.c > > +++ b/drivers/infiniband/hw/irdma/verbs.c > > @@ -3165,30 +3165,20 @@ static int irdma_post_send(struct ib_qp *ibqp, > > info.stag_to_inv = ib_wr->ex.invalidate_rkey; > > } > > > > - if (ib_wr->send_flags & IB_SEND_INLINE) { > > - info.op.inline_send.data = (void *)(unsigned long) > > - ib_wr->sg_list[0].addr; > > - info.op.inline_send.len = ib_wr->sg_list[0].length; > > - if (iwqp->ibqp.qp_type == IB_QPT_UD || > > - iwqp->ibqp.qp_type == IB_QPT_GSI) { > > - ah = to_iwah(ud_wr(ib_wr)->ah); > > - info.op.inline_send.ah_id = ah- > >sc_ah.ah_info.ah_idx; > > - info.op.inline_send.qkey = ud_wr(ib_wr)- > >remote_qkey; > > - info.op.inline_send.dest_qp = > ud_wr(ib_wr)->remote_qpn; > > - } > > + info.op.send.num_sges = ib_wr->num_sge; > > + info.op.send.sg_list = ib_wr->sg_list; > > + if (iwqp->ibqp.qp_type == IB_QPT_UD || > > + iwqp->ibqp.qp_type == IB_QPT_GSI) { > > + ah = to_iwah(ud_wr(ib_wr)->ah); > > + info.op.send.ah_id = ah->sc_ah.ah_info.ah_idx; > > + info.op.send.qkey = ud_wr(ib_wr)->remote_qkey; > > + info.op.send.dest_qp = ud_wr(ib_wr)- > >remote_qpn; > > + } > > + > > + if (ib_wr->send_flags & IB_SEND_INLINE) > > err = irdma_uk_inline_send(ukqp, &info, false); > > - } else { > > - info.op.send.num_sges = ib_wr->num_sge; > > - info.op.send.sg_list = ib_wr->sg_list; > > - if (iwqp->ibqp.qp_type == IB_QPT_UD || > > - iwqp->ibqp.qp_type == IB_QPT_GSI) { > > - ah = to_iwah(ud_wr(ib_wr)->ah); > > - info.op.send.ah_id = ah- > >sc_ah.ah_info.ah_idx; > > - info.op.send.qkey = ud_wr(ib_wr)- > >remote_qkey; > > - info.op.send.dest_qp = ud_wr(ib_wr)- > >remote_qpn; > > - } > > + else > > err = irdma_uk_send(ukqp, &info, false); > > - } > > break; > > case IB_WR_RDMA_WRITE_WITH_IMM: > > if (ukqp->qp_caps & IRDMA_WRITE_WITH_IMM) { @@ - > 3205,22 +3195,14 > > @@ static int irdma_post_send(struct ib_qp *ibqp, > > else > > info.op_type = IRDMA_OP_TYPE_RDMA_WRITE; > > > > - if (ib_wr->send_flags & IB_SEND_INLINE) { > > - info.op.inline_rdma_write.data = (void > *)(uintptr_t)ib_wr->sg_list[0].addr; > > - info.op.inline_rdma_write.len = > > - ib_wr->sg_list[0].length; > > - info.op.inline_rdma_write.rem_addr.addr = > > - rdma_wr(ib_wr)->remote_addr; > > - info.op.inline_rdma_write.rem_addr.lkey = > > - rdma_wr(ib_wr)->rkey; > > + info.op.rdma_write.num_lo_sges = ib_wr->num_sge; > > + info.op.rdma_write.lo_sg_list = ib_wr->sg_list; > > + info.op.rdma_write.rem_addr.addr = rdma_wr(ib_wr)- > >remote_addr; > > + info.op.rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey; > > + if (ib_wr->send_flags & IB_SEND_INLINE) > > err = irdma_uk_inline_rdma_write(ukqp, &info, > false); > > - } else { > > - info.op.rdma_write.lo_sg_list = (void *)ib_wr- > >sg_list; > > - info.op.rdma_write.num_lo_sges = ib_wr- > >num_sge; > > - info.op.rdma_write.rem_addr.addr = > rdma_wr(ib_wr)->remote_addr; > > - info.op.rdma_write.rem_addr.lkey = > rdma_wr(ib_wr)->rkey; > > + else > > err = irdma_uk_rdma_write(ukqp, &info, false); > > - } > > break; > > case IB_WR_RDMA_READ_WITH_INV: > > inv_stag = true;