Add a subroutine rxe_can_use_sg() to determine if a packet is a candidate for a fragmented skb. Add a global variable rxe_use_sg to control whether to support nonlinear skbs. Modify rxe_init_packet() to test if the packet should use a fragmented skb. Fix up calls to rxe_init_packet() to use the new API but disable creating nonlinear skbs for now. Also update rxe_num_dma_frags() to add each rxe_num_mr_frags() result to num_frags and to call rxe_put() on the mr before returning. This is in preparation for using fragmented skbs. Signed-off-by: Bob Pearson <rpearsonhpe@xxxxxxxxx> --- drivers/infiniband/sw/rxe/rxe.c | 3 ++ drivers/infiniband/sw/rxe/rxe.h | 3 ++ drivers/infiniband/sw/rxe/rxe_loc.h | 2 +- drivers/infiniband/sw/rxe/rxe_mr.c | 12 +++-- drivers/infiniband/sw/rxe/rxe_net.c | 79 +++++++++++++++++++++++++--- drivers/infiniband/sw/rxe/rxe_req.c | 2 +- drivers/infiniband/sw/rxe/rxe_resp.c | 7 ++- 7 files changed, 92 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 51daac5c4feb..388d8103ec20 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -13,6 +13,9 @@ MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib"); MODULE_DESCRIPTION("Soft RDMA transport"); MODULE_LICENSE("Dual BSD/GPL"); +/* if true allow using fragmented skbs */ +bool rxe_use_sg; + /* free resources for a rxe device all objects created for this device must * have been destroyed */ diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index 30fbdf3bc76a..c78fb497d9c3 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -30,6 +30,9 @@ #include "rxe_verbs.h" #include "rxe_loc.h" +/* if true allow using fragmented skbs */ +extern bool rxe_use_sg; + /* * Version 1 and Version 2 are identical on 64 bit machines, but on 32 bit * machines Version 2 has a different struct layout.
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 4e5fbc33277d..12fd5811cd79 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -101,7 +101,7 @@ void rxe_mw_cleanup(struct rxe_pool_elem *elem); /* rxe_net.c */ struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av, - struct rxe_pkt_info *pkt); + struct rxe_pkt_info *pkt, bool *is_frag); int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt, struct sk_buff *skb); int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 6fe5bbe43a60..cf538d97c7a5 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -541,7 +541,7 @@ int rxe_num_dma_frags(const struct rxe_pd *pd, const struct rxe_dma_info *dma, struct rxe_mr *mr = NULL; int bytes; u64 iova; - int ret; + int nf; int num_frags = 0; if (length == 0) @@ -572,18 +572,22 @@ int rxe_num_dma_frags(const struct rxe_pd *pd, const struct rxe_dma_info *dma, bytes = min_t(int, length, sge->length - buf_offset); if (bytes > 0) { iova = sge->addr + buf_offset; - ret = rxe_num_mr_frags(mr, iova, length); - if (ret < 0) { + nf = rxe_num_mr_frags(mr, iova, length); + if (nf < 0) { rxe_put(mr); - return ret; + return nf; } + num_frags += nf; buf_offset += bytes; resid -= bytes; length -= bytes; } } + if (mr) + rxe_put(mr); + return num_frags; } diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index faabc444d546..c6d8f5c80562 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -442,8 +442,60 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, return err; } +/** + * rxe_can_use_sg() - determine if packet is a candidate for fragmenting + * @qp: the queue pair + * @pkt: packet info + * + * Limit to packets with: + * rxe_use_sg set + * qp is RC + * ndev
supports SG + * #sges less than #frags for sends + * + * Returns: true if conditions are met else false + */ +static bool rxe_can_use_sg(struct rxe_qp *qp, struct rxe_pkt_info *pkt) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + int length = pkt->paylen - rxe_opcode[pkt->opcode].length + - RXE_ICRC_SIZE; + int nf; + + if (!rxe_use_sg) + return false; + if (qp_type(pkt->qp) != IB_QPT_RC) + return false; + if (!(rxe->ndev->features & NETIF_F_SG)) + return false; + + /* check we don't have a pathological sge list with lots of + * short segments. Recall we need one extra frag for icrc. + */ + if (pkt->mask & RXE_SEND_MASK) { + nf = rxe_num_dma_frags(qp->pd, &pkt->wqe->dma, length); + return (nf >= 0 && nf <= MAX_SKB_FRAGS - 1) ? true : false; + } + + return true; +} + +#define RXE_MIN_SKB_SIZE (256) + +/** + * rxe_init_packet - allocate and initialize a new skb + * @qp: the queue pair + * @av: remote address vector + * @pkt: packet info + * @frag: optional return value for fragmented skb + * on call if frag == NULL do not use fragmented skb + * on return if not NULL set *frag to true + * if packet will be fragmented else false + * + * Returns: an skb on success else NULL + */ struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av, - struct rxe_pkt_info *pkt) + struct rxe_pkt_info *pkt, bool *frag) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); unsigned int hdr_len; @@ -451,6 +503,7 @@ struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av, struct net_device *ndev; const struct ib_gid_attr *attr; const int port_num = 1; + int skb_size; attr = rdma_get_gid_attr(&rxe->ib_dev, port_num, av->grh.sgid_index); if (IS_ERR(attr)) @@ -469,9 +522,19 @@ struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av, rcu_read_unlock(); goto out; } - skb = alloc_skb(pkt->paylen + hdr_len + LL_RESERVED_SPACE(ndev), - GFP_ATOMIC); + skb_size = LL_RESERVED_SPACE(ndev) + hdr_len + pkt->paylen; + if (frag) { + if (rxe_use_sg && (skb_size >
RXE_MIN_SKB_SIZE) && + rxe_can_use_sg(qp, pkt)) { + skb_size = RXE_MIN_SKB_SIZE; + *frag = true; + } else { + *frag = false; + } + } + + skb = alloc_skb(skb_size, GFP_ATOMIC); if (unlikely(!skb)) { rcu_read_unlock(); goto out; @@ -480,7 +543,7 @@ struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av, skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(ndev)); /* FIXME: hold reference to this netdev until life of this skb. */ - skb->dev = ndev; + skb->dev = ndev; rcu_read_unlock(); if (av->network_type == RXE_NETWORK_TYPE_IPV4) @@ -488,10 +551,10 @@ struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av, else skb->protocol = htons(ETH_P_IPV6); - pkt->rxe = rxe; - pkt->port_num = port_num; - pkt->hdr = skb_put(skb, pkt->paylen); - pkt->mask |= RXE_GRH_MASK; + if (frag && *frag) + pkt->hdr = skb_put(skb, rxe_opcode[pkt->opcode].length); + else + pkt->hdr = skb_put(skb, pkt->paylen); out: rdma_put_gid_attr(attr); diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 0a4b8825bd55..71a65f2a5d6d 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -491,7 +491,7 @@ static struct sk_buff *rxe_init_req_packet(struct rxe_qp *qp, pad + RXE_ICRC_SIZE; /* init skb */ - skb = rxe_init_packet(qp, av, pkt); + skb = rxe_init_packet(qp, av, pkt, NULL); if (unlikely(!skb)) goto err_out; diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 8503d22f9114..8868415b71b6 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -665,6 +665,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, u32 psn, u8 syndrome) { + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct sk_buff *skb; int paylen; int pad; @@ -672,14 +673,16 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, pad = (-payload) & 0x3; paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE; - ack->paylen = 
paylen; + ack->rxe = rxe; ack->qp = qp; ack->opcode = opcode; ack->mask = rxe_opcode[opcode].mask; + ack->paylen = paylen; ack->psn = psn; + ack->port_num = 1; - skb = rxe_init_packet(qp, &qp->pri_av, ack); + skb = rxe_init_packet(qp, &qp->pri_av, ack, NULL); if (!skb) return NULL; -- 2.34.1