RE: [PATCH for-next 2/3] IB/{hfi1, rdmavt, qib}: Fit completions into single aligned cache-line

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Sebastian,

> -----Original Message-----
> From: linux-rdma-owner@xxxxxxxxxxxxxxx <linux-rdma-owner@xxxxxxxxxxxxxxx>
> On Behalf Of Dennis Dalessandro
> Sent: Monday, September 10, 2018 10:50 AM
> To: jgg@xxxxxxxx; dledford@xxxxxxxxxx
> Cc: linux-rdma@xxxxxxxxxxxxxxx; Mike Marciniszyn
> <mike.marciniszyn@xxxxxxxxx>; Sebastian Sanchez
> <sebastian.sanchez@xxxxxxxxx>; Kamenee Arumugam
> <kamenee.arumugam@xxxxxxxxx>
> Subject: [PATCH for-next 2/3] IB/{hfi1, rdmavt, qib}: Fit completions into single
> aligned cache-line
> 
> From: Sebastian Sanchez <sebastian.sanchez@xxxxxxxxx>
> 
> The struct ib_wc uses two cache-lines per completion, and it is unaligned. This
I agree that it is unaligned.
Bodong and I fixed it in commit [1] to continue to keep ib_wc < 64 bytes and padded for 64B by compiler.
(described more in commit log of [1]).
On which architecture do you see it using two cache-lines?

[1] Commit cd2a6e7d384b ("IB/core: Fix ib_wc structure size to remain in 64 bytes boundary")

> structure used to fit within one cacheline, but it was expanded by fields added in
> the following patches:
> 
> dd5f03beb4f7 ("IB/core: Ethernet L2 attributes in verbs/cm structures")
> c865f24628b9 ("IB/core: Add rdma_network_type to wc")
> 
> These new fields are only needed for ethernet and for HCAs that don't provide
> the network type to search the proper GID in the GID table.
> Since there are two cache-lines, more cache-lines are dirtied per work
> completion entry.
> 
> Create a kernel only rvt_wc structure that is a single aligned cache-line. This
> reduces the cache lines used per completion and eliminates any cache line push-
> pull by aligning the size to a cache-line.
> 
> Cache-aligning the new kernel completion queue expands struct rvt_cq_wc
> breaking the ABI for the user completion queue. Therefore, decouple the kernel
> completion queue from struct rvt_cq_wc to prevent this.
> 
> Reviewed-by: Mike Marciniszyn <mike.marciniszyn@xxxxxxxxx>
> Reviewed-by: Sebastian Sanchez <sebastian.sanchez@xxxxxxxxx>
> Signed-off-by: Sebastian Sanchez <sebastian.sanchez@xxxxxxxxx>
> Signed-off-by: Kamenee Arumugam <kamenee.arumugam@xxxxxxxxx>
> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@xxxxxxxxx>
> ---
>  drivers/infiniband/hw/hfi1/rc.c         |    2 +
>  drivers/infiniband/hw/hfi1/ruc.c        |    2 +
>  drivers/infiniband/hw/hfi1/uc.c         |    2 +
>  drivers/infiniband/hw/hfi1/ud.c         |    4 +-
>  drivers/infiniband/hw/qib/qib_rc.c      |    2 +
>  drivers/infiniband/hw/qib/qib_ruc.c     |    2 +
>  drivers/infiniband/hw/qib/qib_uc.c      |    2 +
>  drivers/infiniband/hw/qib/qib_ud.c      |    4 +-
>  drivers/infiniband/sw/rdmavt/cq.c       |   57 ++++++++++++++++++++-----------
>  drivers/infiniband/sw/rdmavt/qp.c       |    6 ++-
>  drivers/infiniband/sw/rdmavt/trace_cq.h |    6 ++-
>  include/rdma/rdmavt_cq.h                |   29 +++++++++++++++-
>  include/rdma/rdmavt_qp.h                |    2 +
>  13 files changed, 81 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
> index 9bd63ab..975d91a 100644
> --- a/drivers/infiniband/hw/hfi1/rc.c
> +++ b/drivers/infiniband/hw/hfi1/rc.c
> @@ -2041,7 +2041,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
>  	u32 hdrsize = packet->hlen;
>  	u32 psn = ib_bth_get_psn(packet->ohdr);
>  	u32 pad = packet->pad;
> -	struct ib_wc wc;
> +	struct rvt_wc wc;
>  	u32 pmtu = qp->pmtu;
>  	int diff;
>  	struct ib_reth *reth;
> diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
> index 5f56f3c..019713a 100644
> --- a/drivers/infiniband/hw/hfi1/ruc.c
> +++ b/drivers/infiniband/hw/hfi1/ruc.c
> @@ -173,7 +173,7 @@ static void ruc_loopback(struct rvt_qp *sqp)
>  	struct rvt_swqe *wqe;
>  	struct rvt_sge *sge;
>  	unsigned long flags;
> -	struct ib_wc wc;
> +	struct rvt_wc wc;
>  	u64 sdata;
>  	atomic64_t *maddr;
>  	enum ib_wc_status send_status;
> diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
> index e254dce..5b5e057 100644
> --- a/drivers/infiniband/hw/hfi1/uc.c
> +++ b/drivers/infiniband/hw/hfi1/uc.c
> @@ -312,7 +312,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
>  	u32 hdrsize = packet->hlen;
>  	u32 psn;
>  	u32 pad = packet->pad;
> -	struct ib_wc wc;
> +	struct rvt_wc wc;
>  	u32 pmtu = qp->pmtu;
>  	struct ib_reth *reth;
>  	int ret;
> diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
> index 70d39fc..8a4785e 100644
> --- a/drivers/infiniband/hw/hfi1/ud.c
> +++ b/drivers/infiniband/hw/hfi1/ud.c
> @@ -79,7 +79,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct
> rvt_swqe *swqe)
>  	unsigned long flags;
>  	struct rvt_sge_state ssge;
>  	struct rvt_sge *sge;
> -	struct ib_wc wc;
> +	struct rvt_wc wc;
>  	u32 length;
>  	enum ib_qp_type sqptype, dqptype;
> 
> @@ -867,7 +867,7 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16
> pkey, u8 sc5,  void hfi1_ud_rcv(struct hfi1_packet *packet)  {
>  	u32 hdrsize = packet->hlen;
> -	struct ib_wc wc;
> +	struct rvt_wc wc;
>  	u32 src_qp;
>  	u16 pkey;
>  	int mgmt_pkey_idx = -1;
> diff --git a/drivers/infiniband/hw/qib/qib_rc.c
> b/drivers/infiniband/hw/qib/qib_rc.c
> index f35fdeb..a121d2c 100644
> --- a/drivers/infiniband/hw/qib/qib_rc.c
> +++ b/drivers/infiniband/hw/qib/qib_rc.c
> @@ -1744,7 +1744,7 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct
> ib_header *hdr,
>  	u32 hdrsize;
>  	u32 psn;
>  	u32 pad;
> -	struct ib_wc wc;
> +	struct rvt_wc wc;
>  	u32 pmtu = qp->pmtu;
>  	int diff;
>  	struct ib_reth *reth;
> diff --git a/drivers/infiniband/hw/qib/qib_ruc.c
> b/drivers/infiniband/hw/qib/qib_ruc.c
> index f8a7de7..1230c37 100644
> --- a/drivers/infiniband/hw/qib/qib_ruc.c
> +++ b/drivers/infiniband/hw/qib/qib_ruc.c
> @@ -191,7 +191,7 @@ static void qib_ruc_loopback(struct rvt_qp *sqp)
>  	struct rvt_swqe *wqe;
>  	struct rvt_sge *sge;
>  	unsigned long flags;
> -	struct ib_wc wc;
> +	struct rvt_wc wc;
>  	u64 sdata;
>  	atomic64_t *maddr;
>  	enum ib_wc_status send_status;
> diff --git a/drivers/infiniband/hw/qib/qib_uc.c
> b/drivers/infiniband/hw/qib/qib_uc.c
> index 3e54bc1..fa7dd3a 100644
> --- a/drivers/infiniband/hw/qib/qib_uc.c
> +++ b/drivers/infiniband/hw/qib/qib_uc.c
> @@ -242,7 +242,7 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header
> *hdr,
>  	u32 hdrsize;
>  	u32 psn;
>  	u32 pad;
> -	struct ib_wc wc;
> +	struct rvt_wc wc;
>  	u32 pmtu = qp->pmtu;
>  	struct ib_reth *reth;
>  	int ret;
> diff --git a/drivers/infiniband/hw/qib/qib_ud.c
> b/drivers/infiniband/hw/qib/qib_ud.c
> index f8d029a..580ebc8 100644
> --- a/drivers/infiniband/hw/qib/qib_ud.c
> +++ b/drivers/infiniband/hw/qib/qib_ud.c
> @@ -58,7 +58,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct
> rvt_swqe *swqe)
>  	unsigned long flags;
>  	struct rvt_sge_state ssge;
>  	struct rvt_sge *sge;
> -	struct ib_wc wc;
> +	struct rvt_wc wc;
>  	u32 length;
>  	enum ib_qp_type sqptype, dqptype;
> 
> @@ -434,7 +434,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header
> *hdr,
>  	int opcode;
>  	u32 hdrsize;
>  	u32 pad;
> -	struct ib_wc wc;
> +	struct rvt_wc wc;
>  	u32 qkey;
>  	u32 src_qp;
>  	u16 dlid;
> diff --git a/drivers/infiniband/sw/rdmavt/cq.c
> b/drivers/infiniband/sw/rdmavt/cq.c
> index 6406a08..e729335 100644
> --- a/drivers/infiniband/sw/rdmavt/cq.c
> +++ b/drivers/infiniband/sw/rdmavt/cq.c
> @@ -62,10 +62,10 @@
>   *
>   * This may be called with qp->s_lock held.
>   */
> -void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
> +void rvt_cq_enter(struct rvt_cq *cq, struct rvt_wc *entry, bool
> +solicited)
>  {
>  	struct ib_uverbs_wc *uqueue = NULL;
> -	struct ib_wc *kqueue = NULL;
> +	struct rvt_wc *kqueue = NULL;
>  	struct rvt_cq_wc *u_wc = NULL;
>  	struct rvt_k_cq_wc *k_wc = NULL;
>  	unsigned long flags;
> @@ -230,6 +230,8 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
>  	 * numbers of entries.
>  	 */
>  	if (udata && udata->outlen >= sizeof(__u64)) {
> +		int err;
> +
>  		sz = sizeof(struct ib_uverbs_wc) * (entries + 1);
>  		sz += sizeof(*u_wc);
>  		u_wc = vmalloc_user(sz);
> @@ -237,23 +239,11 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
>  			ret = ERR_PTR(-ENOMEM);
>  			goto bail_cq;
>  		}
> -	} else {
> -		sz = sizeof(struct ib_wc) * (entries + 1);
> -		sz += sizeof(*k_wc);
> -		k_wc = vzalloc_node(sz, rdi->dparms.node);
> -		if (!k_wc) {
> -			ret = ERR_PTR(-ENOMEM);
> -			goto bail_cq;
> -		}
> -	}
> -
> -	/*
> -	 * Return the address of the WC as the offset to mmap.
> -	 * See rvt_mmap() for details.
> -	 */
> -	if (udata && udata->outlen >= sizeof(__u64)) {
> -		int err;
> 
> +		/*
> +		 * Return the address of the WC as the offset to mmap.
> +		 * See rvt_mmap() for details.
> +		 */
>  		cq->ip = rvt_create_mmap_info(rdi, sz, context, u_wc);
>  		if (!cq->ip) {
>  			ret = ERR_PTR(-ENOMEM);
> @@ -266,6 +256,14 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
>  			ret = ERR_PTR(err);
>  			goto bail_ip;
>  		}
> +	} else {
> +		sz = sizeof(struct rvt_wc) * (entries + 1);
> +		sz += sizeof(*k_wc);
> +		k_wc = vzalloc_node(sz, rdi->dparms.node);
> +		if (!k_wc) {
> +			ret = ERR_PTR(-ENOMEM);
> +			goto bail_cq;
> +		}
>  	}
> 
>  	spin_lock_irq(&rdi->n_cqs_lock);
> @@ -343,6 +341,7 @@ int rvt_destroy_cq(struct ib_cq *ibcq)
>  		kref_put(&cq->ip->ref, rvt_release_mmap_info);
>  	else
>  		vfree(cq->queue);
> +	vfree(cq->kqueue);
>  	kfree(cq);
> 
>  	return 0;
> @@ -418,7 +417,7 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct
> ib_udata *udata)
>  		if (!u_wc)
>  			return -ENOMEM;
>  	} else {
> -		sz = sizeof(struct ib_wc) * (cqe + 1);
> +		sz = sizeof(struct rvt_wc) * (cqe + 1);
>  		sz += sizeof(*k_wc);
>  		k_wc = vzalloc_node(sz, rdi->dparms.node);
>  		if (!k_wc)
> @@ -520,6 +519,24 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct
> ib_udata *udata)
>  	return ret;
>  }
> 
> +static void copy_rvt_wc_to_ib_wc(struct ib_wc *ibwc, struct rvt_wc
> +*rvtwc) {
> +	ibwc->wr_id = rvtwc->wr_id;
> +	ibwc->status = rvtwc->status;
> +	ibwc->opcode = rvtwc->opcode;
> +	ibwc->vendor_err = rvtwc->vendor_err;
> +	ibwc->byte_len = rvtwc->byte_len;
> +	ibwc->qp = rvtwc->qp;
> +	ibwc->ex.invalidate_rkey = rvtwc->ex.invalidate_rkey;
> +	ibwc->src_qp = rvtwc->src_qp;
> +	ibwc->wc_flags = rvtwc->wc_flags;
> +	ibwc->slid = rvtwc->slid;
> +	ibwc->pkey_index = rvtwc->pkey_index;
> +	ibwc->sl = rvtwc->sl;
> +	ibwc->dlid_path_bits = rvtwc->dlid_path_bits;
> +	ibwc->port_num = rvtwc->port_num;
> +}
> +
>  /**
>   * rvt_poll_cq - poll for work completion entries
>   * @ibcq: the completion queue to poll
> @@ -554,7 +571,7 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries,
> struct ib_wc *entry)
>  			break;
>  		/* The kernel doesn't need a RMB since it has the lock. */
>  		trace_rvt_cq_poll(cq, &wc->kqueue[tail], npolled);
> -		*entry = wc->kqueue[tail];
> +		copy_rvt_wc_to_ib_wc(entry, &wc->kqueue[tail]);
>  		if (tail >= cq->ibcq.cqe)
>  			tail = 0;
>  		else
> diff --git a/drivers/infiniband/sw/rdmavt/qp.c
> b/drivers/infiniband/sw/rdmavt/qp.c
> index 5ce403c..e2cc827 100644
> --- a/drivers/infiniband/sw/rdmavt/qp.c
> +++ b/drivers/infiniband/sw/rdmavt/qp.c
> @@ -1049,7 +1049,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
>   */
>  int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)  {
> -	struct ib_wc wc;
> +	struct rvt_wc wc;
>  	int ret = 0;
>  	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
> 
> @@ -1573,7 +1573,7 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct
> ib_recv_wr *wr,
>  			return -ENOMEM;
>  		}
>  		if (unlikely(qp_err_flush)) {
> -			struct ib_wc wc;
> +			struct rvt_wc wc;
> 
>  			memset(&wc, 0, sizeof(wc));
>  			wc.qp = &qp->ibqp;
> @@ -1996,7 +1996,7 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct
> ib_recv_wr *wr,  static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)  {
>  	int i, j, ret;
> -	struct ib_wc wc;
> +	struct rvt_wc wc;
>  	struct rvt_lkey_table *rkt;
>  	struct rvt_pd *pd;
>  	struct rvt_sge_state *ss;
> diff --git a/drivers/infiniband/sw/rdmavt/trace_cq.h
> b/drivers/infiniband/sw/rdmavt/trace_cq.h
> index df8e1ad..832308d 100644
> --- a/drivers/infiniband/sw/rdmavt/trace_cq.h
> +++ b/drivers/infiniband/sw/rdmavt/trace_cq.h
> @@ -109,7 +109,7 @@
> 
>  DECLARE_EVENT_CLASS(
>  	rvt_cq_entry_template,
> -	TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx),
> +	TP_PROTO(struct rvt_cq *cq, struct rvt_wc *wc, u32 idx),
>  	TP_ARGS(cq, wc, idx),
>  	TP_STRUCT__entry(
>  		RDI_DEV_ENTRY(cq->rdi)
> @@ -143,12 +143,12 @@
> 
>  DEFINE_EVENT(
>  	rvt_cq_entry_template, rvt_cq_enter,
> -	TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx),
> +	TP_PROTO(struct rvt_cq *cq, struct rvt_wc *wc, u32 idx),
>  	TP_ARGS(cq, wc, idx));
> 
>  DEFINE_EVENT(
>  	rvt_cq_entry_template, rvt_cq_poll,
> -	TP_PROTO(struct rvt_cq *cq, struct ib_wc *wc, u32 idx),
> +	TP_PROTO(struct rvt_cq *cq, struct rvt_wc *wc, u32 idx),
>  	TP_ARGS(cq, wc, idx));
> 
>  #endif /* __RVT_TRACE_CQ_H */
> diff --git a/include/rdma/rdmavt_cq.h b/include/rdma/rdmavt_cq.h index
> 9f25be0..a5d3e3c 100644
> --- a/include/rdma/rdmavt_cq.h
> +++ b/include/rdma/rdmavt_cq.h
> @@ -62,6 +62,30 @@
>  #include <rdma/rvt-abi.h>
> 
>  /*
> + * If any fields within struct rvt_wc change, the function
> + * copy_rvt_wc_to_ib_wc() should be updated.
> + */
> +struct rvt_wc {
> +	u64			wr_id;
> +	enum ib_wc_status	status;
> +	enum ib_wc_opcode	opcode;
> +	u32			vendor_err;
> +	u32			byte_len;
> +	struct ib_qp	       *qp;
> +	union {
> +		__be32		imm_data;
> +		u32		invalidate_rkey;
> +	} ex;
> +	u32			src_qp;
> +	int			wc_flags;
> +	u32			slid;
> +	u16			pkey_index;
> +	u8			sl;
> +	u8			dlid_path_bits;
> +	u8			port_num; /* valid only for DR SMPs
> onswitches*/
> +} ____cacheline_aligned_in_smp;
> +
> +/*
>   * This structure is used to contain the head pointer, tail pointer,
>   * and completion queue entries as a single memory allocation so
>   * it can be mmap'ed into user space.
> @@ -69,7 +93,8 @@
>  struct rvt_k_cq_wc {
>  	u32 head;               /* index of next entry to fill */
>  	u32 tail;               /* index of next ib_poll_cq() entry */
> -	struct ib_wc kqueue[0];
> +	/* this is actually size ibcq.cqe + 1 */
> +	struct rvt_wc kqueue[0];
>  };
> 
>  /*
> @@ -93,6 +118,6 @@ struct rvt_cq {
>  	return container_of(ibcq, struct rvt_cq, ibcq);  }
> 
> -void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited);
> +void rvt_cq_enter(struct rvt_cq *cq, struct rvt_wc *entry, bool
> +solicited);
> 
>  #endif          /* DEF_RDMAVT_INCCQH */
> diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index
> 927f6d5..125db26 100644
> --- a/include/rdma/rdmavt_qp.h
> +++ b/include/rdma/rdmavt_qp.h
> @@ -589,7 +589,7 @@ static inline void rvt_qp_swqe_complete(
>  	if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
>  	    (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
>  	     status != IB_WC_SUCCESS) {
> -		struct ib_wc wc;
> +		struct rvt_wc wc;
> 
>  		memset(&wc, 0, sizeof(wc));
>  		wc.wr_id = wqe->wr.wr_id;





[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux