[PATCH for-next] IB/ipoib: Increase IPOIB Datagram mode MTU's upper limit

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Sadanand Warrier <sadanand.warrier@xxxxxxxxx>

Currently the IPOIB ud mtu is restricted to 4Kbytes. Remove this
limitation so that the IPOIB module can potentially use an MTU that is
bounded by the MTU of the underlying device.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@xxxxxxxxx>
Reviewed-by: Ira Weiny <ira.weiny@xxxxxxxxx>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@xxxxxxxxx>
Signed-off-by: Sadanand Warrier <sadanand.warrier@xxxxxxxxx>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@xxxxxxxxx>
---
 drivers/infiniband/hw/hfi1/hfi.h               |    6 +++
 drivers/infiniband/hw/hfi1/qp.c                |   21 ++---------
 drivers/infiniband/hw/hfi1/verbs.c             |    9 +++--
 drivers/infiniband/hw/qib/qib_qp.c             |    2 +
 drivers/infiniband/hw/qib/qib_verbs.c          |    2 +
 drivers/infiniband/sw/rdmavt/qp.c              |    2 +
 drivers/infiniband/ulp/ipoib/ipoib_main.c      |    2 +
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c |    8 +++-
 include/rdma/ib_verbs.h                        |   48 ++++++++++++++++++++++++
 include/rdma/opa_port_info.h                   |    8 ----
 include/rdma/rdma_vt.h                         |    2 +
 11 files changed, 73 insertions(+), 37 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index dd84238..de1b02a 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1721,6 +1721,12 @@ static inline int valid_ib_mtu(unsigned int mtu)
 		mtu == 4096;
 }
 
+static inline int valid_opa_mtu(unsigned int mtu)
+{
+	return  valid_ib_mtu(mtu) || mtu == 8192 ||
+		mtu == 10240;
+}
+
 static inline int valid_opa_max_mtu(unsigned int mtu)
 {
 	return mtu >= 2048 &&
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 1697d96..977bacd 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -168,15 +168,6 @@ static void flush_iowait(struct rvt_qp *qp)
 	write_sequnlock_irqrestore(lock, flags);
 }
 
-static inline int opa_mtu_enum_to_int(int mtu)
-{
-	switch (mtu) {
-	case OPA_MTU_8192:  return 8192;
-	case OPA_MTU_10240: return 10240;
-	default:            return -1;
-	}
-}
-
 /**
  * This function is what we would push to the core layer if we wanted to be a
  * "first class citizen".  Instead we hide this here and rely on Verbs ULPs
@@ -184,15 +175,9 @@ static inline int opa_mtu_enum_to_int(int mtu)
  */
 static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
 {
-	int val;
-
-	/* Constraining 10KB packets to 8KB packets */
 	if (mtu == (enum ib_mtu)OPA_MTU_10240)
 		mtu = OPA_MTU_8192;
-	val = opa_mtu_enum_to_int((int)mtu);
-	if (val > 0)
-		return val;
-	return ib_mtu_enum_to_int(mtu);
+	return opa_mtu_enum_to_int((enum opa_mtu)mtu);
 }
 
 int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
@@ -308,13 +293,13 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
 		break;
 	case IB_QPT_SMI:
 		ah = ibah_to_rvtah(wqe->ud_wr.ah);
-		if (wqe->length > (1 << ah->log_pmtu))
+		if (wqe->length > ah->pmtu)
 			return -EINVAL;
 		break;
 	case IB_QPT_GSI:
 	case IB_QPT_UD:
 		ah = ibah_to_rvtah(wqe->ud_wr.ah);
-		if (wqe->length > (1 << ah->log_pmtu))
+		if (wqe->length > ah->pmtu)
 			return -EINVAL;
 		if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf)
 			return -EINVAL;
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index fc2e44c..ab67f8d 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1481,9 +1481,10 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
 	 * from the Path Records to us will get the new 8k MTU.  Those that
 	 * attempt to process the MTU enum may fail in various ways.
 	 */
-	props->max_mtu = mtu_to_enum((!valid_ib_mtu(hfi1_max_mtu) ?
-				      4096 : hfi1_max_mtu), IB_MTU_4096);
-	props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
+	props->max_mtu = mtu_to_enum((!valid_opa_mtu(hfi1_max_mtu) ?
+				      4096 : hfi1_max_mtu),
+				     IB_MTU_4096);
+	props->active_mtu = !valid_opa_mtu(ppd->ibmtu) ? props->max_mtu :
 		mtu_to_enum(ppd->ibmtu, IB_MTU_4096);
 
 	/*
@@ -1617,7 +1618,7 @@ static void hfi1_notify_new_ah(struct ib_device *ibdev,
 	dd = dd_from_ppd(ppd);
 	ah->vl = sc_to_vlt(dd, sc5);
 	if (ah->vl < num_vls || ah->vl == 15)
-		ah->log_pmtu = ilog2(dd->vld[ah->vl].mtu);
+		ah->pmtu = dd->vld[ah->vl].mtu;
 }
 
 /**
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 344e401..59f0093 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -402,7 +402,7 @@ int qib_check_send_wqe(struct rvt_qp *qp,
 	case IB_QPT_GSI:
 	case IB_QPT_UD:
 		ah = ibah_to_rvtah(wqe->ud_wr.ah);
-		if (wqe->length > (1 << ah->log_pmtu))
+		if (wqe->length > ah->pmtu)
 			return -EINVAL;
 		/* progress hint */
 		ret = 1;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 14b4057..c5d0ae4 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1367,7 +1367,7 @@ static void qib_notify_new_ah(struct ib_device *ibdev,
 	ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
 	ppd = ppd_from_ibp(ibp);
 	ah->vl = ibp->sl_to_vl[rdma_ah_get_sl(&ah->attr)];
-	ah->log_pmtu = ilog2(ppd->ibmtu);
+	ah->pmtu = ppd->ibmtu;
 }
 
 struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 6e9a351..00ddf97 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1835,7 +1835,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
 	    qp->ibqp.qp_type != IB_QPT_RC) {
 		struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah);
 
-		log_pmtu = ah->log_pmtu;
+		log_pmtu =  ilog2(ah->pmtu);
 		atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
 	}
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index cf291f9..49af25f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2257,7 +2257,7 @@ void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
 		goto device_init_failed;
 	}
 
-	priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
+	priv->max_ib_mtu = rdma_mtu_enum_to_int(hca, port, attr.max_mtu);
 
 	/* MTU will be reset when mcast join happens */
 	priv->dev->mtu  = IPOIB_UD_MTU(priv->max_ib_mtu);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 6709328..025cb07 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -243,10 +243,14 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 		if (priv->mcast_mtu == priv->admin_mtu)
 			priv->admin_mtu =
 			priv->mcast_mtu =
-			IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
+			IPOIB_UD_MTU(rdma_mtu_enum_to_int(priv->ca,
+							  priv->port,
+							  priv->broadcast->mcmember.mtu));
 		else
 			priv->mcast_mtu =
-			IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
+			IPOIB_UD_MTU(rdma_mtu_enum_to_int(priv->ca,
+							  priv->port,
+							  priv->broadcast->mcmember.mtu));
 
 		priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
 		spin_unlock_irq(&priv->lock);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index e849bd0..b1fd9b1 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -395,6 +395,11 @@ enum ib_mtu {
 	IB_MTU_4096 = 5
 };
 
+enum opa_mtu {
+	OPA_MTU_8192 = 6,
+	OPA_MTU_10240 = 7
+};
+
 static inline int ib_mtu_enum_to_int(enum ib_mtu mtu)
 {
 	switch (mtu) {
@@ -421,6 +426,24 @@ static inline enum ib_mtu ib_mtu_int_to_enum(int mtu)
 		return IB_MTU_256;
 }
 
+static inline int opa_mtu_enum_to_int(enum opa_mtu mtu)
+{
+	switch (mtu) {
+	case OPA_MTU_8192: return 8192;
+	case OPA_MTU_10240: return 10240;
+	default: return(ib_mtu_enum_to_int((enum ib_mtu) mtu));
+	}
+}
+
+static inline enum opa_mtu opa_mtu_int_to_enum(int mtu)
+{
+	if (mtu >= 10240)
+		return OPA_MTU_10240;
+	else if (mtu >= 8192)
+		return OPA_MTU_8192;
+	else return ((enum opa_mtu) ib_mtu_int_to_enum(mtu));
+}
+
 enum ib_port_state {
 	IB_PORT_NOP		= 0,
 	IB_PORT_DOWN		= 1,
@@ -2997,6 +3020,31 @@ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num)
 	return rdma_protocol_iwarp(dev, port_num);
 }
 
+/**
+ * rdma_core_cap_opa_port - Return whether the RDMA Port is OPA or not.
+ *
+ * @device: Device
+ * @port_num: 1 based Port number
+ *
+ * Return true if the port is an Intel OPA port false if not.
+ */
+
+static inline bool rdma_core_cap_opa_port(struct ib_device *device,
+					  u32 port_num)
+{
+	return (device->port_immutable[port_num].core_cap_flags & RDMA_CORE_PORT_INTEL_OPA)
+		== RDMA_CORE_PORT_INTEL_OPA;
+}
+
+static inline int rdma_mtu_enum_to_int(struct ib_device *device, u8 port,
+				       int mtu)
+{
+	if (rdma_core_cap_opa_port(device, port))
+		return opa_mtu_enum_to_int((enum opa_mtu) mtu);
+	else
+		return ib_mtu_enum_to_int((enum ib_mtu) mtu);
+}
+
 int ib_query_gid(struct ib_device *device,
 		 u8 port_num, int index, union ib_gid *gid,
 		 struct ib_gid_attr *attr);
diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h
index b4f0ac0..b3518e9 100644
--- a/include/rdma/opa_port_info.h
+++ b/include/rdma/opa_port_info.h
@@ -137,14 +137,6 @@
 #define OPA_CAP_MASK3_IsVLMarkerSupported         (1 << 1)
 #define OPA_CAP_MASK3_IsVLrSupported              (1 << 0)
 
-/**
- * new MTU values
- */
-enum {
-	OPA_MTU_8192  = 6,
-	OPA_MTU_10240 = 7,
-};
-
 enum {
 	OPA_PORT_PHYS_CONF_DISCONNECTED = 0,
 	OPA_PORT_PHYS_CONF_STANDARD     = 1,
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index e79229a..a3d98cb 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -189,8 +189,8 @@ struct rvt_ah {
 	struct ib_ah ibah;
 	struct rdma_ah_attr attr;
 	atomic_t refcount;
+	u16 pmtu;
 	u8 vl;
-	u8 log_pmtu;
 };
 
 struct rvt_dev_info;

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux