[PATCH rdma-next 8/9] RDMA/nldev: Add mad-linear-timeouts management attribute

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Vlad Dumitrescu <vdumitrescu@xxxxxxxxxx>

This attribute allows system admins to make a trade-off between speed
of recovery under transient loss and reducing congestion under
persistent loss or overload.

Set 15 as the maximum value, as it allows system admins to effectively
opt the CM out of exponential backoff.  The CM currently uses the
CMA_MAX_CM_RETRIES (15) constant to set retries.  Other MAD layer callers
use different values (e.g., sa_query uses 10, UMAD exposes the parameter
to userspace), but a max of 15 linear retries should be enough.

Example:
  # rdma management show rocep1s0f1/1
  1: rocep1s0f1: 1 mad-linear-timeouts 4 ...
  # rdma management set rocep1s0f1/1 mad-linear-timeouts 6
  # rdma management show
  0: rocep1s0f0: 1 mad-linear-timeouts 4 ...
  1: rocep1s0f1: 1 mad-linear-timeouts 6 ...

Signed-off-by: Vlad Dumitrescu <vdumitrescu@xxxxxxxxxx>
Reviewed-by: Sean Hefty <shefty@xxxxxxxxxx>
Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxx>
---
 drivers/infiniband/core/mad.c      | 35 ++++++++++++++++++++++++++++++
 drivers/infiniband/core/mad_priv.h |  4 ++++
 drivers/infiniband/core/nldev.c    | 19 ++++++++++++++++
 include/uapi/rdma/rdma_netlink.h   |  2 ++
 4 files changed, 60 insertions(+)

diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index a3a8cf4bbc20..7c4ac8ae0a3f 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -54,7 +54,9 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/ib_mad.h>
 
+#define IB_MAD_LINEAR_TIMEOUTS_MIN	1
 #define IB_MAD_LINEAR_TIMEOUTS_DEFAULT	4
+#define IB_MAD_LINEAR_TIMEOUTS_MAX	15
 #define IB_MAD_MAX_TIMEOUT_MS		(60 * MSEC_PER_SEC)
 #define IB_MAD_MAX_DEADLINE		(jiffies + msecs_to_jiffies(5 * 60 * 1000))
 
@@ -145,6 +147,39 @@ ib_get_mad_port(struct ib_device *device, u32 port_num)
 	return entry;
 }
 
+int ib_mad_linear_timeouts_set(struct ib_device *dev, u32 port_num, u8 val,
+			       struct netlink_ext_ack *extack)
+{
+	struct ib_mad_port_private *port = ib_get_mad_port(dev, port_num);
+	/* Sets port->linear_timeouts; -ENODEV if no MAD port is found. */
+	if (!port)
+		return -ENODEV;
+	/* Out of range: -EINVAL, and @extack is given the accepted range. */
+	if (val > IB_MAD_LINEAR_TIMEOUTS_MAX ||
+	    val < IB_MAD_LINEAR_TIMEOUTS_MIN) {
+		NL_SET_ERR_MSG_FMT_MOD(extack, "Valid range [%u-%u]",
+				       IB_MAD_LINEAR_TIMEOUTS_MIN,
+				       IB_MAD_LINEAR_TIMEOUTS_MAX);
+		return -EINVAL;
+	}
+	/* Paired with READ_ONCE() in ib_mad_linear_timeouts_get(). */
+	WRITE_ONCE(port->linear_timeouts, val);
+
+	return 0;
+}
+
+int ib_mad_linear_timeouts_get(struct ib_device *dev, u32 port_num, u8 *val)
+{
+	struct ib_mad_port_private *port = ib_get_mad_port(dev, port_num);
+	/* Reads port->linear_timeouts into *val; -ENODEV if no MAD port. */
+	if (!port)
+		return -ENODEV;
+	/* Paired with WRITE_ONCE() in ib_mad_linear_timeouts_set(). */
+	*val = READ_ONCE(port->linear_timeouts);
+
+	return 0;
+}
+
 static inline u8 convert_mgmt_class(u8 mgmt_class)
 {
 	/* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 076ebcea27b4..e6b362c054a6 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -241,4 +241,8 @@ void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr);
 void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
 			  unsigned long timeout_ms);
 
+int ib_mad_linear_timeouts_set(struct ib_device *dev, u32 port_num, u8 val,
+			       struct netlink_ext_ack *extack);
+int ib_mad_linear_timeouts_get(struct ib_device *dev, u32 port_num, u8 *val);
+
 #endif	/* __IB_MAD_PRIV_H__ */
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 363742567dd2..acb02f8c87c0 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -172,6 +172,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
 	[RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE]	= { .type = NLA_U8 },
 	[RDMA_NLDEV_ATTR_EVENT_TYPE]		= { .type = NLA_U8 },
 	[RDMA_NLDEV_MGMT_ATTR_SA_MIN_TIMEOUT]	= { .type = NLA_U32 },
+	[RDMA_NLDEV_MGMT_ATTR_MAD_LINEAR_TIMEOUTS] = { .type = NLA_U8 },
 };
 
 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -2627,6 +2628,7 @@ static int nldev_mgmt_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 {
 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
 	struct ib_device *device;
+	u8 mad_linear_timeouts;
 	struct sk_buff *msg;
 	u32 index;
 	u32 port;
@@ -2657,6 +2659,10 @@ static int nldev_mgmt_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 			goto err;
 	}
 
+	ret = ib_mad_linear_timeouts_get(device, port, &mad_linear_timeouts);
+	if (ret)
+		goto err;
+
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg) {
 		ret = -ENOMEM;
@@ -2680,6 +2686,11 @@ static int nldev_mgmt_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 			goto err_msg;
 	}
 
+	ret = nla_put_u8(msg, RDMA_NLDEV_MGMT_ATTR_MAD_LINEAR_TIMEOUTS,
+			 mad_linear_timeouts);
+	if (ret)
+		goto err_msg;
+
 	nlmsg_end(msg, nlh);
 	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
@@ -2695,6 +2706,7 @@ static int nldev_set_mgmt_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 {
 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
 	struct ib_device *device;
+	u8 mad_linear_timeouts;
 	u32 index;
 	u32 port;
 	u32 sa_min_timeout;
@@ -2723,6 +2735,13 @@ static int nldev_set_mgmt_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 					     extack);
 	}
 
+	if (tb[RDMA_NLDEV_MGMT_ATTR_MAD_LINEAR_TIMEOUTS]) {
+		mad_linear_timeouts = nla_get_u8(
+			tb[RDMA_NLDEV_MGMT_ATTR_MAD_LINEAR_TIMEOUTS]);
+		return ib_mad_linear_timeouts_set(device, port,
+						  mad_linear_timeouts, extack);
+	}
+
 err:
 	ib_device_put(device);
 	return -EINVAL;
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 2b1c4c55e51f..d209a5973c8e 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -585,6 +585,8 @@ enum rdma_nldev_attr {
 	RDMA_NLDEV_SYS_ATTR_MONITOR_MODE,	/* u8 */
 
 	RDMA_NLDEV_MGMT_ATTR_SA_MIN_TIMEOUT,	/* u32 */
+
+	RDMA_NLDEV_MGMT_ATTR_MAD_LINEAR_TIMEOUTS, /* u8 */
 	/*
 	 * Always the end
 	 */
-- 
2.47.0





[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux