From: Vlad Dumitrescu <vdumitrescu@xxxxxxxxxx> This attribute allows system admins to make a trade-off between faster recovery under transient loss and reduced congestion under persistent loss or overload. Use 15 as the maximum value, as it allows system admins to effectively opt the CM out of exponential backoff. The CM currently uses the CMA_MAX_CM_RETRIES (15) constant to set retries. Other MAD layer callers use different values (e.g., sa_query uses 10, UMAD exposes the parameter to userspace), but a maximum of 15 linear retries should be enough. Example: # rdma management show rocep1s0f1/1 1: rocep1s0f1: 1 mad-linear-timeouts 4 ... # rdma management set rocep1s0f1/1 mad-linear-timeouts 6 # rdma management show 0: rocep1s0f0: 1 mad-linear-timeouts 4 ... 1: rocep1s0f1: 1 mad-linear-timeouts 6 ... Signed-off-by: Vlad Dumitrescu <vdumitrescu@xxxxxxxxxx> Reviewed-by: Sean Hefty <shefty@xxxxxxxxxx> Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxx> --- drivers/infiniband/core/mad.c | 35 ++++++++++++++++++++++++++++++ drivers/infiniband/core/mad_priv.h | 4 ++++ drivers/infiniband/core/nldev.c | 19 ++++++++++++++++ include/uapi/rdma/rdma_netlink.h | 2 ++ 4 files changed, 60 insertions(+) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index a3a8cf4bbc20..7c4ac8ae0a3f 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -54,7 +54,9 @@ #define CREATE_TRACE_POINTS #include <trace/events/ib_mad.h> +#define IB_MAD_LINEAR_TIMEOUTS_MIN 1 #define IB_MAD_LINEAR_TIMEOUTS_DEFAULT 4 +#define IB_MAD_LINEAR_TIMEOUTS_MAX 15 #define IB_MAD_MAX_TIMEOUT_MS (60 * MSEC_PER_SEC) #define IB_MAD_MAX_DEADLINE (jiffies + msecs_to_jiffies(5 * 60 * 1000)) @@ -145,6 +147,39 @@ ib_get_mad_port(struct ib_device *device, u32 port_num) return entry; } +int ib_mad_linear_timeouts_set(struct ib_device *dev, u32 port_num, u8 val, + struct netlink_ext_ack *extack) +{ + struct ib_mad_port_private *port = ib_get_mad_port(dev, port_num); + + if (!port) + return 
-ENODEV; + + if (val > IB_MAD_LINEAR_TIMEOUTS_MAX || + val < IB_MAD_LINEAR_TIMEOUTS_MIN) { + NL_SET_ERR_MSG_FMT_MOD(extack, "Valid range [%u-%u]", + IB_MAD_LINEAR_TIMEOUTS_MIN, + IB_MAD_LINEAR_TIMEOUTS_MAX); + return -EINVAL; + } + + WRITE_ONCE(port->linear_timeouts, val); + + return 0; +} + +int ib_mad_linear_timeouts_get(struct ib_device *dev, u32 port_num, u8 *val) +{ + struct ib_mad_port_private *port = ib_get_mad_port(dev, port_num); + + if (!port) + return -ENODEV; + + *val = READ_ONCE(port->linear_timeouts); + + return 0; +} + static inline u8 convert_mgmt_class(u8 mgmt_class) { /* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */ diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 076ebcea27b4..e6b362c054a6 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -241,4 +241,8 @@ void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr); void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, unsigned long timeout_ms); +int ib_mad_linear_timeouts_set(struct ib_device *dev, u32 port_num, u8 val, + struct netlink_ext_ack *extack); +int ib_mad_linear_timeouts_get(struct ib_device *dev, u32 port_num, u8 *val); + #endif /* __IB_MAD_PRIV_H__ */ diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 363742567dd2..acb02f8c87c0 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -172,6 +172,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_EVENT_TYPE] = { .type = NLA_U8 }, [RDMA_NLDEV_MGMT_ATTR_SA_MIN_TIMEOUT] = { .type = NLA_U32 }, + [RDMA_NLDEV_MGMT_ATTR_MAD_LINEAR_TIMEOUTS] = { .type = NLA_U8 }, }; static int put_driver_name_print_type(struct sk_buff *msg, const char *name, @@ -2627,6 +2628,7 @@ static int nldev_mgmt_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; 
struct ib_device *device; + u8 mad_linear_timeouts; struct sk_buff *msg; u32 index; u32 port; @@ -2657,6 +2659,10 @@ static int nldev_mgmt_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err; } + ret = ib_mad_linear_timeouts_get(device, port, &mad_linear_timeouts); + if (ret) + goto err; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; @@ -2680,6 +2686,11 @@ static int nldev_mgmt_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, goto err_msg; } + ret = nla_put_u8(msg, RDMA_NLDEV_MGMT_ATTR_MAD_LINEAR_TIMEOUTS, + mad_linear_timeouts); + if (ret) + goto err_msg; + nlmsg_end(msg, nlh); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); @@ -2695,6 +2706,7 @@ static int nldev_set_mgmt_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; struct ib_device *device; + u8 mad_linear_timeouts; u32 index; u32 port; u32 sa_min_timeout; @@ -2723,6 +2735,13 @@ static int nldev_set_mgmt_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, extack); } + if (tb[RDMA_NLDEV_MGMT_ATTR_MAD_LINEAR_TIMEOUTS]) { + mad_linear_timeouts = nla_get_u8( + tb[RDMA_NLDEV_MGMT_ATTR_MAD_LINEAR_TIMEOUTS]); + return ib_mad_linear_timeouts_set(device, port, + mad_linear_timeouts, extack); + } + err: ib_device_put(device); return -EINVAL; diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 2b1c4c55e51f..d209a5973c8e 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -585,6 +585,8 @@ enum rdma_nldev_attr { RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, /* u8 */ RDMA_NLDEV_MGMT_ATTR_SA_MIN_TIMEOUT, /* u32 */ + + RDMA_NLDEV_MGMT_ATTR_MAD_LINEAR_TIMEOUTS, /* u8 */ /* * Always the end */ -- 2.47.0