From: Vlad Dumitrescu <vdumitrescu@xxxxxxxxxx>

SA query users, both in-kernel and userspace (via rdma_resolve_route),
pass in a total timeout and expect the SA query layer to handle
retries. SA query relies on the MAD layer to issue a fixed number of
10 retries at the specified interval (timeout), set to 1/10 of the
requested total timeout.

When the caller-requested total timeout is low (e.g., 1s for IPoIB),
the resulting retry interval (e.g., 100ms) is too aggressive. There
have been reports of overloaded SA receivers. Hence, enforce a
minimum. A follow-up change will make this configurable via the rdma
tool (netlink) at per-port granularity.

Continue to enforce the caller's total timeout by using the new MAD
layer deadline option. Remove the small-timeout special case - the
total timeout option will take care of stopping the send even when
more retries are left. Moreover, this special case results in an
extremely aggressive 1ms retry interval, which is definitely not
desirable.

Signed-off-by: Vlad Dumitrescu <vdumitrescu@xxxxxxxxxx>
Reviewed-by: Sean Hefty <shefty@xxxxxxxxxx>
Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxx>
---
 drivers/infiniband/core/sa_query.c | 34 +++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 53571e6b3162..ac0d53bf91c4 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -39,6 +39,7 @@
 #include <linux/slab.h>
 #include <linux/dma-mapping.h>
 #include <linux/kref.h>
+#include <linux/minmax.h>
 #include <linux/xarray.h>
 #include <linux/workqueue.h>
 #include <uapi/linux/if_ether.h>
@@ -59,6 +60,7 @@
 #define IB_SA_LOCAL_SVC_TIMEOUT_MAX		200000
 #define IB_SA_CPI_MAX_RETRY_CNT			3
 #define IB_SA_CPI_RETRY_WAIT			1000 /*msecs */
+#define IB_SA_MIN_TIMEOUT_MS_DEFAULT		500
 
 static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT;
 
 struct ib_sa_sm_ah {
@@ -96,6 +98,7 @@ struct ib_sa_port {
	spinlock_t classport_lock; /* protects class port info set */
	spinlock_t ah_lock;
	u32 port_num;
+	u32 min_timeout_ms;
 };
 
 struct ib_sa_device {
@@ -1344,13 +1347,14 @@ static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
	if (ret < 0)
		return ret;
 
-	query->mad_buf->timeout_ms = timeout_ms / nmbr_sa_query_retries;
+	query->mad_buf->timeout_ms =
+		max(READ_ONCE(query->port->min_timeout_ms),
+		    timeout_ms / nmbr_sa_query_retries);
	query->mad_buf->retries = nmbr_sa_query_retries;
-	if (!query->mad_buf->timeout_ms) {
-		/* Special case, very small timeout_ms */
-		query->mad_buf->timeout_ms = 1;
-		query->mad_buf->retries = timeout_ms;
-	}
+	ret = ib_set_mad_deadline(query->mad_buf, timeout_ms);
+	if (ret)
+		goto out;
+
	query->mad_buf->context[0] = query;
	query->id = id;
 
@@ -1364,18 +1368,22 @@ static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
	}
 
	ret = ib_post_send_mad(query->mad_buf, NULL);
-	if (ret) {
-		xa_lock_irqsave(&queries, flags);
-		__xa_erase(&queries, id);
-		xa_unlock_irqrestore(&queries, flags);
-	}
 
	/*
	 * It's not safe to dereference query any more, because the
	 * send may already have completed and freed the query in
	 * another context.
	 */
-	return ret ? ret : id;
+
+out:
+	if (ret) {
+		xa_lock_irqsave(&queries, flags);
+		__xa_erase(&queries, id);
+		xa_unlock_irqrestore(&queries, flags);
+		return ret;
+	}
+
+	return id;
 }
 
 void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec)
@@ -2192,6 +2200,8 @@ static int ib_sa_add_one(struct ib_device *device)
		INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
				  update_ib_cpi);
 
+		sa_dev->port[i].min_timeout_ms = IB_SA_MIN_TIMEOUT_MS_DEFAULT;
+
		count++;
	}
 
-- 
2.47.0
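
To make the retry math in the commit message concrete, below is a small
standalone sketch of the interval computation this patch introduces. It
models the behavior in userspace C for illustration only: the constant
names mirror the patch, but NMBR_SA_QUERY_RETRIES (nmbr_sa_query_retries
in the kernel) and the program itself are assumptions for this sketch,
not kernel code.

#include <stdio.h>

#define NMBR_SA_QUERY_RETRIES		10	/* fixed retry count in the SA query layer */
#define IB_SA_MIN_TIMEOUT_MS_DEFAULT	500	/* per-port floor introduced by this patch */

static unsigned long max_ul(unsigned long a, unsigned long b)
{
	return a > b ? a : b;
}

int main(void)
{
	/* Caller-requested total timeouts, in ms (1s is the IPoIB example). */
	unsigned long totals[] = { 1000, 2000, 10000 };
	size_t i;

	for (i = 0; i < sizeof(totals) / sizeof(totals[0]); i++) {
		unsigned long total = totals[i];
		/* Old behavior: interval = total / retries (100ms for 1s). */
		unsigned long old_interval = total / NMBR_SA_QUERY_RETRIES;
		/*
		 * New behavior: clamp the interval to the per-port minimum.
		 * The MAD-layer deadline (ib_set_mad_deadline in the patch)
		 * still stops retries once the caller's total timeout elapses.
		 */
		unsigned long new_interval =
			max_ul(IB_SA_MIN_TIMEOUT_MS_DEFAULT, old_interval);

		printf("total=%lums interval: old=%lums new=%lums\n",
		       total, old_interval, new_interval);
	}
	return 0;
}

For the 1s IPoIB case this prints old=100ms vs new=500ms: the sender
backs off to at most ~2 sends within the window instead of 10, while the
deadline keeps the caller's total timeout intact.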