This is a note to let you know that I've just added the patch titled IB/hfi1: Update RMT size calculation to the 5.10-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: ib-hfi1-update-rmt-size-calculation.patch and it can be found in the queue-5.10 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. commit ec19daa6188c9bcb99b0430e4a44608b4beca427 Author: Dean Luick <dean.luick@xxxxxxxxxxxxxxxxxxxx> Date: Mon Jan 9 14:04:29 2023 -0500 IB/hfi1: Update RMT size calculation [ Upstream commit 892ede5a77f337831609fb9c248ac60948061894 ] Fix possible RMT overflow: Use the correct netdev size. Don't allow adjusted user contexts to go negative. Fix QOS calculation: Send kernel context count as an argument since dd->n_krcv_queues is not yet set up in earliest call. Do not include the control context in the QOS calculation. Use the same sized variable to find the max of krcvq[] entries. Update the RMT count explanation to make more sense. Signed-off-by: Dean Luick <dean.luick@xxxxxxxxxxxxxxxxxxxx> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@xxxxxxxxxxxxxxxxxxxx> Link: https://lore.kernel.org/r/167329106946.1472990.18385495251650939054.stgit@xxxxxxxxxxxxxxxxxxxxxxxxxxxx Signed-off-by: Leon Romanovsky <leon@xxxxxxxxxx> Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx> diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 88476a1a601a4..4b41f35668b20 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1097,7 +1097,7 @@ static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr); static void handle_temp_err(struct hfi1_devdata *dd); static void dc_shutdown(struct hfi1_devdata *dd); static void dc_start(struct hfi1_devdata *dd); -static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, +static int qos_rmt_entries(unsigned int n_krcv_queues, unsigned int *mp, unsigned int *np); static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd); static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms); @@ -13403,7 +13403,6 @@ static int set_up_context_variables(struct hfi1_devdata *dd) int ret; unsigned ngroups; int rmt_count; - int user_rmt_reduced; u32 n_usr_ctxts; u32 send_contexts = chip_send_contexts(dd); u32 rcv_contexts = chip_rcv_contexts(dd); @@ -13462,28 +13461,34 @@ static int set_up_context_variables(struct hfi1_devdata *dd) (num_kernel_contexts + n_usr_ctxts), &node_affinity.real_cpu_mask); /* - * The RMT entries are currently allocated as shown below: - * 1. QOS (0 to 128 entries); - * 2. FECN (num_kernel_context - 1 + num_user_contexts + - * num_netdev_contexts); - * 3. netdev (num_netdev_contexts). - * It should be noted that FECN oversubscribe num_netdev_contexts - * entries of RMT because both netdev and PSM could allocate any receive - * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts, - * and PSM FECN must reserve an RMT entry for each possible PSM receive - * context. + * RMT entries are allocated as follows: + * 1. QOS (0 to 128 entries) + * 2. FECN (num_kernel_context - 1 [a] + num_user_contexts + + * num_netdev_contexts [b]) + * 3. netdev (NUM_NETDEV_MAP_ENTRIES) + * + * Notes: + * [a] Kernel contexts (except control) are included in FECN if kernel + * TID_RDMA is active. + * [b] Netdev and user contexts are randomly allocated from the same + * context pool, so FECN must cover all contexts in the pool. */ - rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_netdev_contexts * 2); - if (HFI1_CAP_IS_KSET(TID_RDMA)) - rmt_count += num_kernel_contexts - 1; - if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) { - user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count; - dd_dev_err(dd, - "RMT size is reducing the number of user receive contexts from %u to %d\n", - n_usr_ctxts, - user_rmt_reduced); - /* recalculate */ - n_usr_ctxts = user_rmt_reduced; + rmt_count = qos_rmt_entries(num_kernel_contexts - 1, NULL, NULL) + + (HFI1_CAP_IS_KSET(TID_RDMA) ? num_kernel_contexts - 1 + : 0) + + n_usr_ctxts + + num_netdev_contexts + + NUM_NETDEV_MAP_ENTRIES; + if (rmt_count > NUM_MAP_ENTRIES) { + int over = rmt_count - NUM_MAP_ENTRIES; + /* try to squish user contexts, minimum of 1 */ + if (over >= n_usr_ctxts) { + dd_dev_err(dd, "RMT overflow: reduce the requested number of contexts\n"); + return -EINVAL; + } + dd_dev_err(dd, "RMT overflow: reducing # user contexts from %u to %u\n", + n_usr_ctxts, n_usr_ctxts - over); + n_usr_ctxts -= over; } /* the first N are kernel contexts, the rest are user/netdev contexts */ @@ -14340,15 +14345,15 @@ static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index) } /* return the number of RSM map table entries that will be used for QOS */ -static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, +static int qos_rmt_entries(unsigned int n_krcv_queues, unsigned int *mp, unsigned int *np) { int i; unsigned int m, n; - u8 max_by_vl = 0; + uint max_by_vl = 0; /* is QOS active at all? */ - if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS || + if (n_krcv_queues < MIN_KERNEL_KCTXTS || num_vls == 1 || krcvqsset <= 1) goto no_qos; @@ -14406,7 +14411,7 @@ static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt) if (!rmt) goto bail; - rmt_entries = qos_rmt_entries(dd, &m, &n); + rmt_entries = qos_rmt_entries(dd->n_krcv_queues - 1, &m, &n); if (rmt_entries == 0) goto bail; qpns_per_vl = 1 << m;