Patch "bnxt_en: Avoid order-5 memory allocation for TPA data" has been added to the 5.15-stable tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is a note to let you know that I've just added the patch titled

    bnxt_en: Avoid order-5 memory allocation for TPA data

to the 5.15-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     bnxt_en-avoid-order-5-memory-allocation-for-tpa-data.patch
and it can be found in the queue-5.15 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@xxxxxxxxxxxxxxx> know about it.



commit f2ec910b1b12258e18eac322e959ada25e7359e0
Author: Michael Chan <michael.chan@xxxxxxxxxxxx>
Date:   Fri Mar 3 18:43:57 2023 -0800

    bnxt_en: Avoid order-5 memory allocation for TPA data
    
    [ Upstream commit accd7e23693aaaa9aa0d3e9eca0ae77d1be80ab3 ]
    
    The driver needs to keep track of all the possible concurrent TPA (GRO/LRO)
    completions on the aggregation ring.  On P5 chips, the maximum number
    of concurrent TPA is 256 and the amount of memory we allocate is order-5
    on systems using 4K pages.  Memory allocation failure has been reported:
    
    NetworkManager: page allocation failure: order:5, mode:0x40dc0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null),cpuset=/,mems_allowed=0-1
    CPU: 15 PID: 2995 Comm: NetworkManager Kdump: loaded Not tainted 5.10.156 #1
    Hardware name: Dell Inc. PowerEdge R660/0M1CC5, BIOS 0.2.25 08/12/2022
    Call Trace:
     dump_stack+0x57/0x6e
     warn_alloc.cold.120+0x7b/0xdd
     ? _cond_resched+0x15/0x30
     ? __alloc_pages_direct_compact+0x15f/0x170
     __alloc_pages_slowpath.constprop.108+0xc58/0xc70
     __alloc_pages_nodemask+0x2d0/0x300
     kmalloc_order+0x24/0xe0
     kmalloc_order_trace+0x19/0x80
     bnxt_alloc_mem+0x1150/0x15c0 [bnxt_en]
     ? bnxt_get_func_stat_ctxs+0x13/0x60 [bnxt_en]
     __bnxt_open_nic+0x12e/0x780 [bnxt_en]
     bnxt_open+0x10b/0x240 [bnxt_en]
     __dev_open+0xe9/0x180
     __dev_change_flags+0x1af/0x220
     dev_change_flags+0x21/0x60
     do_setlink+0x35c/0x1100
    
    Instead of allocating this big chunk of memory and dividing it up for the
    concurrent TPA instances, allocate each small chunk separately for each
    TPA instance.  This will reduce it to order-0 allocations.
    
    Fixes: 79632e9ba386 ("bnxt_en: Expand bnxt_tpa_info struct to support 57500 chips.")
    Reviewed-by: Somnath Kotur <somnath.kotur@xxxxxxxxxxxx>
    Reviewed-by: Damodharam Ammepalli <damodharam.ammepalli@xxxxxxxxxxxx>
    Reviewed-by: Pavan Chebbi <pavan.chebbi@xxxxxxxxxxxx>
    Signed-off-by: Michael Chan <michael.chan@xxxxxxxxxxxx>
    Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx>
    Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index f64df4d532896..4e98e34fc46b5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2999,7 +2999,7 @@ static int bnxt_alloc_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem)
 
 static void bnxt_free_tpa_info(struct bnxt *bp)
 {
-	int i;
+	int i, j;
 
 	for (i = 0; i < bp->rx_nr_rings; i++) {
 		struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
@@ -3007,8 +3007,10 @@ static void bnxt_free_tpa_info(struct bnxt *bp)
 		kfree(rxr->rx_tpa_idx_map);
 		rxr->rx_tpa_idx_map = NULL;
 		if (rxr->rx_tpa) {
-			kfree(rxr->rx_tpa[0].agg_arr);
-			rxr->rx_tpa[0].agg_arr = NULL;
+			for (j = 0; j < bp->max_tpa; j++) {
+				kfree(rxr->rx_tpa[j].agg_arr);
+				rxr->rx_tpa[j].agg_arr = NULL;
+			}
 		}
 		kfree(rxr->rx_tpa);
 		rxr->rx_tpa = NULL;
@@ -3017,14 +3019,13 @@ static void bnxt_free_tpa_info(struct bnxt *bp)
 
 static int bnxt_alloc_tpa_info(struct bnxt *bp)
 {
-	int i, j, total_aggs = 0;
+	int i, j;
 
 	bp->max_tpa = MAX_TPA;
 	if (bp->flags & BNXT_FLAG_CHIP_P5) {
 		if (!bp->max_tpa_v2)
 			return 0;
 		bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5);
-		total_aggs = bp->max_tpa * MAX_SKB_FRAGS;
 	}
 
 	for (i = 0; i < bp->rx_nr_rings; i++) {
@@ -3038,12 +3039,12 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp)
 
 		if (!(bp->flags & BNXT_FLAG_CHIP_P5))
 			continue;
-		agg = kcalloc(total_aggs, sizeof(*agg), GFP_KERNEL);
-		rxr->rx_tpa[0].agg_arr = agg;
-		if (!agg)
-			return -ENOMEM;
-		for (j = 1; j < bp->max_tpa; j++)
-			rxr->rx_tpa[j].agg_arr = agg + j * MAX_SKB_FRAGS;
+		for (j = 0; j < bp->max_tpa; j++) {
+			agg = kcalloc(MAX_SKB_FRAGS, sizeof(*agg), GFP_KERNEL);
+			if (!agg)
+				return -ENOMEM;
+			rxr->rx_tpa[j].agg_arr = agg;
+		}
 		rxr->rx_tpa_idx_map = kzalloc(sizeof(*rxr->rx_tpa_idx_map),
 					      GFP_KERNEL);
 		if (!rxr->rx_tpa_idx_map)



[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux