Re: ACPI IRQ storm with 6.10

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Aug 20, 2024 at 11:13:54PM +0200, Petr Valenta wrote:
> Dne 20. 08. 24 v 20:09 Bjorn Helgaas napsal(a):
> > On Mon, Aug 19, 2024 at 07:23:42AM +0200, Jiri Slaby wrote:
> > > On 19. 08. 24, 6:50, Jiri Slaby wrote:
> > > > CC e1000e guys + Jesse (due to 75a3f93b5383) + Bjorn (due to b2c289415b2b)
> > > 
> > > Bjorn,
> > > 
> > > I am confused by these changes:
> > > ==========================================
> > > @@ -291,16 +288,13 @@ static int e1000_set_link_ksettings(struct net_device
> > > *net
> > > dev,
> > >           * duplex is forced.
> > >           */
> > >          if (cmd->base.eth_tp_mdix_ctrl) {
> > > -               if (hw->phy.media_type != e1000_media_type_copper) {
> > > -                       ret_val = -EOPNOTSUPP;
> > > -                       goto out;
> > > -               }
> > > +               if (hw->phy.media_type != e1000_media_type_copper)
> > > +                       return -EOPNOTSUPP;
> > > 
> > >                  if ((cmd->base.eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) &&
> > >                      (cmd->base.autoneg != AUTONEG_ENABLE)) {
> > >                          e_err("forcing MDI/MDI-X state is not supported when
> > > lin
> > > k speed and/or duplex are forced\n");
> > > -                       ret_val = -EINVAL;
> > > -                       goto out;
> > > +                       return -EINVAL;
> > >                  }
> > >          }
> > > 
> > > @@ -347,7 +341,6 @@ static int e1000_set_link_ksettings(struct net_device
> > > *netde
> > > v,
> > >          }
> > > 
> > >   out:
> > > -       pm_runtime_put_sync(netdev->dev.parent);
> > >          clear_bit(__E1000_RESETTING, &adapter->state);
> > >          return ret_val;
> > >   }
> > > ==========================================
> > > 
> > > So no more clear_bit(__E1000_RESETTING in the above fail paths. Is that
> > > intentional?
> > 
> > Not intentional.  Petr, do you have the ability to test the patch
> > below?  I'm not sure it's the correct fix, but it reverts the pieces
> > of b2c289415b2b that Jiri pointed out.
> 
> I tested the patch below but it didn't help. After the first boot with new
> kernel it looked promising as the irq storm only appeared for a few seconds,
> but with subsequent reboots it was the same as without the patch.

Thank you very much for testing that!

> To be sure, I also send the md5sum of ethtool.c after applying the patch:
> 
> a25c003257538f16994b4d7c4260e894 ethtool.c

Thanks, that matches what I get when applying the patch on v6.10.

I'm at a loss.  You could try reverting the entire b2c289415b2b commit
(patch for that is below).

If that doesn't help, I guess you could try reverting the other
commits Jiri mentioned:

  76a0a3f9cc2f e1000e: fix force smbus during suspend flow
  c93a6f62cb1b e1000e: Fix S0ix residency on corporate systems
  bfd546a552e1 e1000e: move force SMBUS near the end of enable_ulp function
  6918107e2540 net: e1000e & ixgbe: Remove PCI_HEADER_TYPE_MFD duplicates
  1eb2cded45b3 net: annotate writes on dev->mtu from ndo_change_mtu()
  b2c289415b2b e1000e: Remove redundant runtime resume for ethtool_ops
  75a3f93b5383 net: intel: implement modern PM ops declarations

If you do this, I would revert 76a0a3f9cc2f, test, then revert
c93a6f62cb1b in addition, test, then revert bfd546a552e1 in addition,
etc.

commit 5e92945ffe5c ("Revert "e1000e: Remove redundant runtime resume for ethtool_ops"")
Author: Bjorn Helgaas <bhelgaas@xxxxxxxxxx>
Date:   Tue Aug 20 16:18:32 2024 -0500

    Revert "e1000e: Remove redundant runtime resume for ethtool_ops"
    
    This reverts commit b2c289415b2b2ef112b78d5e73b4acecf5db409e.


diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index 9364bc2b4eb1..61fa2f6b3708 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -156,7 +156,7 @@ static int e1000_get_link_ksettings(struct net_device *netdev,
 			speed = adapter->link_speed;
 			cmd->base.duplex = adapter->link_duplex - 1;
 		}
-	} else {
+	} else if (!pm_runtime_suspended(netdev->dev.parent)) {
 		u32 status = er32(STATUS);
 
 		if (status & E1000_STATUS_LU) {
@@ -274,13 +274,16 @@ static int e1000_set_link_ksettings(struct net_device *netdev,
 	ethtool_convert_link_mode_to_legacy_u32(&advertising,
 						cmd->link_modes.advertising);
 
+	pm_runtime_get_sync(netdev->dev.parent);
+
 	/* When SoL/IDER sessions are active, autoneg/speed/duplex
 	 * cannot be changed
 	 */
 	if (hw->phy.ops.check_reset_block &&
 	    hw->phy.ops.check_reset_block(hw)) {
 		e_err("Cannot change link characteristics when SoL/IDER is active.\n");
-		return -EINVAL;
+		ret_val = -EINVAL;
+		goto out;
 	}
 
 	/* MDI setting is only allowed when autoneg enabled because
@@ -288,13 +291,16 @@ static int e1000_set_link_ksettings(struct net_device *netdev,
 	 * duplex is forced.
 	 */
 	if (cmd->base.eth_tp_mdix_ctrl) {
-		if (hw->phy.media_type != e1000_media_type_copper)
-			return -EOPNOTSUPP;
+		if (hw->phy.media_type != e1000_media_type_copper) {
+			ret_val = -EOPNOTSUPP;
+			goto out;
+		}
 
 		if ((cmd->base.eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) &&
 		    (cmd->base.autoneg != AUTONEG_ENABLE)) {
 			e_err("forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
-			return -EINVAL;
+			ret_val = -EINVAL;
+			goto out;
 		}
 	}
 
@@ -341,6 +347,7 @@ static int e1000_set_link_ksettings(struct net_device *netdev,
 	}
 
 out:
+	pm_runtime_put_sync(netdev->dev.parent);
 	clear_bit(__E1000_RESETTING, &adapter->state);
 	return ret_val;
 }
@@ -376,6 +383,8 @@ static int e1000_set_pauseparam(struct net_device *netdev,
 	while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
 		usleep_range(1000, 2000);
 
+	pm_runtime_get_sync(netdev->dev.parent);
+
 	if (adapter->fc_autoneg == AUTONEG_ENABLE) {
 		hw->fc.requested_mode = e1000_fc_default;
 		if (netif_running(adapter->netdev)) {
@@ -408,6 +417,7 @@ static int e1000_set_pauseparam(struct net_device *netdev,
 	}
 
 out:
+	pm_runtime_put_sync(netdev->dev.parent);
 	clear_bit(__E1000_RESETTING, &adapter->state);
 	return retval;
 }
@@ -438,6 +448,8 @@ static void e1000_get_regs(struct net_device *netdev,
 	u32 *regs_buff = p;
 	u16 phy_data;
 
+	pm_runtime_get_sync(netdev->dev.parent);
+
 	memset(p, 0, E1000_REGS_LEN * sizeof(u32));
 
 	regs->version = (1u << 24) |
@@ -483,6 +495,8 @@ static void e1000_get_regs(struct net_device *netdev,
 	e1e_rphy(hw, MII_STAT1000, &phy_data);
 	regs_buff[24] = (u32)phy_data;	/* phy local receiver status */
 	regs_buff[25] = regs_buff[24];	/* phy remote receiver status */
+
+	pm_runtime_put_sync(netdev->dev.parent);
 }
 
 static int e1000_get_eeprom_len(struct net_device *netdev)
@@ -515,6 +529,8 @@ static int e1000_get_eeprom(struct net_device *netdev,
 	if (!eeprom_buff)
 		return -ENOMEM;
 
+	pm_runtime_get_sync(netdev->dev.parent);
+
 	if (hw->nvm.type == e1000_nvm_eeprom_spi) {
 		ret_val = e1000_read_nvm(hw, first_word,
 					 last_word - first_word + 1,
@@ -528,6 +544,8 @@ static int e1000_get_eeprom(struct net_device *netdev,
 		}
 	}
 
+	pm_runtime_put_sync(netdev->dev.parent);
+
 	if (ret_val) {
 		/* a read error occurred, throw away the result */
 		memset(eeprom_buff, 0xff, sizeof(u16) *
@@ -577,6 +595,8 @@ static int e1000_set_eeprom(struct net_device *netdev,
 
 	ptr = (void *)eeprom_buff;
 
+	pm_runtime_get_sync(netdev->dev.parent);
+
 	if (eeprom->offset & 1) {
 		/* need read/modify/write of first changed EEPROM word */
 		/* only the second byte of the word is being modified */
@@ -617,6 +637,7 @@ static int e1000_set_eeprom(struct net_device *netdev,
 		ret_val = e1000e_update_nvm_checksum(hw);
 
 out:
+	pm_runtime_put_sync(netdev->dev.parent);
 	kfree(eeprom_buff);
 	return ret_val;
 }
@@ -712,6 +733,8 @@ static int e1000_set_ringparam(struct net_device *netdev,
 		}
 	}
 
+	pm_runtime_get_sync(netdev->dev.parent);
+
 	e1000e_down(adapter, true);
 
 	/* We can't just free everything and then setup again, because the
@@ -750,6 +773,7 @@ static int e1000_set_ringparam(struct net_device *netdev,
 		e1000e_free_tx_resources(temp_tx);
 err_setup:
 	e1000e_up(adapter);
+	pm_runtime_put_sync(netdev->dev.parent);
 free_temp:
 	vfree(temp_tx);
 	vfree(temp_rx);
@@ -1792,6 +1816,8 @@ static void e1000_diag_test(struct net_device *netdev,
 	u8 autoneg;
 	bool if_running = netif_running(netdev);
 
+	pm_runtime_get_sync(netdev->dev.parent);
+
 	set_bit(__E1000_TESTING, &adapter->state);
 
 	if (!if_running) {
@@ -1877,6 +1903,8 @@ static void e1000_diag_test(struct net_device *netdev,
 	}
 
 	msleep_interruptible(4 * 1000);
+
+	pm_runtime_put_sync(netdev->dev.parent);
 }
 
 static void e1000_get_wol(struct net_device *netdev,
@@ -2018,11 +2046,15 @@ static int e1000_set_coalesce(struct net_device *netdev,
 		adapter->itr_setting = adapter->itr & ~3;
 	}
 
+	pm_runtime_get_sync(netdev->dev.parent);
+
 	if (adapter->itr_setting != 0)
 		e1000e_write_itr(adapter, adapter->itr);
 	else
 		e1000e_write_itr(adapter, 0);
 
+	pm_runtime_put_sync(netdev->dev.parent);
+
 	return 0;
 }
 
@@ -2036,7 +2068,9 @@ static int e1000_nway_reset(struct net_device *netdev)
 	if (!adapter->hw.mac.autoneg)
 		return -EINVAL;
 
+	pm_runtime_get_sync(netdev->dev.parent);
 	e1000e_reinit_locked(adapter);
+	pm_runtime_put_sync(netdev->dev.parent);
 
 	return 0;
 }
@@ -2050,8 +2084,12 @@ static void e1000_get_ethtool_stats(struct net_device *netdev,
 	int i;
 	char *p = NULL;
 
+	pm_runtime_get_sync(netdev->dev.parent);
+
 	dev_get_stats(netdev, &net_stats);
 
+	pm_runtime_put_sync(netdev->dev.parent);
+
 	for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++) {
 		switch (e1000_gstrings_stats[i].type) {
 		case NETDEV_STATS:
@@ -2108,7 +2146,9 @@ static int e1000_get_rxnfc(struct net_device *netdev,
 		struct e1000_hw *hw = &adapter->hw;
 		u32 mrqc;
 
+		pm_runtime_get_sync(netdev->dev.parent);
 		mrqc = er32(MRQC);
+		pm_runtime_put_sync(netdev->dev.parent);
 
 		if (!(mrqc & E1000_MRQC_RSS_FIELD_MASK))
 			return 0;
@@ -2171,9 +2211,13 @@ static int e1000e_get_eee(struct net_device *netdev, struct ethtool_keee *edata)
 		return -EOPNOTSUPP;
 	}
 
+	pm_runtime_get_sync(netdev->dev.parent);
+
 	ret_val = hw->phy.ops.acquire(hw);
-	if (ret_val)
+	if (ret_val) {
+		pm_runtime_put_sync(netdev->dev.parent);
 		return -EBUSY;
+	}
 
 	/* EEE Capability */
 	ret_val = e1000_read_emi_reg_locked(hw, cap_addr, &phy_data);
@@ -2213,6 +2257,8 @@ static int e1000e_get_eee(struct net_device *netdev, struct ethtool_keee *edata)
 	if (ret_val)
 		ret_val = -ENODATA;
 
+	pm_runtime_put_sync(netdev->dev.parent);
+
 	return ret_val;
 }
 
@@ -2253,12 +2299,16 @@ static int e1000e_set_eee(struct net_device *netdev, struct ethtool_keee *edata)
 
 	hw->dev_spec.ich8lan.eee_disable = !edata->eee_enabled;
 
+	pm_runtime_get_sync(netdev->dev.parent);
+
 	/* reset the link */
 	if (netif_running(netdev))
 		e1000e_reinit_locked(adapter);
 	else
 		e1000e_reset(adapter);
 
+	pm_runtime_put_sync(netdev->dev.parent);
+
 	return 0;
 }
 




[Index of Archives]     [Linux IBM ACPI]     [Linux Power Management]     [Linux Kernel]     [Linux Laptop]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]
  Powered by Linux