Hi Seren, please find the updated comment . Thanks Suraj On 12/26/2023 4:40 PM, Suraj Jaiswal wrote: > Hi seren > let me check below on test setup once & get back > > Thanks > Suraj > > On 12/22/2023 8:05 PM, Serge Semin wrote: >> On Fri, Dec 22, 2023 at 02:13:49PM +0530, Suraj Jaiswal wrote: >>> HI Serge, >>> please find commnet inline. >>> >>> Thanks >>> Suraj >>> >>> On 12/21/2023 6:19 PM, Serge Semin wrote: >>>> Hi Suraj >>>> >>>> On Thu, Dec 21, 2023 at 01:06:20PM +0530, Suraj Jaiswal wrote: >>>>> Add support to listen HW safety IRQ like ECC(error >>>>> correction code), DPP(data path parity), FSM(finite state >>>>> machine) fault in common IRQ line. >>>>> >>>>> Signed-off-by: Suraj Jaiswal <quic_jsuraj@xxxxxxxxxxx> >>>> >>>> Thanks for taking my notes into account. One more comment is further >>>> below. >>>> >>>>> --- >>>>> drivers/net/ethernet/stmicro/stmmac/common.h | 1 + >>>>> drivers/net/ethernet/stmicro/stmmac/stmmac.h | 3 ++ >>>>> .../net/ethernet/stmicro/stmmac/stmmac_main.c | 37 +++++++++++++++++++ >>>>> .../ethernet/stmicro/stmmac/stmmac_platform.c | 8 ++++ >>>>> 4 files changed, 49 insertions(+) >>>>> >>>>> diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h >>>>> index 721c1f8e892f..b9233b09b80f 100644 >>>>> --- a/drivers/net/ethernet/stmicro/stmmac/common.h >>>>> +++ b/drivers/net/ethernet/stmicro/stmmac/common.h >>>>> @@ -344,6 +344,7 @@ enum request_irq_err { >>>>> REQ_IRQ_ERR_ALL, >>>>> REQ_IRQ_ERR_TX, >>>>> REQ_IRQ_ERR_RX, >>>>> + REQ_IRQ_ERR_SFTY, >>>>> REQ_IRQ_ERR_SFTY_UE, >>>>> REQ_IRQ_ERR_SFTY_CE, >>>>> REQ_IRQ_ERR_LPI, >>>>> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h >>>>> index 9f89acf31050..ca3d93851bed 100644 >>>>> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h >>>>> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h >>>>> @@ -31,6 +31,7 @@ struct stmmac_resources { >>>>> int wol_irq; >>>>> int lpi_irq; >>>>> int irq; >>>>> + int sfty_irq; >>>>> int sfty_ce_irq; >>>>> int sfty_ue_irq; >>>>> int rx_irq[MTL_MAX_RX_QUEUES]; >>>>> @@ -297,6 +298,7 @@ struct stmmac_priv { >>>>> void __iomem *ptpaddr; >>>>> void __iomem *estaddr; >>>>> unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; >>>>> + int sfty_irq; >>>>> int sfty_ce_irq; >>>>> int sfty_ue_irq; >>>>> int rx_irq[MTL_MAX_RX_QUEUES]; >>>>> @@ -305,6 +307,7 @@ struct stmmac_priv { >>>>> char int_name_mac[IFNAMSIZ + 9]; >>>>> char int_name_wol[IFNAMSIZ + 9]; >>>>> char int_name_lpi[IFNAMSIZ + 9]; >>>>> + char int_name_sfty[IFNAMSIZ + 10]; >>>>> char int_name_sfty_ce[IFNAMSIZ + 10]; >>>>> char int_name_sfty_ue[IFNAMSIZ + 10]; >>>>> char int_name_rx_irq[MTL_MAX_TX_QUEUES][IFNAMSIZ + 14]; >>>>> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c >>>>> index 47de466e432c..7d4e827dfeab 100644 >>>>> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c >>>>> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c >>>>> @@ -3592,6 +3592,10 @@ static void stmmac_free_irq(struct net_device *dev, >>>>> if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) >>>>> free_irq(priv->wol_irq, dev); >>>>> fallthrough; >>>>> + case REQ_IRQ_ERR_SFTY: >>>>> + if (priv->sfty_irq > 0 && priv->sfty_irq != dev->irq) >>>>> + free_irq(priv->sfty_irq, dev); >>>>> + fallthrough; >>>>> case REQ_IRQ_ERR_WOL: >>>>> free_irq(dev->irq, dev); >>>>> fallthrough; >>>>> @@ -3661,6 +3665,23 @@ static int stmmac_request_irq_multi_msi(struct net_device *dev) >>>>> } >>>>> } >>>>> >>>>> + /* Request the common Safety Feature Correctible/Uncorrectible >>>>> + * Error line in case of another line is used >>>>> + */ >>>>> + if (priv->sfty_irq > 0 && priv->sfty_irq != dev->irq) { >>>>> + int_name = priv->int_name_sfty; >>>>> + sprintf(int_name, "%s:%s", dev->name, "safety"); >>>>> + ret = request_irq(priv->sfty_irq, stmmac_safety_interrupt, >>>>> + 0, int_name, dev); >>>>> + if (unlikely(ret < 0)) { >>>>> + netdev_err(priv->dev, >>>>> + "%s: alloc sfty MSI %d (error: %d)\n", >>>>> + __func__, priv->sfty_irq, ret); >>>>> + irq_err = REQ_IRQ_ERR_SFTY; >>>>> + goto irq_error; >>>>> + } >>>>> + } >>>>> + >>>>> /* Request the Safety Feature Correctible Error line in >>>>> * case of another line is used >>>>> */ >>>>> @@ -3798,6 +3819,21 @@ static int stmmac_request_irq_single(struct net_device *dev) >>>>> } >>>>> } >>>>> >>>>> + /* Request the common Safety Feature Correctible/Uncorrectible >>>>> + * Error line in case of another line is used >>>>> + */ >>>>> + if (priv->sfty_irq > 0 && priv->sfty_irq != dev->irq) { >>>> >>>>> + ret = request_irq(priv->sfty_irq, stmmac_safety_interrupt, >>>>> + IRQF_SHARED, dev->name, dev); >>>> >>>> Just noticed yesterday that stmmac_safety_interrupt() is also called >>>> from the stmmac_interrupt() handler which is supposed to be registered >>>> on the generic "mac" IRQ. Won't it cause races around the CSRs >>>> (doubtfully but still worth to note) and the errors handling >>>> (stmmac_global_err()) in case if both IRQs are raised simultaneously? >>>> At the very least it looks suspicious and worth double-checking. >>>> >>>> I also found out that nobody seemed to care that the same handler is >>>> registered on MAC, WoL and LPI IRQ lines. Hmm, no race-related >>>> problems have been reported so far for the platforms with separate >>>> WoL/LPI IRQs. It's either a lucky coincident or the IRQs are always >>>> assigned to the same CPU or the IRQs handle is indeed free of races. >>>> In anyway it looks suspicious too. At the very least AFAICS the DMA >>>> IRQ-handler is indeed racy on the status CSR access. It isn't >>>> cleared-on-read, but write-one-to-clear. So the statistics might be >>>> calculated more than once for the same CSR state. There might be some >>>> other problems I failed to spot on the first glance. >>>> >>>> David, Eric, Jacub, Paolo, your opinion about the note above? >>>> >>>> -Serge(y) >>>> >> >>> <Suraj> We are adding common IRQ similar to already present code for correcteable/uncorrecable https://elixir.bootlin.com/linux/latest/source/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c#L3592. >> >> From that perspective your change in stmmac_request_irq_multi_msi() is >> correct, but stmmac_request_irq_single() is another story. The first >> one method implies assigning the individual IRQ handlers to all >> available lines. The later method assigns the _common_ handler to all >> the lines. The common handler already calls the Safety IRQ handler - >> stmmac_safety_feat_interrupt(). So should the safety IRQ line is >> separately available it's possible to have the Safety IRQ handlers >> executed concurrently - in framework of the common IRQ events handling >> (if safety IRQ is raised during the common IRQ being handled) and >> individual Safety IRQ. It's prune to the race condition I pointed out >> to in my message above. Did you consider that problem? >> >>> Also, we need the sfty IRQ handling as soon as the fault occured & that can only be handled if we have handler attached with sfty IRQ. >>> stmmac_interrupt() will only be triggerd when interrupt triggered for rx/tx packet . >>> while registerting with sfty IRQ will get triggered as soon as emac HW detect the fault. >> >> Please read my comment more carefully. The safety IRQ can be raised >> during the common IRQ handling, thus the >> stmmac_safety_feat_interrupt() method might get to be concurrently >> executed. >> >> -Serge(y) >> <Suraj> Have testing this on device . We have added print in the both the places stmmac_interrupt() as well as sfty interrupt handler. We can see that sfty interrupt handler is getting triggred first & stmmac_safety_feat_interrupt () code added in stmmac_intterupt() is not getting triggred because looks like interrupt status bit register is already getting cleared as part of sfty interrupt handler. So it looks good . Please let us know if any further comment. Please find the log below . / # [ 1505.602173] sj: stmmac_safety_interrupt from sfty IRQ handler [ 1505.607274] qcom-ethqos 23040000.ethernet eth1: Found correctable error in MTL: 'RXCES: MTL RX Memory Error' [ 1505.617395] sj: stmmac_safety_interrupt from sfty IRQ handler [ 1505.622494] qcom-ethqos 23040000.ethernet eth1: Found correctable error in MTL: 'TXCES: MTL TX Memory Error' [ 1505.888913] sj: stmmac_safety_interrupt from sfty IRQ handler [ 1505.894010] qcom-ethqos 23040000.ethernet eth1: Found correctable error in MTL: 'RXCES: MTL RX Memory Error' [ 1506.605821] sj: stmmac_safety_interrupt from sfty IRQ handler [ 1506.610919] qcom-ethqos 23040000.ethernet eth1: Found correctable error in MTL: 'RXCES: MTL RX Memory Error' [ 1506.621034] sj: stmmac_safety_interrupt from sfty IRQ handler [ 1506.626131] qcom-ethqos 23040000.ethernet eth1: Found correctable error in MTL: 'TXCES: MTL TX Memory Error' [ 1507.613036] sj: stmmac_safety_interrupt from sfty IRQ handler [ 1507.618133] qcom-ethqos 23040000.ethernet eth1: Found correctable error in MTL: 'RXCES: MTL RX Memory Error' [ 1507.628249] sj: stmmac_safety_interrupt from sfty IRQ handler [ 1507.633346] qcom-ethqos 23040000.ethernet eth1: Found correctable error in MTL: 'TXCES: MTL TX Memory Error' [ 1508.619034] sj: stmmac_safety_interrupt from sfty IRQ handler [ 1508.624132] qcom-ethqos 23040000.ethernet eth1: Found correctable error in MTL: 'RXCES: MTL RX Memory Error' [ 1508.634245] sj: stmmac_safety_interrupt from sfty IRQ handler [ 1508.639343] qcom-ethqos 23040000.ethernet eth1: Found correctable error in MTL: 'TXCES: MTL TX Memory Error' [ 1509.631151] sj: stmmac_safety_interrupt from sfty IRQ handler [ 1509.636249] qcom-ethqos 23040000.ethernet eth1: Found correctable error in MTL: 'RXCES: MTL RX Memory Error' >>> >>>>> + if (unlikely(ret < 0)) { >>>>> + netdev_err(priv->dev, >>>>> + "%s: ERROR: allocating the sfty IRQ %d (%d)\n", >>>>> + __func__, priv->sfty_irq, ret); >>>>> + irq_err = REQ_IRQ_ERR_SFTY; >>>>> + goto irq_error; >>>>> + } >>>>> + } >>>>> + >>>>> return 0; >>>>> >>>>> irq_error: >>>>> @@ -7462,6 +7498,7 @@ int stmmac_dvr_probe(struct device *device, >>>>> priv->dev->irq = res->irq; >>>>> priv->wol_irq = res->wol_irq; >>>>> priv->lpi_irq = res->lpi_irq; >>>>> + priv->sfty_irq = res->sfty_irq; >>>>> priv->sfty_ce_irq = res->sfty_ce_irq; >>>>> priv->sfty_ue_irq = res->sfty_ue_irq; >>>>> for (i = 0; i < MTL_MAX_RX_QUEUES; i++) >>>>> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c >>>>> index 70eadc83ca68..ab250161fd79 100644 >>>>> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c >>>>> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c >>>>> @@ -743,6 +743,14 @@ int stmmac_get_platform_resources(struct platform_device *pdev, >>>>> dev_info(&pdev->dev, "IRQ eth_lpi not found\n"); >>>>> } >>>>> >>>>> + stmmac_res->sfty_irq = >>>>> + platform_get_irq_byname_optional(pdev, "sfty"); >>>>> + if (stmmac_res->sfty_irq < 0) { >>>>> + if (stmmac_res->sfty_irq == -EPROBE_DEFER) >>>>> + return -EPROBE_DEFER; >>>>> + dev_info(&pdev->dev, "IRQ safety IRQ not found\n"); >>>>> + } >>>>> + >>>>> stmmac_res->addr = devm_platform_ioremap_resource(pdev, 0); >>>>> >>>>> return PTR_ERR_OR_ZERO(stmmac_res->addr); >>>>> -- >>>>> 2.25.1 >>>>> >>>>> >