This patch tries to works around erratum DS80000789E 6 of the mcp2518fd, the other variants of the chip family (mcp2517fd and mcp251863) are probably also affected. In the bad case, the driver reads a too large head index. In the original code, the driver always trusted the read value, which caused old, already processed CAN frames or new, incompletely written CAN frames to be (re-)processed. To work around this issue, keep a per FIFO timestamp [1] of the last valid received CAN frame and compare against the timestamp of every received CAN frame. If an old CAN frame is detected, abort the iteration and mark the number of valid CAN frames as processed in the chip by incrementing the FIFO's tail index. Further tests showed that this workaround can recognize old CAN frames, but a small time window remains in which partially written CAN frames [2] are not recognized but then processed. These CAN frames have the correct data and time stamps, but the DLC has not yet been updated. [1] As the raw timestamp overflows every 107 seconds (at the usual clock rate of 40 MHz) convert it to nanoseconds with the timecounter framework and use this to detect stale CAN frames. Link: https://lore.kernel.org/all/BL3PR11MB64844C1C95CA3BDADAE4D8CCFBC99@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx [2] Reported-by: Stefan Althöfer <Stefan.Althoefer@xxxxxxxxxxx> Closes: https://lore.kernel.org/all/FR0P281MB1966273C216630B120ABB6E197E89@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx Tested-by: Stefan Althöfer <Stefan.Althoefer@xxxxxxxxxxx> Tested-by: Thomas Kopp <thomas.kopp@xxxxxxxxxxxxx> Signed-off-by: Marc Kleine-Budde <mkl@xxxxxxxxxxxxxx> --- .../net/can/spi/mcp251xfd/mcp251xfd-ring.c | 1 + drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c | 32 +++++++++++++++++-- drivers/net/can/spi/mcp251xfd/mcp251xfd.h | 3 ++ 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c index b308a375e26e..8464fc4f37d0 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c @@ -206,6 +206,7 @@ mcp251xfd_ring_init_rx(struct mcp251xfd_priv *priv, u16 *base, u8 *fifo_nr) int i, j; mcp251xfd_for_each_rx_ring(priv, rx_ring, i) { + rx_ring->last_valid = timecounter_read(&priv->tc); rx_ring->head = 0; rx_ring->tail = 0; rx_ring->base = *base; diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c index a79e6c661ecc..fe897f3e4c12 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c @@ -159,8 +159,6 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv, if (!(hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR)) memcpy(cfd->data, hw_rx_obj->data, cfd->len); - - mcp251xfd_skb_set_timestamp_raw(priv, skb, hw_rx_obj->ts); } static int @@ -171,8 +169,26 @@ mcp251xfd_handle_rxif_one(struct mcp251xfd_priv *priv, struct net_device_stats *stats = &priv->ndev->stats; struct sk_buff *skb; struct canfd_frame *cfd; + u64 timestamp; int err; + /* According to mcp2518fd erratum DS80000789E 6. the FIFOCI + * bits of a FIFOSTA register, here the RX FIFO head index + * might be corrupted and we might process past the RX FIFO's + * head into old CAN frames. + * + * Compare the timestamp of currently processed CAN frame with + * last valid frame received. Abort with -EBADMSG if an old + * CAN frame is detected. + */ + timestamp = timecounter_cyc2time(&priv->tc, hw_rx_obj->ts); + if (timestamp <= ring->last_valid) { + stats->rx_fifo_errors++; + + return -EBADMSG; + } + ring->last_valid = timestamp; + if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_FDF) skb = alloc_canfd_skb(priv->ndev, &cfd); else @@ -183,6 +199,7 @@ mcp251xfd_handle_rxif_one(struct mcp251xfd_priv *priv, return 0; } + mcp251xfd_skb_set_timestamp(skb, timestamp); mcp251xfd_hw_rx_obj_to_skb(priv, hw_rx_obj, skb); err = can_rx_offload_queue_timestamp(&priv->offload, skb, hw_rx_obj->ts); if (err) @@ -265,7 +282,16 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv, err = mcp251xfd_handle_rxif_one(priv, ring, (void *)hw_rx_obj + i * ring->obj_size); - if (err) + + /* -EBADMSG means we're affected by mcp2518fd + * erratum DS80000789E 6., i.e. the timestamp + * in the RX object is older that the last + * valid received CAN frame. Don't process any + * further and mark processed frames as good. + */ + if (err == -EBADMSG) + return mcp251xfd_handle_rxif_ring_uinc(priv, ring, i); + else if (err) return err; } diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h index 453a2b193c75..d32ece3d7aee 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h @@ -552,6 +552,9 @@ struct mcp251xfd_rx_ring { unsigned int head; unsigned int tail; + /* timestamp of the last valid received CAN frame */ + u64 last_valid; + u16 base; u8 nr; u8 fifo_nr; -- 2.43.0