Hi, On 1/11/24 18:31, Liming Sun wrote: > Starting from Linux 5.16 kernel, Tx timeout mechanism was added > in the virtio_net driver which prints the "Tx timeout" warning > message when a packet stays in Tx queue for too long. Below is an > example of the reported message: > > "[494105.316739] virtio_net virtio1 tmfifo_net0: TX timeout on > queue: 0, sq: output.0, vq: 0×1, name: output.0, usecs since > last trans: 3079892256". > > This issue could happen when external host driver which drains the > FIFO is restared, stopped or upgraded. To avoid such confusing > "Tx timeout" messages, this commit adds logic to drop the outstanding > Tx packet if it's not able to transmit in two seconds due to Tx FIFO > full, which can be considered as congestion or out-of-resource drop. > > This commit also handles the special case that the packet is half- > transmitted into the Tx FIFO. In such case, the packet is discarded > with remaining length stored in vring->rem_padding. So paddings with > zeros can be sent out when Tx space is available to maintain the > integrity of the packet format. The padded packet will be dropped on > the receiving side. > > Signed-off-by: Liming Sun <limings@xxxxxxxxxx> Thank you for your patch/series, I've applied this patch (series) to my review-hans branch: https://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git/log/?h=review-hans Note it will show up in the pdx86 review-hans branch once I've pushed my local branch there, which might take a while. I will include this patch in my next fixes pull-req to Linus for the current kernel development cycle. Regards, Hans > --- > v2->v3: > Updates for Ilpo's comments: > - Revises commit message to avoid confusion. > v2: Fixed formatting warning > v1: Initial version > --- > drivers/platform/mellanox/mlxbf-tmfifo.c | 67 ++++++++++++++++++++++++ > 1 file changed, 67 insertions(+) > > diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c > index 5c683b4eaf10..f39b7b9d2bfe 100644 > --- a/drivers/platform/mellanox/mlxbf-tmfifo.c > +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c > @@ -47,6 +47,9 @@ > /* Message with data needs at least two words (for header & data). */ > #define MLXBF_TMFIFO_DATA_MIN_WORDS 2 > > +/* Tx timeout in milliseconds. */ > +#define TMFIFO_TX_TIMEOUT 2000 > + > /* ACPI UID for BlueField-3. */ > #define TMFIFO_BF3_UID 1 > > @@ -62,12 +65,14 @@ struct mlxbf_tmfifo; > * @drop_desc: dummy desc for packet dropping > * @cur_len: processed length of the current descriptor > * @rem_len: remaining length of the pending packet > + * @rem_padding: remaining bytes to send as paddings > * @pkt_len: total length of the pending packet > * @next_avail: next avail descriptor id > * @num: vring size (number of descriptors) > * @align: vring alignment size > * @index: vring index > * @vdev_id: vring virtio id (VIRTIO_ID_xxx) > + * @tx_timeout: expire time of last tx packet > * @fifo: pointer to the tmfifo structure > */ > struct mlxbf_tmfifo_vring { > @@ -79,12 +84,14 @@ struct mlxbf_tmfifo_vring { > struct vring_desc drop_desc; > int cur_len; > int rem_len; > + int rem_padding; > u32 pkt_len; > u16 next_avail; > int num; > int align; > int index; > int vdev_id; > + unsigned long tx_timeout; > struct mlxbf_tmfifo *fifo; > }; > > @@ -819,6 +826,50 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring, > return true; > } > > +static void mlxbf_tmfifo_check_tx_timeout(struct mlxbf_tmfifo_vring *vring) > +{ > + unsigned long flags; > + > + /* Only handle Tx timeout for network vdev. */ > + if (vring->vdev_id != VIRTIO_ID_NET) > + return; > + > + /* Initialize the timeout or return if not expired. */ > + if (!vring->tx_timeout) { > + /* Initialize the timeout. */ > + vring->tx_timeout = jiffies + > + msecs_to_jiffies(TMFIFO_TX_TIMEOUT); > + return; > + } else if (time_before(jiffies, vring->tx_timeout)) { > + /* Return if not timeout yet. */ > + return; > + } > + > + /* > + * Drop the packet after timeout. The outstanding packet is > + * released and the remaining bytes will be sent with padding byte 0x00 > + * as a recovery. On the peer(host) side, the padding bytes 0x00 will be > + * either dropped directly, or appended into existing outstanding packet > + * thus dropped as corrupted network packet. > + */ > + vring->rem_padding = round_up(vring->rem_len, sizeof(u64)); > + mlxbf_tmfifo_release_pkt(vring); > + vring->cur_len = 0; > + vring->rem_len = 0; > + vring->fifo->vring[0] = NULL; > + > + /* > + * Make sure the load/store are in order before > + * returning back to virtio. > + */ > + virtio_mb(false); > + > + /* Notify upper layer. */ > + spin_lock_irqsave(&vring->fifo->spin_lock[0], flags); > + vring_interrupt(0, vring->vq); > + spin_unlock_irqrestore(&vring->fifo->spin_lock[0], flags); > +} > + > /* Rx & Tx processing of a queue. */ > static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx) > { > @@ -841,6 +892,7 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx) > return; > > do { > +retry: > /* Get available FIFO space. */ > if (avail == 0) { > if (is_rx) > @@ -851,6 +903,17 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx) > break; > } > > + /* Insert paddings for discarded Tx packet. */ > + if (!is_rx) { > + vring->tx_timeout = 0; > + while (vring->rem_padding >= sizeof(u64)) { > + writeq(0, vring->fifo->tx.data); > + vring->rem_padding -= sizeof(u64); > + if (--avail == 0) > + goto retry; > + } > + } > + > /* Console output always comes from the Tx buffer. */ > if (!is_rx && devid == VIRTIO_ID_CONSOLE) { > mlxbf_tmfifo_console_tx(fifo, avail); > @@ -860,6 +923,10 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx) > /* Handle one descriptor. */ > more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail); > } while (more); > + > + /* Check Tx timeout. */ > + if (avail <= 0 && !is_rx) > + mlxbf_tmfifo_check_tx_timeout(vring); > } > > /* Handle Rx or Tx queues. */