3.10-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Eugenia Emantayev <eugenia@xxxxxxxxxxxx>

commit 2d4b646613d6b12175b017aca18113945af1faf3 upstream.

Fix a race between BlueFlame flow and stamping in post send flow.

Example:
SW: Build WQE 0 on the TX buffer, except the ownership bit
SW: Set ownership for WQE 0 on the TX buffer
SW: Ring doorbell for WQE 0
SW: Build WQE 1 on the TX buffer, except the ownership bit
SW: Set ownership for WQE 1 on the TX buffer
HW: Read WQE 0 and then WQE 1, before doorbell was rung/BF was done for WQE 1
HW: Produce CQEs for WQE 0 and WQE 1
SW: Process the CQEs, and stamp WQE 0 and WQE 1 accordingly (on the TX buffer)
SW: Copy WQE 1 from the TX buffer to the BF register - ALREADY STAMPED!
HW: CQE error with index 0xFFFF - the BF WQE's control segment is STAMPED,
    so the BF index is 0xFFFF. Error: Invalid Opcode.

As a result QP enters the error state and no traffic can be sent.

Solution:
When stamping - do not stamp last completed wqe.

Signed-off-by: Eugenia Emantayev <eugenia@xxxxxxxxxxxx>
Signed-off-by: Amir Vadai <amirv@xxxxxxxxxxxx>
Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx>
Cc: Vinson Lee <vlee@xxxxxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

---
 drivers/net/ethernet/mellanox/mlx4/en_tx.c |   61 +++++++++++++++++++----------
 1 file changed, 42 insertions(+), 19 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -191,6 +191,39 @@ void mlx4_en_deactivate_tx_ring(struct m
 	       MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp);
 }
 
+static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
+			      struct mlx4_en_tx_ring *ring, int index,
+			      u8 owner)
+{
+	__be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT));
+	struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
+	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
+	void *end = ring->buf + ring->buf_size;
+	__be32 *ptr = (__be32 *)tx_desc;
+	int i;
+
+	/* Optimize the common case when there are no wraparounds */
+	if (likely((void *)tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
+		/* Stamp the freed descriptor */
+		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE;
+		     i += STAMP_STRIDE) {
+			*ptr = stamp;
+			ptr += STAMP_DWORDS;
+		}
+	} else {
+		/* Stamp the freed descriptor */
+		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE;
+		     i += STAMP_STRIDE) {
+			*ptr = stamp;
+			ptr += STAMP_DWORDS;
+			if ((void *)ptr >= end) {
+				ptr = ring->buf;
+				stamp ^= cpu_to_be32(0x80000000);
+			}
+		}
+	}
+}
+
 static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 				struct mlx4_en_tx_ring *ring,
@@ -205,8 +238,6 @@ static u32 mlx4_en_free_tx_desc(struct m
 	void *end = ring->buf + ring->buf_size;
 	int frags = skb_shinfo(skb)->nr_frags;
 	int i;
-	__be32 *ptr = (__be32 *)tx_desc;
-	__be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT));
 	struct skb_shared_hwtstamps hwts;
 
 	if (timestamp) {
@@ -232,12 +263,6 @@ static u32 mlx4_en_free_tx_desc(struct m
 					skb_frag_size(frag), PCI_DMA_TODEVICE);
 			}
 		}
-		/* Stamp the freed descriptor */
-		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
-			*ptr = stamp;
-			ptr += STAMP_DWORDS;
-		}
-
 	} else {
 		if (!tx_info->inl) {
 			if ((void *) data >= end) {
@@ -263,16 +288,6 @@ static u32 mlx4_en_free_tx_desc(struct m
 				++data;
 			}
 		}
-		/* Stamp the freed descriptor */
-		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
-			*ptr = stamp;
-			ptr += STAMP_DWORDS;
-			if ((void *) ptr >= end) {
-				ptr = ring->buf;
-				stamp ^= cpu_to_be32(0x80000000);
-			}
-		}
-
 	}
 	dev_kfree_skb_any(skb);
 	return tx_info->nr_txbb;
@@ -318,8 +333,9 @@ static void mlx4_en_process_tx_cq(struct
 	struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring];
 	struct mlx4_cqe *cqe;
 	u16 index;
-	u16 new_index, ring_index;
+	u16 new_index, ring_index, stamp_index;
 	u32 txbbs_skipped = 0;
+	u32 txbbs_stamp = 0;
 	u32 cons_index = mcq->cons_index;
 	int size = cq->size;
 	u32 size_mask = ring->size_mask;
@@ -335,6 +351,7 @@ static void mlx4_en_process_tx_cq(struct
 	index = cons_index & size_mask;
 	cqe = &buf[(index << factor) + factor];
 	ring_index = ring->cons & size_mask;
+	stamp_index = ring_index;
 
 	/* Process all completed CQEs */
 	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
@@ -359,6 +376,12 @@ static void mlx4_en_process_tx_cq(struct
 					priv, ring, ring_index,
 					!!((ring->cons + txbbs_skipped) &
 					ring->size), timestamp);
+
+			mlx4_en_stamp_wqe(priv, ring, stamp_index,
+					  !!((ring->cons + txbbs_stamp) &
+					  ring->size));
+			stamp_index = ring_index;
+			txbbs_stamp = txbbs_skipped;
 			packets++;
 			bytes += ring->tx_info[ring_index].nr_bytes;
 		} while (ring_index != new_index);

-- 
To unsubscribe from this list: send the line "unsubscribe stable" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html
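
For illustration, the deferred-stamping scheme this patch introduces can be
modelled in a few lines of stand-alone C.  This is only a sketch of the idea,
not driver code: every name below (ring, wqe, stamp_wqe, process_completions,
STAMP, RING_SIZE) is invented for the example, and it ignores the ownership
bit, buffer wraparound and multi-TXBB descriptors that the real
mlx4_en_stamp_wqe()/mlx4_en_process_tx_cq() handle.  The point it shows is
that stamping always lags one completion behind, so the most recently
completed WQE stays intact for a possible BlueFlame copy.

/*
 * Illustration only -- not part of the patch above.  Simplified model of
 * deferred stamping: stamp_index always lags one completion behind
 * ring_index, so the newest completed WQE is never stamped until a later
 * completion arrives.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 8
#define STAMP     0x7fffffffu           /* arbitrary "freed" marker */

struct wqe {
	uint32_t ctrl;                  /* stands in for the control segment */
};

static struct wqe ring[RING_SIZE];
static int stamp_index = -1;            /* completed but not yet stamped WQE */

static void stamp_wqe(int index)
{
	ring[index].ctrl = STAMP;
}

/* Consume completions for WQEs [first, last), stamping one step behind. */
static void process_completions(int first, int last)
{
	int ring_index;

	for (ring_index = first; ring_index != last;
	     ring_index = (ring_index + 1) % RING_SIZE) {
		/* ... unmap DMA buffers / free the skb for this WQE ... */

		if (stamp_index >= 0)
			stamp_wqe(stamp_index); /* the previous completion */
		stamp_index = ring_index;       /* stamped on a later round */
	}
	/*
	 * ring[stamp_index] is left untouched: the post-send path may still
	 * copy it to the BlueFlame register, so it must remain valid.
	 */
}

int main(void)
{
	int i;

	for (i = 0; i < RING_SIZE; i++)
		ring[i].ctrl = 0x80000000u | (uint32_t)i;  /* "valid" WQEs */

	process_completions(0, 4);      /* WQEs 0-3 complete: 0-2 stamped */
	process_completions(4, 6);      /* WQEs 4-5 complete: 3-4 stamped */

	for (i = 0; i < RING_SIZE; i++)
		printf("wqe %d: 0x%08" PRIx32 "%s\n", i, ring[i].ctrl,
		       ring[i].ctrl == STAMP ? " (stamped)" : "");
	return 0;
}

The only cost of the scheme is that the newest completed descriptor is
reclaimed one completion later; in the patch itself that bookkeeping is what
the stamp_index/txbbs_stamp variables in the last hunk provide.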