From: Mike Marciniszyn <mike.marciniszyn@xxxxxxxxxxxxxxxxxxxx> The current rdma_netdev handling in ipoib hooks the tx_timeout handler, but prints out a totally useless message that prevents effective debugging, especially when multiple transmit queues are being used. Add a tx_timeout rdma_netdev hook and implement the callback in the hfi1 driver to print additional information. The existing unhelpful message is avoided when the driver provides a callback. Reviewed-by: Kaike Wan <kaike.wan@xxxxxxxxx> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@xxxxxxxxxxxxxxxxxxxx> --- drivers/infiniband/hw/hfi1/ipoib.h | 2 ++ drivers/infiniband/hw/hfi1/ipoib_main.c | 1 + drivers/infiniband/hw/hfi1/ipoib_tx.c | 29 +++++++++++++++++++++++++++++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 5 +++++ include/rdma/ib_verbs.h | 2 ++ 5 files changed, 39 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h index f650cac..1659beb 100644 --- a/drivers/infiniband/hw/hfi1/ipoib.h +++ b/drivers/infiniband/hw/hfi1/ipoib.h @@ -147,4 +147,6 @@ int hfi1_ipoib_rn_get_params(struct ib_device *device, enum rdma_netdev_t type, struct rdma_netdev_alloc_params *params); +void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q); + #endif /* _IPOIB_H */ diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c index 3242290..b8838fa 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_main.c +++ b/drivers/infiniband/hw/hfi1/ipoib_main.c @@ -204,6 +204,7 @@ static int hfi1_ipoib_setup_rn(struct ib_device *device, int rc; rn->send = hfi1_ipoib_send; + rn->tx_timeout = hfi1_ipoib_tx_timeout; rn->attach_mcast = hfi1_ipoib_mcast_attach; rn->detach_mcast = hfi1_ipoib_mcast_detach; rn->set_id = hfi1_ipoib_set_id; diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c index 5129dc9..e8dece5 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_tx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ 
-866,3 +866,32 @@ void hfi1_ipoib_napi_tx_disable(struct net_device *dev) (void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items); } } + +void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q) +{ + struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + struct hfi1_ipoib_txq *txq = &priv->txqs[q]; + u64 completed = atomic64_read(&txq->complete_txreqs); + + dd_dev_info(priv->dd, "timeout txq %llx q %u stopped %u stops %d no_desc %d ring_full %d\n", + (unsigned long long)txq, q, + __netif_subqueue_stopped(dev, txq->q_idx), + atomic_read(&txq->stops), + atomic_read(&txq->no_desc), + atomic_read(&txq->ring_full)); + dd_dev_info(priv->dd, "sde %llx engine %u\n", + (unsigned long long)txq->sde, + txq->sde ? txq->sde->this_idx : 0); + dd_dev_info(priv->dd, "flow %x\n", txq->flow.as_int); + dd_dev_info(priv->dd, "sent %llu completed %llu used %llu\n", + txq->sent_txreqs, completed, hfi1_ipoib_used(txq)); + dd_dev_info(priv->dd, "tx_queue_len %u max_items %lu\n", + dev->tx_queue_len, txq->tx_ring.max_items); + dd_dev_info(priv->dd, "head %lu tail %lu\n", + txq->tx_ring.head, txq->tx_ring.tail); + dd_dev_info(priv->dd, "wait queued %u\n", + !list_empty(&txq->wait.list)); + dd_dev_info(priv->dd, "tx_list empty %u\n", + list_empty(&txq->tx_list)); +} + diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index e16b40c..2fb2fa16 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1181,7 +1181,12 @@ static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) static void ipoib_timeout(struct net_device *dev, unsigned int txqueue) { struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct rdma_netdev *rn = netdev_priv(dev); + if (rn->tx_timeout) { + rn->tx_timeout(dev, txqueue); + return; + } ipoib_warn(priv, "transmit timeout: latency %d msecs\n", jiffies_to_msecs(jiffies - dev_trans_start(dev))); ipoib_warn(priv, diff --git 
a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 21c19b1..84f7084 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2215,6 +2215,8 @@ struct rdma_netdev { int set_qkey, u32 qkey); int (*detach_mcast)(struct net_device *dev, struct ib_device *hca, union ib_gid *gid, u16 mlid); + /* timeout */ + void (*tx_timeout)(struct net_device *dev, unsigned int txqueue); }; struct rdma_netdev_alloc_params { -- 1.8.3.1