This event is handled by the RDMA core layer.

Signed-off-by: Max Gurtovoy <mgurtovoy@xxxxxxxxxx>
---
 drivers/infiniband/ulp/ipoib/ipoib.h    | 33 +-----------
 drivers/infiniband/ulp/ipoib/ipoib_cm.c | 71 +++----------------------
 2 files changed, 8 insertions(+), 96 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 963e936da5e3..0f1e4b431af4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -198,37 +198,10 @@ struct ipoib_cm_data {
 	__be32 mtu;
 };
 
-/*
- * Quoting 10.3.1 Queue Pair and EE Context States:
- *
- * Note, for QPs that are associated with an SRQ, the Consumer should take the
- * QP through the Error State before invoking a Destroy QP or a Modify QP to the
- * Reset State. The Consumer may invoke the Destroy QP without first performing
- * a Modify QP to the Error State and waiting for the Affiliated Asynchronous
- * Last WQE Reached Event. However, if the Consumer does not wait for the
- * Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment
- * leakage may occur. Therefore, it is good programming practice to tear down a
- * QP that is associated with an SRQ by using the following process:
- *
- * - Put the QP in the Error State
- * - Wait for the Affiliated Asynchronous Last WQE Reached Event;
- * - either:
- *       drain the CQ by invoking the Poll CQ verb and either wait for CQ
- *       to be empty or the number of Poll CQ operations has exceeded
- *       CQ capacity size;
- *   - or
- *       post another WR that completes on the same CQ and wait for this
- *       WR to return as a WC;
- * - and then invoke a Destroy QP or Reset QP.
- *
- * We use the second option and wait for a completion on the
- * same CQ before destroying QPs attached to our SRQ.
- */
-
 enum ipoib_cm_state {
 	IPOIB_CM_RX_LIVE,
 	IPOIB_CM_RX_ERROR, /* Ignored by stale task */
-	IPOIB_CM_RX_FLUSH  /* Last WQE Reached event observed */
+	IPOIB_CM_RX_FLUSH
 };
 
 struct ipoib_cm_rx {
@@ -267,9 +240,7 @@ struct ipoib_cm_dev_priv {
 	struct ib_cm_id    *id;
 	struct list_head passive_ids;   /* state: LIVE */
 	struct list_head rx_error_list; /* state: ERROR */
-	struct list_head rx_flush_list; /* state: FLUSH, drain not started */
-	struct list_head rx_drain_list; /* state: FLUSH, drain started */
-	struct list_head rx_reap_list;  /* state: FLUSH, drain done */
+	struct list_head rx_reap_list;  /* state: FLUSH */
 	struct work_struct start_task;
 	struct work_struct reap_task;
 	struct work_struct skb_task;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index b610d36295bb..18ead80b5756 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -71,12 +71,6 @@ static struct ib_qp_attr ipoib_cm_err_attr = {
 	.qp_state = IB_QPS_ERR
 };
 
-#define IPOIB_CM_RX_DRAIN_WRID 0xffffffff
-
-static struct ib_send_wr ipoib_cm_rx_drain_wr = {
-	.opcode = IB_WR_SEND,
-};
-
 static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
 			       const struct ib_cm_event *event);
 
@@ -208,50 +202,11 @@ static void ipoib_cm_free_rx_ring(struct net_device *dev,
 	vfree(rx_ring);
 }
 
-static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
-{
-	struct ipoib_cm_rx *p;
-
-	/* We only reserved 1 extra slot in CQ for drain WRs, so
-	 * make sure we have at most 1 outstanding WR. */
-	if (list_empty(&priv->cm.rx_flush_list) ||
-	    !list_empty(&priv->cm.rx_drain_list))
-		return;
-
-	/*
-	 * QPs on flush list are error state. This way, a "flush
-	 * error" WC will be immediately generated for each WR we post.
-	 */
-	p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
-	ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID;
-	if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, NULL))
-		ipoib_warn(priv, "failed to post drain wr\n");
-
-	list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
-}
-
-static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
-{
-	struct ipoib_cm_rx *p = ctx;
-	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
-	unsigned long flags;
-
-	if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
-		return;
-
-	spin_lock_irqsave(&priv->lock, flags);
-	list_move(&p->list, &priv->cm.rx_flush_list);
-	p->state = IPOIB_CM_RX_FLUSH;
-	ipoib_cm_start_rx_drain(priv);
-	spin_unlock_irqrestore(&priv->lock, flags);
-}
-
 static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
 					   struct ipoib_cm_rx *p)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ib_qp_init_attr attr = {
-		.event_handler = ipoib_cm_rx_event_handler,
 		.send_cq = priv->recv_cq, /* For drain WR */
 		.recv_cq = priv->recv_cq,
 		.srq = priv->cm.srq,
@@ -479,8 +434,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id,
 	spin_lock_irq(&priv->lock);
 	queue_delayed_work(priv->wq,
 			   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
-	/* Add this entry to passive ids list head, but do not re-add it
-	 * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
+	/* Add this entry to passive ids list head. */
 	p->jiffies = jiffies;
 	if (p->state == IPOIB_CM_RX_LIVE)
 		list_move(&p->list, &priv->cm.passive_ids);
@@ -574,15 +528,8 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 		       wr_id, wc->status);
 
 	if (unlikely(wr_id >= ipoib_recvq_size)) {
-		if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) {
-			spin_lock_irqsave(&priv->lock, flags);
-			list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
-			ipoib_cm_start_rx_drain(priv);
-			queue_work(priv->wq, &priv->cm.rx_reap_task);
-			spin_unlock_irqrestore(&priv->lock, flags);
-		} else
-			ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
-				   wr_id, ipoib_recvq_size);
+		ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
+			   wr_id, ipoib_recvq_size);
 		return;
 	}
 
@@ -603,6 +550,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 		else {
 			if (!--p->recv_count) {
 				spin_lock_irqsave(&priv->lock, flags);
+				p->state = IPOIB_CM_RX_FLUSH;
 				list_move(&p->list, &priv->cm.rx_reap_list);
 				spin_unlock_irqrestore(&priv->lock, flags);
 				queue_work(priv->wq, &priv->cm.rx_reap_task);
@@ -912,6 +860,7 @@ static void ipoib_cm_free_rx_reap_list(struct net_device *dev)
 	spin_unlock_irq(&priv->lock);
 
 	list_for_each_entry_safe(rx, n, &list, list) {
+		ib_drain_qp(rx->qp);
 		ib_destroy_cm_id(rx->id);
 		ib_destroy_qp(rx->qp);
 		if (!ipoib_cm_has_srq(dev)) {
@@ -952,21 +901,15 @@ void ipoib_cm_dev_stop(struct net_device *dev)
 	/* Wait for all RX to be drained */
 	begin = jiffies;
 
-	while (!list_empty(&priv->cm.rx_error_list) ||
-	       !list_empty(&priv->cm.rx_flush_list) ||
-	       !list_empty(&priv->cm.rx_drain_list)) {
+	while (!list_empty(&priv->cm.rx_error_list)) {
 		if (time_after(jiffies, begin + 5 * HZ)) {
 			ipoib_warn(priv, "RX drain timing out\n");
 
 			/*
 			 * assume the HW is wedged and just free up everything.
 			 */
-			list_splice_init(&priv->cm.rx_flush_list,
-					 &priv->cm.rx_reap_list);
 			list_splice_init(&priv->cm.rx_error_list,
 					 &priv->cm.rx_reap_list);
-			list_splice_init(&priv->cm.rx_drain_list,
-					 &priv->cm.rx_reap_list);
 			break;
 		}
 		spin_unlock_irq(&priv->lock);
@@ -1589,8 +1532,6 @@ int ipoib_cm_dev_init(struct net_device *dev)
 	INIT_LIST_HEAD(&priv->cm.reap_list);
 	INIT_LIST_HEAD(&priv->cm.start_list);
 	INIT_LIST_HEAD(&priv->cm.rx_error_list);
-	INIT_LIST_HEAD(&priv->cm.rx_flush_list);
-	INIT_LIST_HEAD(&priv->cm.rx_drain_list);
 	INIT_LIST_HEAD(&priv->cm.rx_reap_list);
 	INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start);
 	INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap);
-- 
2.18.1