> > +static void > > +mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct > > +gdma_event *event) { > > + struct mana_ib_dev *mdev = (struct mana_ib_dev *)ctx; > > + struct mana_ib_qp *qp; > > + struct ib_event ev; > > + unsigned long flag; > > + u32 qpn; > > + > > + switch (event->type) { > > + case GDMA_EQE_RNIC_QP_FATAL: > > + qpn = event->details[0]; > > + xa_lock_irqsave(&mdev->qp_table_rq, flag); > > + qp = xa_load(&mdev->qp_table_rq, qpn); > > + if (qp) > > + refcount_inc(&qp->refcount); > > + xa_unlock_irqrestore(&mdev->qp_table_rq, flag); > > + if (!qp) > > + break; > > + if (qp->ibqp.event_handler) { > > + ev.device = qp->ibqp.device; > > + ev.element.qp = &qp->ibqp; > > + ev.event = IB_EVENT_QP_FATAL; > > + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); > > + } > > + if (refcount_dec_and_test(&qp->refcount)) > > + complete(&qp->free); > > + break; > > + default: > > + break; > > + } > > +} > > <...> > > > @@ -620,6 +626,11 @@ static int mana_ib_destroy_rc_qp(struct > mana_ib_qp *qp, struct ib_udata *udata) > > container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev); > > int i; > > > > + xa_erase_irq(&mdev->qp_table_rq, qp->ibqp.qp_num); > > + if (refcount_dec_and_test(&qp->refcount)) > > + complete(&qp->free); > > + wait_for_completion(&qp->free); > > This flow is unclear to me. You are destroying the QP and need to make sure > that mana_ib_event_handler is not running at that point and not mess with > refcount and complete. Hi, Leon. Thanks for the concern. Here is the clarification: The flow is similar to what mlx5 does with mlx5_get_rsc and mlx5_core_put_rsc. When we get a QP resource, we increment the counter while holding the xa lock. So, when we destroy a QP, the code first removes the QP from the xa to ensure that nobody can get it. And then we check whether mana_ib_event_handler is holding it with refcount_dec_and_test. If the QP is held, then we wait for the mana_ib_event_handler to call complete. Once the wait returns, it is impossible to get the QP referenced, since it is not in the xa and all references have been removed. So now we can release the QP in HW, so the QPN can be assigned to new QPs. Leon, have you spotted a bug? Or just wanted to understand the flow? Thanks > > Thanks