On Mon, Oct 28, 2019 at 05:45:44PM +0800, Yixian Liu wrote:
> @@ -1998,6 +2000,17 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
> }
> }
>
> + snprintf(workq_name, HNS_ROCE_WORKQ_NAME_LEN - 1,
> + "hns_roce_%d_flush_wq", device_id);
> + device_id++;
> +
> + hr_dev->flush_workq = alloc_workqueue(workq_name, WQ_HIGHPRI, 0);
> + if (!hr_dev->flush_workq) {

Why is this so time critical?

> diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
> index bec48f2..2c8f726 100644
> +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
> @@ -43,6 +43,49 @@
>
> #define SQP_NUM (2 * HNS_ROCE_MAX_PORTS)
>
> +static void flush_work_handle(struct work_struct *work)
> +{
> + struct hns_roce_flush_work *flush_work = container_of(work,
> + struct hns_roce_flush_work, work);
> + struct hns_roce_qp *hr_qp = flush_work->hr_qp;
> + struct device *dev = flush_work->hr_dev->dev;
> + struct ib_qp_attr attr;
> + int attr_mask;
> + int ret;
> +
> + attr_mask = IB_QP_STATE;
> + attr.qp_state = IB_QPS_ERR;
> +
> + ret = hns_roce_modify_qp(&hr_qp->ibqp, &attr, attr_mask, NULL);
> + if (ret)
> + dev_err(dev, "Modify QP to error state failed(%d) during CQE flush\n",
> + ret);

There is something wrong with your description as all this seems to do is
tell the HW to go to the ERR state. Why don't you do this from
hns_roce_irq_work_handle()?
> + kfree(flush_work);
> +
> + /*
> + * make sure we signal QP destroy leg that flush QP was completed
> + * so that it can safely proceed ahead now and destroy QP
> + */
> + if (atomic_dec_and_test(&hr_qp->refcount))
> + complete(&hr_qp->free);
> +}
> +
> +void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
> +{
> + struct hns_roce_flush_work *flush_work;
> +
> + flush_work = kzalloc(sizeof(struct hns_roce_flush_work), GFP_ATOMIC);
> + if (!flush_work)
> + return;

Don't do things that can fail here.

> +
> + flush_work->hr_dev = hr_dev;
> + flush_work->hr_qp = hr_qp;
> + INIT_WORK(&flush_work->work, flush_work_handle);
> + atomic_inc(&hr_qp->refcount);
> + queue_work(hr_dev->flush_workq, &flush_work->work);
> +}
> +
> void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
> {
> struct device *dev = hr_dev->dev;