On 2024/7/5 18:47, Zhu Yanjun wrote: > 在 2024/7/5 16:59, Junxian Huang 写道: >> CEQEs are handled in interrupt handler currently. This may cause the >> CPU core staying in interrupt context too long and lead to soft lockup >> under heavy load. >> >> Handle CEQEs in tasklet and set an upper limit for the number of CEQE >> handled by a single call of tasklet. > > https://patchwork.kernel.org/project/linux-rdma/cover/20240621050525.3720069-1-allen.lkml@xxxxxxxxx/ > > In the above link, it seems that tasklet is not good enough. The tasklet is marked deprecated and has some design flaws. It is being replace BH workqueue. > > So directly use workqueue instead of tasklet? > > Zhu Yanjun > Thanks, I'll have a look at it. Junxian >> >> Fixes: a5073d6054f7 ("RDMA/hns: Add eq support of hip08") >> Signed-off-by: Junxian Huang <huangjunxian6@xxxxxxxxxxxxx> >> --- >> drivers/infiniband/hw/hns/hns_roce_device.h | 1 + >> drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 88 ++++++++++++--------- >> 2 files changed, 53 insertions(+), 36 deletions(-) >> >> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h >> index 05005079258c..5a2445f357ab 100644 >> --- a/drivers/infiniband/hw/hns/hns_roce_device.h >> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h >> @@ -717,6 +717,7 @@ struct hns_roce_eq { >> int shift; >> int event_type; >> int sub_type; >> + struct tasklet_struct tasklet; >> }; >> struct hns_roce_eq_table { >> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c >> index ff135df1a761..f73de06a3ca5 100644 >> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c >> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c >> @@ -6146,33 +6146,11 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq) >> !!(eq->cons_index & eq->entries)) ? ceqe : NULL; >> } >> -static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, >> - struct hns_roce_eq *eq) >> +static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_eq *eq) >> { >> - struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq); >> - irqreturn_t ceqe_found = IRQ_NONE; >> - u32 cqn; >> - >> - while (ceqe) { >> - /* Make sure we read CEQ entry after we have checked the >> - * ownership bit >> - */ >> - dma_rmb(); >> - >> - cqn = hr_reg_read(ceqe, CEQE_CQN); >> - >> - hns_roce_cq_completion(hr_dev, cqn); >> - >> - ++eq->cons_index; >> - ceqe_found = IRQ_HANDLED; >> - atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]); >> - >> - ceqe = next_ceqe_sw_v2(eq); >> - } >> + tasklet_schedule(&eq->tasklet); >> - update_eq_db(eq); >> - >> - return IRQ_RETVAL(ceqe_found); >> + return IRQ_HANDLED; >> } >> static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr) >> @@ -6183,7 +6161,7 @@ static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr) >> if (eq->type_flag == HNS_ROCE_CEQ) >> /* Completion event interrupt */ >> - int_work = hns_roce_v2_ceq_int(hr_dev, eq); >> + int_work = hns_roce_v2_ceq_int(eq); >> else >> /* Asynchronous event interrupt */ >> int_work = hns_roce_v2_aeq_int(hr_dev, eq); >> @@ -6551,6 +6529,34 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, >> return ret; >> } >> +static void hns_roce_ceq_task(struct tasklet_struct *task) >> +{ >> + struct hns_roce_eq *eq = from_tasklet(eq, task, tasklet); >> + struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq); >> + struct hns_roce_dev *hr_dev = eq->hr_dev; >> + int ceqe_num = 0; >> + u32 cqn; >> + >> + while (ceqe && ceqe_num < hr_dev->caps.ceqe_depth) { >> + /* Make sure we read CEQ entry after we have checked the >> + * ownership bit >> + */ >> + dma_rmb(); >> + >> + cqn = hr_reg_read(ceqe, CEQE_CQN); >> + >> + hns_roce_cq_completion(hr_dev, cqn); >> + >> + ++eq->cons_index; >> + ++ceqe_num; >> + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]); >> + >> + ceqe = next_ceqe_sw_v2(eq); >> + } >> + >> + update_eq_db(eq); >> +} >> + >> static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num, >> int comp_num, int aeq_num, int other_num) >> { >> @@ -6582,21 +6588,24 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num, >> j - other_num - aeq_num); >> for (j = 0; j < irq_num; j++) { >> - if (j < other_num) >> + if (j < other_num) { >> ret = request_irq(hr_dev->irq[j], >> hns_roce_v2_msix_interrupt_abn, >> 0, hr_dev->irq_names[j], hr_dev); >> - >> - else if (j < (other_num + comp_num)) >> + } else if (j < (other_num + comp_num)) { >> + tasklet_setup(&eq_table->eq[j - other_num].tasklet, >> + hns_roce_ceq_task); >> ret = request_irq(eq_table->eq[j - other_num].irq, >> hns_roce_v2_msix_interrupt_eq, >> 0, hr_dev->irq_names[j + aeq_num], >> &eq_table->eq[j - other_num]); >> - else >> + } else { >> ret = request_irq(eq_table->eq[j - other_num].irq, >> hns_roce_v2_msix_interrupt_eq, >> 0, hr_dev->irq_names[j - comp_num], >> &eq_table->eq[j - other_num]); >> + } >> + >> if (ret) { >> dev_err(hr_dev->dev, "request irq error!\n"); >> goto err_request_failed; >> @@ -6606,12 +6615,16 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num, >> return 0; >> err_request_failed: >> - for (j -= 1; j >= 0; j--) >> - if (j < other_num) >> + for (j -= 1; j >= 0; j--) { >> + if (j < other_num) { >> free_irq(hr_dev->irq[j], hr_dev); >> - else >> - free_irq(eq_table->eq[j - other_num].irq, >> - &eq_table->eq[j - other_num]); >> + continue; >> + } >> + free_irq(eq_table->eq[j - other_num].irq, >> + &eq_table->eq[j - other_num]); >> + if (j < other_num + comp_num) >> + tasklet_kill(&eq_table->eq[j - other_num].tasklet); >> + } >> err_kzalloc_failed: >> for (i -= 1; i >= 0; i--) >> @@ -6632,8 +6645,11 @@ static void __hns_roce_free_irq(struct hns_roce_dev *hr_dev) >> for (i = 0; i < hr_dev->caps.num_other_vectors; i++) >> free_irq(hr_dev->irq[i], hr_dev); >> - for (i = 0; i < eq_num; i++) >> + for (i = 0; i < eq_num; i++) { >> free_irq(hr_dev->eq_table.eq[i].irq, &hr_dev->eq_table.eq[i]); >> + if (i < hr_dev->caps.num_comp_vectors) >> + tasklet_kill(&hr_dev->eq_table.eq[i].tasklet); >> + } >> for (i = 0; i < irq_num; i++) >> kfree(hr_dev->irq_names[i]); >