Re: [PATCH] RDMA/rxe: Restore tasklet call for rxe_cq.c

On 2024/7/11 9:01, Honggang LI wrote:
On Thu, Jul 11, 2024 at 11:06:06AM +0800, Greg Sword wrote:

On Thu, Jul 11, 2024 at 9:41 AM Honggang LI <honggangli@xxxxxxx> wrote:

If ib_req_notify_cq() is called in the completion handler, a deadlock
occurs in the receive path.

rxe_req_notify_cq+0x21/0x70 [rdma_rxe]
krping_cq_event_handler+0x26f/0x2c0 [rdma_krping]

What is rdma_krping? What is the deadlock?

https://github.com/larrystevenwise/krping.git

Please explain the deadlock in detail.

    88 int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
    89 {
    90         struct ib_event ev;
    91         int full;
    92         void *addr;
    93         unsigned long flags;
    94
    95         spin_lock_irqsave(&cq->cq_lock, flags);  // Lock!
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    96
    97         full = queue_full(cq->queue, QUEUE_TYPE_TO_CLIENT);
    98         if (unlikely(full)) {
    99                 rxe_err_cq(cq, "queue full\n");
   100                 spin_unlock_irqrestore(&cq->cq_lock, flags);
   101                 if (cq->ibcq.event_handler) {
   102                         ev.device = cq->ibcq.device;
   103                         ev.element.cq = &cq->ibcq;
   104                         ev.event = IB_EVENT_CQ_ERR;
   105                         cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
   106                 }
   107
   108                 return -EBUSY;
   109         }
   110
   111         addr = queue_producer_addr(cq->queue, QUEUE_TYPE_TO_CLIENT);
   112         memcpy(addr, cqe, sizeof(*cqe));
   113
   114         queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT);
   115
   116         if ((cq->notify & IB_CQ_NEXT_COMP) ||
   117             (cq->notify & IB_CQ_SOLICITED && solicited)) {
   118                 cq->notify = 0;
   119                 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
		      calls the completion handler   krping_cq_event_handler()
   120         }
   121
   122         spin_unlock_irqrestore(&cq->cq_lock, flags);



static void krping_cq_event_handler(struct ib_cq *cq, void *ctx)
{
         struct krping_cb *cb = ctx;
         struct ib_wc wc;
         const struct ib_recv_wr *bad_wr;
         int ret;

         BUG_ON(cb->cq != cq);
         if (cb->state == ERROR) {
                 printk(KERN_ERR PFX "cq completion in ERROR state\n");
                 return;
         }
         if (cb->frtest) {
                 printk(KERN_ERR PFX "cq completion event in frtest!\n");
                 return;
         }
         if (!cb->wlat && !cb->rlat && !cb->bw)
                 ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP);
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

IMO, could we use a BH workqueue here to execute this notification, or add an event handler?
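
A rough sketch of the BH workqueue idea (not actual rxe code: the comp_work
member, rxe_cq_comp_work() and the init hook are made-up names, and
system_bh_wq assumes a kernel with BH workqueue support):

static void rxe_cq_comp_work(struct work_struct *work)
{
	struct rxe_cq *cq = container_of(work, struct rxe_cq, comp_work);

	/* Runs in softirq context with cq_lock not held, so a ULP
	 * completion handler may call ib_req_notify_cq() here without
	 * recursing on the lock.
	 */
	if (cq->ibcq.comp_handler)
		cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}

rxe_cq_from_init() would do INIT_WORK(&cq->comp_work, rxe_cq_comp_work) once,
and rxe_cq_post() would replace the direct comp_handler call with
queue_work(system_bh_wq, &cq->comp_work), so the callback only runs after
cq_lock has been dropped.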

Please refer to how other ULP kernel modules handle this.

Thanks,
Zhu Yanjun

         while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) {
                 if (wc.status) {

static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
         struct rxe_cq *cq = to_rcq(ibcq);
         int ret = 0;
         int empty;
         unsigned long irq_flags;

         spin_lock_irqsave(&cq->cq_lock, irq_flags);
	^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Deadlock
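
So the chain is: rxe_cq_post() takes cq_lock, then calls
cq->ibcq.comp_handler() -> krping_cq_event_handler() -> ib_req_notify_cq()
-> rxe_req_notify_cq(), which spins on the same cq_lock that the CPU
already holds.

The subject line ("Restore tasklet call for rxe_cq.c") points at deferring
the callback instead of invoking it under the lock. A minimal sketch of that
direction, with illustrative names (comp_task and rxe_send_complete() may
not match the actual patch):

static void rxe_send_complete(struct tasklet_struct *t)
{
	struct rxe_cq *cq = from_tasklet(cq, t, comp_task);

	/* cq_lock is not held here, so the ULP handler is free to call
	 * ib_req_notify_cq() -> rxe_req_notify_cq().
	 */
	cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}

comp_task would be set up once with tasklet_setup(&cq->comp_task,
rxe_send_complete), and rxe_cq_post() would call
tasklet_schedule(&cq->comp_task) in place of the direct comp_handler call,
so the handler runs only after spin_unlock_irqrestore() has released cq_lock.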





