On Wed, Apr 13, 2022 at 2:18 PM Bob Pearson <rpearsonhpe@xxxxxxxxx> wrote:
>
> On 4/13/22 01:11, Zhu Yanjun wrote:
> > On Wed, Apr 13, 2022 at 2:03 PM Bob Pearson <rpearsonhpe@xxxxxxxxx> wrote:
> >>
> >> On 4/13/22 00:58, Zhu Yanjun wrote:
> >>> On Wed, Apr 13, 2022 at 1:31 PM Bob Pearson <rpearsonhpe@xxxxxxxxx> wrote:
> >>>>
> >>>> rxe_mcast.c currently uses _irqsave spinlocks for rxe->mcg_lock
> >>>> while rxe_recv.c uses _bh spinlocks for the same lock. Adding
> >>>> tracing shows that the code in rxe_mcast.c all executes in task
> >>>> (in_task()) context, while the code in rxe_recv.c that takes the
> >>>> lock executes in softirq context. This causes a lockdep warning
> >>>> in code that performs multicast I/O, including the blktests srp
> >>>> check.
> >>>>
> >>>> Change the locking of rxe->mcg_lock in rxe_mcast.c to use
> >>>> spin_(un)lock_bh().
> >>>
> >>> Now RXE is not stable. We should focus on the problems in RXE.
> >>>
> >>> Zhu Yanjun
> >>
> >> This is a real bug and it triggers lockdep warnings when I run
> >> blktests srp. The blktests test suite apparently uses multicast.
> >> The current code is obviously wrong: you can't use both the _bh and
> >> _irqsave variants on the same lock and pass lockdep checking.
> >>
> >> What tests are not running correctly for you?
> >
> > Is the crash related to the mr resolved?
>
> I am pursuing getting everything to pass with the rcu_read_lock()
> patch. Currently I have blktests, perftests and pyverbs tests all
> passing. I have rping running but it hangs. I have WARN_ONs for
> mr == 0 but I am not seeing them trigger, so there is absolutely no
> trace output from rping. It just hangs after a few minutes with
> nothing in dmesg. I have lockdep turned on and, as just mentioned,
> WARN_ONs for mr == 0. My suspicion is that this is related to a race
> during shutdown, but I haven't traced it to the root cause

Please focus on this mr crash. I have run tests with the related
xarray commit reverted, and the mr crash disappeared. It seems that
this rping mr crash is related to the xarray. I am still working on
it.

Zhu Yanjun

> yet. I don't trust the responder resources code at all.
>
> I am done for today here though.
>
> Bob
>
> > Zhu Yanjun
> >
> >> Bob
> >>>
> >>>>
> >>>> With this change, the following locks in rdma_rxe, which are all
> >>>> _bh, show no lockdep warnings:
> >>>>
> >>>> atomic_ops_lock
> >>>> mw->lock
> >>>> port->port_lock
> >>>> qp->state_lock
> >>>> rxe->mcg_lock
> >>>> rxe->mmap_offset_lock
> >>>> rxe->pending_lock
> >>>> task->state_lock
> >>>>
> >>>> The only other remaining lock is pool->xa.xa_lock, which must
> >>>> either be some combination of _bh and _irq types depending on the
> >>>> object type (== ah or not), or a plain spin_lock if the read-side
> >>>> operations are converted to use rcu_read_lock().
> >>>>
> >>>> Fixes: 6090a0c4c7c6 ("RDMA/rxe: Cleanup rxe_mcast.c")
> >>>> Signed-off-by: Bob Pearson <rpearsonhpe@xxxxxxxxx>
> >>>> ---
> >>>>  drivers/infiniband/sw/rxe/rxe_mcast.c | 36 +++++++++++----------------
> >>>>  1 file changed, 15 insertions(+), 21 deletions(-)
> >>>>
> >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
> >>>> index ae8f11cb704a..7f50566b8d89 100644
> >>>> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c
> >>>> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
> >>>> @@ -143,11 +143,10 @@ static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
> >>>>  struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
> >>>>  {
> >>>>         struct rxe_mcg *mcg;
> >>>> -       unsigned long flags;
> >>>>
> >>>> -       spin_lock_irqsave(&rxe->mcg_lock, flags);
> >>>> +       spin_lock_bh(&rxe->mcg_lock);
> >>>>         mcg = __rxe_lookup_mcg(rxe, mgid);
> >>>> -       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
> >>>> +       spin_unlock_bh(&rxe->mcg_lock);
> >>>>
> >>>>         return mcg;
> >>>>  }
> >>>> @@ -198,7 +197,6 @@ static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
> >>>>  static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
> >>>>  {
> >>>>         struct rxe_mcg *mcg, *tmp;
> >>>> -       unsigned long flags;
> >>>>         int err;
> >>>>
> >>>>         if (rxe->attr.max_mcast_grp == 0)
> >>>> @@ -214,7 +212,7 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
> >>>>         if (!mcg)
> >>>>                 return ERR_PTR(-ENOMEM);
> >>>>
> >>>> -       spin_lock_irqsave(&rxe->mcg_lock, flags);
> >>>> +       spin_lock_bh(&rxe->mcg_lock);
> >>>>         /* re-check to see if someone else just added it */
> >>>>         tmp = __rxe_lookup_mcg(rxe, mgid);
> >>>>         if (tmp) {
> >>>> @@ -232,12 +230,12 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
> >>>>         if (err)
> >>>>                 goto err_dec;
> >>>>  out:
> >>>> -       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
> >>>> +       spin_unlock_bh(&rxe->mcg_lock);
> >>>>         return mcg;
> >>>>
> >>>>  err_dec:
> >>>>         atomic_dec(&rxe->mcg_num);
> >>>> -       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
> >>>> +       spin_unlock_bh(&rxe->mcg_lock);
> >>>>         kfree(mcg);
> >>>>         return ERR_PTR(err);
> >>>>  }
> >>>> @@ -280,11 +278,9 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
> >>>>   */
> >>>>  static void rxe_destroy_mcg(struct rxe_mcg *mcg)
> >>>>  {
> >>>> -       unsigned long flags;
> >>>> -
> >>>> -       spin_lock_irqsave(&mcg->rxe->mcg_lock, flags);
> >>>> +       spin_lock_bh(&mcg->rxe->mcg_lock);
> >>>>         __rxe_destroy_mcg(mcg);
> >>>> -       spin_unlock_irqrestore(&mcg->rxe->mcg_lock, flags);
> >>>> +       spin_unlock_bh(&mcg->rxe->mcg_lock);
> >>>>  }
> >>>>
> >>>>  /**
> >>>> @@ -339,25 +335,24 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
> >>>>  {
> >>>>         struct rxe_dev *rxe = mcg->rxe;
> >>>>         struct rxe_mca *mca, *tmp;
> >>>> -       unsigned long flags;
> >>>>         int err;
> >>>>
> >>>>         /* check to see if the qp is already a member of the group */
> >>>> -       spin_lock_irqsave(&rxe->mcg_lock, flags);
> >>>> +       spin_lock_bh(&rxe->mcg_lock);
> >>>>         list_for_each_entry(mca, &mcg->qp_list, qp_list) {
> >>>>                 if (mca->qp == qp) {
> >>>> -                       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
> >>>> +                       spin_unlock_bh(&rxe->mcg_lock);
> >>>>                         return 0;
> >>>>                 }
> >>>>         }
> >>>> -       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
> >>>> +       spin_unlock_bh(&rxe->mcg_lock);
> >>>>
> >>>>         /* speculative alloc new mca without using GFP_ATOMIC */
> >>>>         mca = kzalloc(sizeof(*mca), GFP_KERNEL);
> >>>>         if (!mca)
> >>>>                 return -ENOMEM;
> >>>>
> >>>> -       spin_lock_irqsave(&rxe->mcg_lock, flags);
> >>>> +       spin_lock_bh(&rxe->mcg_lock);
> >>>>         /* re-check to see if someone else just attached qp */
> >>>>         list_for_each_entry(tmp, &mcg->qp_list, qp_list) {
> >>>>                 if (tmp->qp == qp) {
> >>>> @@ -371,7 +366,7 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
> >>>>         if (err)
> >>>>                 kfree(mca);
> >>>>  out:
> >>>> -       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
> >>>> +       spin_unlock_bh(&rxe->mcg_lock);
> >>>>         return err;
> >>>>  }
> >>>>
> >>>> @@ -405,9 +400,8 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
> >>>>  {
> >>>>         struct rxe_dev *rxe = mcg->rxe;
> >>>>         struct rxe_mca *mca, *tmp;
> >>>> -       unsigned long flags;
> >>>>
> >>>> -       spin_lock_irqsave(&rxe->mcg_lock, flags);
> >>>> +       spin_lock_bh(&rxe->mcg_lock);
> >>>>         list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
> >>>>                 if (mca->qp == qp) {
> >>>>                         __rxe_cleanup_mca(mca, mcg);
> >>>> @@ -421,13 +415,13 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
> >>>>                         if (atomic_read(&mcg->qp_num) <= 0)
> >>>>                                 __rxe_destroy_mcg(mcg);
> >>>>
> >>>> -                       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
> >>>> +                       spin_unlock_bh(&rxe->mcg_lock);
> >>>>                         return 0;
> >>>>                 }
> >>>>         }
> >>>>
> >>>>         /* we didn't find the qp on the list */
> >>>> -       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
> >>>> +       spin_unlock_bh(&rxe->mcg_lock);
> >>>>         return -EINVAL;
> >>>>  }
> >>>>
> >>>> --
> >>>> 2.32.0
> >>>>
> >>
>
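For reference, the locking rule behind the patch can be shown with a
minimal, hypothetical sketch (the demo_* names below are invented for
illustration and are not rxe code): a spinlock shared between task
context and softirq context must disable bottom halves on every
acquisition path, or lockdep reports exactly the kind of inconsistent
usage described above.

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(demo_lock);

    /* Task-context path (the role rxe_mcast.c plays after the patch):
     * the _bh variant keeps a softirq on this CPU from interrupting
     * the critical section and deadlocking on the same lock.
     */
    static void demo_task_side(void)
    {
            spin_lock_bh(&demo_lock);
            /* ... touch state shared with the receive path ... */
            spin_unlock_bh(&demo_lock);
    }

    /* Softirq-context path (the role rxe_recv.c plays): using the
     * same _bh variant gives lockdep one consistent usage class for
     * demo_lock instead of a mix of _irqsave and _bh.
     */
    static void demo_softirq_side(void)
    {
            spin_lock_bh(&demo_lock);
            /* ... deliver a multicast packet ... */
            spin_unlock_bh(&demo_lock);
    }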
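The closing remark in the commit message about pool->xa.xa_lock refers
to converting the read side to RCU so the remaining lock users can stay
a plain spin_lock. A minimal sketch of that pattern, again with an
invented demo_* name rather than the real rxe pool API:

    #include <linux/rcupdate.h>
    #include <linux/xarray.h>

    /* Look up an object by index without taking xa_lock. xa_load()
     * is safe to call inside an RCU read-side critical section; a
     * real implementation would also take a reference on the object
     * before dropping the RCU read lock so it cannot be freed while
     * still in use.
     */
    static void *demo_pool_lookup(struct xarray *xa, unsigned long index)
    {
            void *obj;

            rcu_read_lock();
            obj = xa_load(xa, index);
            rcu_read_unlock();

            return obj;
    }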