Re: [PATCH for-next] RDMA/rxe: Fix "RDMA/rxe: Cleanup rxe_mcast.c"

On 4/13/22 01:11, Zhu Yanjun wrote:
> On Wed, Apr 13, 2022 at 2:03 PM Bob Pearson <rpearsonhpe@xxxxxxxxx> wrote:
>>
>> On 4/13/22 00:58, Zhu Yanjun wrote:
>>> On Wed, Apr 13, 2022 at 1:31 PM Bob Pearson <rpearsonhpe@xxxxxxxxx> wrote:
>>>>
>>>> rxe_mcast.c currently uses _irqsave spinlocks for rxe->mcg_lock
>>>> while rxe_recv.c uses _bh spinlocks for the same lock. Adding
>>>> tracing shows that the code in rxe_mcast.c all executes in
>>>> in_task() context, while the code in rxe_recv.c that takes the lock
>>>> executes in softirq context. This causes a lockdep warning in code
>>>> that performs multicast I/O, including blktests check srp.
>>>>
>>>> Change the locking of rxe->mcg_lock in rxe_mcast.c to use
>>>> spin_(un)lock_bh().
>>>
>>> Right now RXE is not stable. We should focus on the existing problems in RXE.
>>>
>>> Zhu Yanjun
>>
>> This is a real bug, and it triggers lockdep warnings when I run blktests srp.
>> The blktests test suite apparently uses multicast. The current code is obviously
>> wrong; you can't use both _bh and _irqsave locking on the same lock and pass
>> lockdep checking.
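(For context, the conflict described above is roughly the following; this is an
illustrative sketch of the pre-patch situation, not a verbatim excerpt of either file.)

	/* rxe_mcast.c before this patch: runs in process (in_task) context */
	unsigned long flags;

	spin_lock_irqsave(&rxe->mcg_lock, flags);
	mcg = __rxe_lookup_mcg(rxe, mgid);
	spin_unlock_irqrestore(&rxe->mcg_lock, flags);

	/* rxe_recv.c: the same lock is taken from softirq context using the
	 * _bh variant, which is the mix of lock types that lockdep flags
	 */
	spin_lock_bh(&rxe->mcg_lock);
	/* ... multicast receive processing ... */
	spin_unlock_bh(&rxe->mcg_lock);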
>>
>> What tests are not running correctly for you?
> 
> Has the crash related to the mr been resolved?

I am working on getting everything to pass with the rcu_read_lock() patch.
Currently I have blktests, perftests and pyverbs tests all passing.
I have rping running but it hangs. I have WARN_ON's for mr = 0, but they
are not firing, so there is absolutely no trace output from rping.
It just hangs after a few minutes with nothing in dmesg. I have lockdep turned
on and, as just mentioned, WARN_ON's for mr = 0. My suspicion is that this is
related to a race during shutdown, but I haven't traced it to the root cause
yet. I don't trust the responder resources code at all.
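(The "WARN_ON's for mr = 0" mentioned above are presumably debug-only guards of
roughly the following shape; the call site and return value here are hypothetical,
not the actual patch under test.)

	/* hypothetical debug guard: warn, with a backtrace in dmesg, if a
	 * lookup unexpectedly produced a NULL mr, instead of crashing on the
	 * dereference later in the responder path
	 */
	if (WARN_ON(!mr))
		return RESPST_ERR_RKEY_VIOLATION;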

I am done for today here though.

Bob
> 
> Zhu Yanjun
> 
>>
>> Bob
>>>
>>>>
>>>> With this change, the following locks in rdma_rxe, which are all _bh,
>>>> show no lockdep warnings.
>>>>
>>>>         atomic_ops_lock
>>>>         mw->lock
>>>>         port->port_lock
>>>>         qp->state_lock
>>>>         rxe->mcg_lock
>>>>         rxe->mmap_offset_lock
>>>>         rxe->pending_lock
>>>>         task->state_lock
>>>>
>>>> The only other remaining lock is pool->xa.xa_lock, which must either
>>>> be some combination of _bh and _irq types, depending on the object
>>>> type (AH or not), or a plain spin_lock if the read-side operations
>>>> are converted to use rcu_read_lock().
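(The read-side conversion mentioned in the paragraph above, and in the
rcu_read_lock() patch referred to earlier in the thread, would look roughly like
the sketch below. This is an illustrative outline, assuming pool->xa maps indices
to objects that embed a kref named ref_cnt; it is not the actual rxe_pool.c change.)

	/* illustrative read-side lookup: with rcu_read_lock() protecting the
	 * xarray walk, the write side can use a plain spin_lock for
	 * pool->xa.xa_lock regardless of object type
	 */
	rcu_read_lock();
	elem = xa_load(&pool->xa, index);
	/* only take a reference if the object is not already being freed */
	if (elem && !kref_get_unless_zero(&elem->ref_cnt))
		elem = NULL;
	rcu_read_unlock();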
>>>>
>>>> Fixes: 6090a0c4c7c6 ("RDMA/rxe: Cleanup rxe_mcast.c")
>>>> Signed-off-by: Bob Pearson <rpearsonhpe@xxxxxxxxx>
>>>> ---
>>>>  drivers/infiniband/sw/rxe/rxe_mcast.c | 36 +++++++++++----------------
>>>>  1 file changed, 15 insertions(+), 21 deletions(-)
>>>>
>>>> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
>>>> index ae8f11cb704a..7f50566b8d89 100644
>>>> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c
>>>> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
>>>> @@ -143,11 +143,10 @@ static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
>>>>  struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
>>>>  {
>>>>         struct rxe_mcg *mcg;
>>>> -       unsigned long flags;
>>>>
>>>> -       spin_lock_irqsave(&rxe->mcg_lock, flags);
>>>> +       spin_lock_bh(&rxe->mcg_lock);
>>>>         mcg = __rxe_lookup_mcg(rxe, mgid);
>>>> -       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
>>>> +       spin_unlock_bh(&rxe->mcg_lock);
>>>>
>>>>         return mcg;
>>>>  }
>>>> @@ -198,7 +197,6 @@ static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
>>>>  static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
>>>>  {
>>>>         struct rxe_mcg *mcg, *tmp;
>>>> -       unsigned long flags;
>>>>         int err;
>>>>
>>>>         if (rxe->attr.max_mcast_grp == 0)
>>>> @@ -214,7 +212,7 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
>>>>         if (!mcg)
>>>>                 return ERR_PTR(-ENOMEM);
>>>>
>>>> -       spin_lock_irqsave(&rxe->mcg_lock, flags);
>>>> +       spin_lock_bh(&rxe->mcg_lock);
>>>>         /* re-check to see if someone else just added it */
>>>>         tmp = __rxe_lookup_mcg(rxe, mgid);
>>>>         if (tmp) {
>>>> @@ -232,12 +230,12 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
>>>>         if (err)
>>>>                 goto err_dec;
>>>>  out:
>>>> -       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
>>>> +       spin_unlock_bh(&rxe->mcg_lock);
>>>>         return mcg;
>>>>
>>>>  err_dec:
>>>>         atomic_dec(&rxe->mcg_num);
>>>> -       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
>>>> +       spin_unlock_bh(&rxe->mcg_lock);
>>>>         kfree(mcg);
>>>>         return ERR_PTR(err);
>>>>  }
>>>> @@ -280,11 +278,9 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
>>>>   */
>>>>  static void rxe_destroy_mcg(struct rxe_mcg *mcg)
>>>>  {
>>>> -       unsigned long flags;
>>>> -
>>>> -       spin_lock_irqsave(&mcg->rxe->mcg_lock, flags);
>>>> +       spin_lock_bh(&mcg->rxe->mcg_lock);
>>>>         __rxe_destroy_mcg(mcg);
>>>> -       spin_unlock_irqrestore(&mcg->rxe->mcg_lock, flags);
>>>> +       spin_unlock_bh(&mcg->rxe->mcg_lock);
>>>>  }
>>>>
>>>>  /**
>>>> @@ -339,25 +335,24 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
>>>>  {
>>>>         struct rxe_dev *rxe = mcg->rxe;
>>>>         struct rxe_mca *mca, *tmp;
>>>> -       unsigned long flags;
>>>>         int err;
>>>>
>>>>         /* check to see if the qp is already a member of the group */
>>>> -       spin_lock_irqsave(&rxe->mcg_lock, flags);
>>>> +       spin_lock_bh(&rxe->mcg_lock);
>>>>         list_for_each_entry(mca, &mcg->qp_list, qp_list) {
>>>>                 if (mca->qp == qp) {
>>>> -                       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
>>>> +                       spin_unlock_bh(&rxe->mcg_lock);
>>>>                         return 0;
>>>>                 }
>>>>         }
>>>> -       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
>>>> +       spin_unlock_bh(&rxe->mcg_lock);
>>>>
>>>>         /* speculative alloc new mca without using GFP_ATOMIC */
>>>>         mca = kzalloc(sizeof(*mca), GFP_KERNEL);
>>>>         if (!mca)
>>>>                 return -ENOMEM;
>>>>
>>>> -       spin_lock_irqsave(&rxe->mcg_lock, flags);
>>>> +       spin_lock_bh(&rxe->mcg_lock);
>>>>         /* re-check to see if someone else just attached qp */
>>>>         list_for_each_entry(tmp, &mcg->qp_list, qp_list) {
>>>>                 if (tmp->qp == qp) {
>>>> @@ -371,7 +366,7 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
>>>>         if (err)
>>>>                 kfree(mca);
>>>>  out:
>>>> -       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
>>>> +       spin_unlock_bh(&rxe->mcg_lock);
>>>>         return err;
>>>>  }
>>>>
>>>> @@ -405,9 +400,8 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
>>>>  {
>>>>         struct rxe_dev *rxe = mcg->rxe;
>>>>         struct rxe_mca *mca, *tmp;
>>>> -       unsigned long flags;
>>>>
>>>> -       spin_lock_irqsave(&rxe->mcg_lock, flags);
>>>> +       spin_lock_bh(&rxe->mcg_lock);
>>>>         list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
>>>>                 if (mca->qp == qp) {
>>>>                         __rxe_cleanup_mca(mca, mcg);
>>>> @@ -421,13 +415,13 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
>>>>                         if (atomic_read(&mcg->qp_num) <= 0)
>>>>                                 __rxe_destroy_mcg(mcg);
>>>>
>>>> -                       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
>>>> +                       spin_unlock_bh(&rxe->mcg_lock);
>>>>                         return 0;
>>>>                 }
>>>>         }
>>>>
>>>>         /* we didn't find the qp on the list */
>>>> -       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
>>>> +       spin_unlock_bh(&rxe->mcg_lock);
>>>>         return -EINVAL;
>>>>  }
>>>>
>>>> --
>>>> 2.32.0
>>>>
>>



