On 4/13/22 01:11, Zhu Yanjun wrote:
> On Wed, Apr 13, 2022 at 2:03 PM Bob Pearson <rpearsonhpe@xxxxxxxxx> wrote:
>>
>> On 4/13/22 00:58, Zhu Yanjun wrote:
>>> On Wed, Apr 13, 2022 at 1:31 PM Bob Pearson <rpearsonhpe@xxxxxxxxx> wrote:
>>>>
>>>> rxe_mcast.c currently uses _irqsave spinlocks for rxe->mcg_lock
>>>> while rxe_recv.c uses _bh spinlocks for the same lock. Adding
>>>> tracing shows that the code in rxe_mcast.c all executes in in_task()
>>>> context while the code in rxe_recv.c that takes the lock executes
>>>> in softirq context. This causes a lockdep warning in code that
>>>> performs multicast I/O, including blktests check srp.
>>>>
>>>> Change the locking of rxe->mcg_lock in rxe_mcast.c to use
>>>> spin_(un)lock_bh().
>>>
>>> Now RXE is not stable. We should focus on the problems of RXE.
>>>
>>> Zhu Yanjun
>>
>> This is a real bug and triggers lockdep warnings when I run blktests
>> srp. The blktests test suite apparently uses multicast. It is obviously
>> wrong; you can't use both _bh and _irqsave variants on the same lock
>> and pass lockdep checking.
>>
>> What tests are not running correctly for you?
>
> Is the crash related to mr resolved?

I am pursuing getting everything to pass with the rcu_read_lock()
patch. Currently I have blktests, perftests and pyverbs tests all
passing. I have rping running but it hangs. I have WARN_ON's for
mr = 0 but I am not seeing them fire, so there is absolutely no trace
output from rping. It just hangs after a few minutes with no dmesg. I
have lockdep turned on and, as just mentioned, WARN_ON's for mr = 0.
My suspicion is that this is related to a race during shutdown, but I
haven't traced it to the root cause yet. I don't trust the responder
resources code at all. I am done for today here though.

Bob

>
> Zhu Yanjun
>
>>
>> Bob
>>>
>>>>
>>>> With this change the following locks in rdma_rxe, which are all _bh,
>>>> show no lockdep warnings:
>>>>
>>>> atomic_ops_lock
>>>> mw->lock
>>>> port->port_lock
>>>> qp->state_lock
>>>> rxe->mcg_lock
>>>> rxe->mmap_offset_lock
>>>> rxe->pending_lock
>>>> task->state_lock
>>>>
>>>> The only other remaining lock is pool->xa.xa_lock, which must either
>>>> be some combination of _bh and _irq types depending on the object
>>>> type (== ah or not) or a plain spin_lock if the read-side operations
>>>> are converted to use rcu_read_lock().
>>>>
>>>> Fixes: 6090a0c4c7c6 ("RDMA/rxe: Cleanup rxe_mcast.c")
>>>> Signed-off-by: Bob Pearson <rpearsonhpe@xxxxxxxxx>
>>>> ---
>>>>  drivers/infiniband/sw/rxe/rxe_mcast.c | 36 +++++++++++----------------
>>>>  1 file changed, 15 insertions(+), 21 deletions(-)
>>>>
>>>> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
>>>> index ae8f11cb704a..7f50566b8d89 100644
>>>> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c
>>>> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
>>>> @@ -143,11 +143,10 @@ static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
>>>>  struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
>>>>  {
>>>>  	struct rxe_mcg *mcg;
>>>> -	unsigned long flags;
>>>>
>>>> -	spin_lock_irqsave(&rxe->mcg_lock, flags);
>>>> +	spin_lock_bh(&rxe->mcg_lock);
>>>>  	mcg = __rxe_lookup_mcg(rxe, mgid);
>>>> -	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
>>>> +	spin_unlock_bh(&rxe->mcg_lock);
>>>>
>>>>  	return mcg;
>>>>  }
>>>> @@ -198,7 +197,6 @@ static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
>>>>  static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
>>>>  {
>>>>  	struct rxe_mcg *mcg, *tmp;
>>>> -	unsigned long flags;
>>>>  	int err;
>>>>
>>>>  	if (rxe->attr.max_mcast_grp == 0)
>>>> @@ -214,7 +212,7 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
>>>>  	if (!mcg)
>>>>  		return ERR_PTR(-ENOMEM);
>>>>
>>>> -	spin_lock_irqsave(&rxe->mcg_lock, flags);
>>>> +	spin_lock_bh(&rxe->mcg_lock);
>>>>  	/* re-check to see if someone else just added it */
>>>>  	tmp = __rxe_lookup_mcg(rxe, mgid);
>>>>  	if (tmp) {
>>>> @@ -232,12 +230,12 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
>>>>  	if (err)
>>>>  		goto err_dec;
>>>>  out:
>>>> -	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
>>>> +	spin_unlock_bh(&rxe->mcg_lock);
>>>>  	return mcg;
>>>>
>>>>  err_dec:
>>>>  	atomic_dec(&rxe->mcg_num);
>>>> -	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
>>>> +	spin_unlock_bh(&rxe->mcg_lock);
>>>>  	kfree(mcg);
>>>>  	return ERR_PTR(err);
>>>>  }
>>>> @@ -280,11 +278,9 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
>>>>   */
>>>>  static void rxe_destroy_mcg(struct rxe_mcg *mcg)
>>>>  {
>>>> -	unsigned long flags;
>>>> -
>>>> -	spin_lock_irqsave(&mcg->rxe->mcg_lock, flags);
>>>> +	spin_lock_bh(&mcg->rxe->mcg_lock);
>>>>  	__rxe_destroy_mcg(mcg);
>>>> -	spin_unlock_irqrestore(&mcg->rxe->mcg_lock, flags);
>>>> +	spin_unlock_bh(&mcg->rxe->mcg_lock);
>>>>  }
>>>>
>>>>  /**
>>>> @@ -339,25 +335,24 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
>>>>  {
>>>>  	struct rxe_dev *rxe = mcg->rxe;
>>>>  	struct rxe_mca *mca, *tmp;
>>>> -	unsigned long flags;
>>>>  	int err;
>>>>
>>>>  	/* check to see if the qp is already a member of the group */
>>>> -	spin_lock_irqsave(&rxe->mcg_lock, flags);
>>>> +	spin_lock_bh(&rxe->mcg_lock);
>>>>  	list_for_each_entry(mca, &mcg->qp_list, qp_list) {
>>>>  		if (mca->qp == qp) {
>>>> -			spin_unlock_irqrestore(&rxe->mcg_lock, flags);
>>>> +			spin_unlock_bh(&rxe->mcg_lock);
>>>>  			return 0;
>>>>  		}
>>>>  	}
>>>> -	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
>>>> +	spin_unlock_bh(&rxe->mcg_lock);
>>>>
>>>>  	/* speculative alloc new mca without using GFP_ATOMIC */
>>>>  	mca = kzalloc(sizeof(*mca), GFP_KERNEL);
>>>>  	if (!mca)
>>>>  		return -ENOMEM;
>>>>
>>>> -	spin_lock_irqsave(&rxe->mcg_lock, flags);
>>>> +	spin_lock_bh(&rxe->mcg_lock);
>>>>  	/* re-check to see if someone else just attached qp */
>>>>  	list_for_each_entry(tmp, &mcg->qp_list, qp_list) {
>>>>  		if (tmp->qp == qp) {
>>>> @@ -371,7 +366,7 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
>>>>  	if (err)
>>>>  		kfree(mca);
>>>>  out:
>>>> -	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
>>>> +	spin_unlock_bh(&rxe->mcg_lock);
>>>>  	return err;
>>>>  }
>>>>
>>>> @@ -405,9 +400,8 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
>>>>  {
>>>>  	struct rxe_dev *rxe = mcg->rxe;
>>>>  	struct rxe_mca *mca, *tmp;
>>>> -	unsigned long flags;
>>>>
>>>> -	spin_lock_irqsave(&rxe->mcg_lock, flags);
>>>> +	spin_lock_bh(&rxe->mcg_lock);
>>>>  	list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
>>>>  		if (mca->qp == qp) {
>>>>  			__rxe_cleanup_mca(mca, mcg);
>>>> @@ -421,13 +415,13 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
>>>>  			if (atomic_read(&mcg->qp_num) <= 0)
>>>>  				__rxe_destroy_mcg(mcg);
>>>>
>>>> -			spin_unlock_irqrestore(&rxe->mcg_lock, flags);
>>>> +			spin_unlock_bh(&rxe->mcg_lock);
>>>>  			return 0;
>>>>  		}
>>>>  	}
>>>>
>>>>  	/* we didn't find the qp on the list */
>>>> -	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
>>>> +	spin_unlock_bh(&rxe->mcg_lock);
>>>>  	return -EINVAL;
>>>>  }
>>>>
>>>> --
>>>> 2.32.0
>>>>
>>
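
To make the locking argument in the thread concrete, here is a minimal
sketch of the pattern the patch converges on. It is illustrative only:
demo_lock, demo_list and the demo_* functions are hypothetical
stand-ins for rxe->mcg_lock and mcg->qp_list, not code from the driver.

/*
 * Minimal sketch, not driver code: demo_lock, demo_list and the
 * demo_* functions are made-up stand-ins for rxe->mcg_lock and
 * mcg->qp_list. The lock is taken from both process context and
 * softirq context, so every acquisition uses the same _bh variant.
 */
#include <linux/spinlock.h>
#include <linux/list.h>

static DEFINE_SPINLOCK(demo_lock);	/* stand-in for rxe->mcg_lock */
static LIST_HEAD(demo_list);		/* stand-in for mcg->qp_list */

/* Process-context path, like rxe_attach_mcg() after the patch. */
static void demo_task_side(struct list_head *item)
{
	spin_lock_bh(&demo_lock);	/* keeps softirqs off this CPU */
	list_add_tail(item, &demo_list);
	spin_unlock_bh(&demo_lock);
}

/* Softirq-context path, like the group lookup done from rxe_recv.c. */
static bool demo_softirq_side(void)
{
	bool busy;

	spin_lock_bh(&demo_lock);	/* same variant as the task side */
	busy = !list_empty(&demo_list);
	spin_unlock_bh(&demo_lock);
	return busy;
}

As Bob notes above, mixing _bh and _irqsave acquisitions of the same
lock does not pass lockdep checking, which is why the sketch, like the
patch, uses spin_lock_bh() on both sides rather than keeping _irqsave
on the process-context paths.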