On Wed, Apr 13, 2022 at 2:18 PM Bob Pearson <rpearsonhpe@xxxxxxxxx> wrote:
>
> On 4/13/22 01:11, Zhu Yanjun wrote:
> > On Wed, Apr 13, 2022 at 2:03 PM Bob Pearson <rpearsonhpe@xxxxxxxxx> wrote:
> >>
> >> On 4/13/22 00:58, Zhu Yanjun wrote:
> >>> On Wed, Apr 13, 2022 at 1:31 PM Bob Pearson <rpearsonhpe@xxxxxxxxx> wrote:
> >>>>
> >>>> rxe_mcast.c currently uses _irqsave spinlocks for rxe->mcg_lock
> >>>> while rxe_recv.c uses _bh spinlocks for the same lock. Adding
> >>>> tracing shows that the code in rxe_mcast.c all executes in task
> >>>> (in_task()) context, while the code in rxe_recv.c that takes the
> >>>> lock executes in softirq context. This causes a lockdep warning
> >>>> in code that performs multicast I/O, including the blktests srp
> >>>> check.
> >>>>
> >>>> Change the locking of rxe->mcg_lock in rxe_mcast.c to use
> >>>> spin_(un)lock_bh().
> >>>
> >>> Now RXE is not stable. We should focus on the problems in RXE.
> >>>
> >>> Zhu Yanjun
> >>
> >> This is a real bug and it triggers lockdep warnings when I run
> >> blktests srp. The blktests test suite apparently uses multicast.
> >> The current code is obviously wrong: you can't use both the _bh and
> >> _irqsave variants on the same lock and pass lockdep checking.
> >>
> >> What tests are not running correctly for you?
> >
> > Is the crash related to the mr resolved?
>
> I am pursuing getting everything to pass with the rcu_read_lock()
> patch. Currently I have blktests, perftests and pyverbs tests all
> passing. I have rping running but it hangs. I have WARN_ONs for
> mr == 0 but I am not seeing them trigger, so there is absolutely no
> trace output from rping. It just hangs after a few minutes with
> nothing in dmesg. I have lockdep turned on and, as just mentioned,
> WARN_ONs for mr == 0. My suspicion is that this is related to a race
> during shutdown, but I haven't traced it to the root cause

Please focus on this mr crash. I have run tests with the related
xarray commit reverted, and the mr crash disappeared. It seems that
this rping mr crash is related to the xarray. I am still working on
it.

Zhu Yanjun

> yet. I don't trust the responder resources code at all.
>
> I am done for today here though.
>
> Bob
>
> > Zhu Yanjun
> >
> >> Bob
> >>>
> >>>>
> >>>> With this change, the following locks in rdma_rxe, which are all
> >>>> _bh, show no lockdep warnings:
> >>>>
> >>>> atomic_ops_lock
> >>>> mw->lock
> >>>> port->port_lock
> >>>> qp->state_lock
> >>>> rxe->mcg_lock
> >>>> rxe->mmap_offset_lock
> >>>> rxe->pending_lock
> >>>> task->state_lock
> >>>>
> >>>> The only other remaining lock is pool->xa.xa_lock, which must
> >>>> either be some combination of _bh and _irq types depending on the
> >>>> object type (== ah or not), or a plain spin_lock if the read-side
> >>>> operations are converted to use rcu_read_lock().
> >>>>
> >>>> Fixes: 6090a0c4c7c6 ("RDMA/rxe: Cleanup rxe_mcast.c")
> >>>> Signed-off-by: Bob Pearson <rpearsonhpe@xxxxxxxxx>
> >>>> ---
> >>>>  drivers/infiniband/sw/rxe/rxe_mcast.c | 36 +++++++++++----------------
> >>>>  1 file changed, 15 insertions(+), 21 deletions(-)
> >>>>
> >>>> diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
> >>>> index ae8f11cb704a..7f50566b8d89 100644
> >>>> --- a/drivers/infiniband/sw/rxe/rxe_mcast.c
> >>>> +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
> >>>> @@ -143,11 +143,10 @@ static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
> >>>>  struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
> >>>>  {
> >>>>         struct rxe_mcg *mcg;
> >>>> -       unsigned long flags;
> >>>>
> >>>> -       spin_lock_irqsave(&rxe->mcg_lock, flags);
> >>>> +       spin_lock_bh(&rxe->mcg_lock);
> >>>>         mcg = __rxe_lookup_mcg(rxe, mgid);
> >>>> -       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
> >>>> +       spin_unlock_bh(&rxe->mcg_lock);
> >>>>
> >>>>         return mcg;
> >>>>  }
> >>>> @@ -198,7 +197,6 @@ static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
> >>>>  static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
> >>>>  {
> >>>>         struct rxe_mcg *mcg, *tmp;
> >>>> -       unsigned long flags;
> >>>>         int err;
> >>>>
> >>>>         if (rxe->attr.max_mcast_grp == 0)
> >>>> @@ -214,7 +212,7 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
> >>>>         if (!mcg)
> >>>>                 return ERR_PTR(-ENOMEM);
> >>>>
> >>>> -       spin_lock_irqsave(&rxe->mcg_lock, flags);
> >>>> +       spin_lock_bh(&rxe->mcg_lock);
> >>>>         /* re-check to see if someone else just added it */
> >>>>         tmp = __rxe_lookup_mcg(rxe, mgid);
> >>>>         if (tmp) {
> >>>> @@ -232,12 +230,12 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
> >>>>         if (err)
> >>>>                 goto err_dec;
> >>>>  out:
> >>>> -       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
> >>>> +       spin_unlock_bh(&rxe->mcg_lock);
> >>>>         return mcg;
> >>>>
> >>>>  err_dec:
> >>>>         atomic_dec(&rxe->mcg_num);
> >>>> -       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
> >>>> +       spin_unlock_bh(&rxe->mcg_lock);
> >>>>         kfree(mcg);
> >>>>         return ERR_PTR(err);
> >>>>  }
> >>>> @@ -280,11 +278,9 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
> >>>>   */
> >>>>  static void rxe_destroy_mcg(struct rxe_mcg *mcg)
> >>>>  {
> >>>> -       unsigned long flags;
> >>>> -
> >>>> -       spin_lock_irqsave(&mcg->rxe->mcg_lock, flags);
> >>>> +       spin_lock_bh(&mcg->rxe->mcg_lock);
> >>>>         __rxe_destroy_mcg(mcg);
> >>>> -       spin_unlock_irqrestore(&mcg->rxe->mcg_lock, flags);
> >>>> +       spin_unlock_bh(&mcg->rxe->mcg_lock);
> >>>>  }
> >>>>
> >>>>  /**
> >>>> @@ -339,25 +335,24 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
> >>>>  {
> >>>>         struct rxe_dev *rxe = mcg->rxe;
> >>>>         struct rxe_mca *mca, *tmp;
> >>>> -       unsigned long flags;
> >>>>         int err;
> >>>>
> >>>>         /* check to see if the qp is already a member of the group */
> >>>> -       spin_lock_irqsave(&rxe->mcg_lock, flags);
> >>>> +       spin_lock_bh(&rxe->mcg_lock);
> >>>>         list_for_each_entry(mca, &mcg->qp_list, qp_list) {
> >>>>                 if (mca->qp == qp) {
> >>>> -                       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
> >>>> +                       spin_unlock_bh(&rxe->mcg_lock);
> >>>>                         return 0;
> >>>>                 }
> >>>>         }
> >>>> -       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
> >>>> +       spin_unlock_bh(&rxe->mcg_lock);
> >>>>
> >>>>         /* speculative alloc new mca without using GFP_ATOMIC */
> >>>>         mca = kzalloc(sizeof(*mca), GFP_KERNEL);
> >>>>         if (!mca)
> >>>>                 return -ENOMEM;
> >>>>
> >>>> -       spin_lock_irqsave(&rxe->mcg_lock, flags);
> >>>> +       spin_lock_bh(&rxe->mcg_lock);
> >>>>         /* re-check to see if someone else just attached qp */
> >>>>         list_for_each_entry(tmp, &mcg->qp_list, qp_list) {
> >>>>                 if (tmp->qp == qp) {
> >>>> @@ -371,7 +366,7 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
> >>>>         if (err)
> >>>>                 kfree(mca);
> >>>>  out:
> >>>> -       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
> >>>> +       spin_unlock_bh(&rxe->mcg_lock);
> >>>>         return err;
> >>>>  }
> >>>>
> >>>> @@ -405,9 +400,8 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
> >>>>  {
> >>>>         struct rxe_dev *rxe = mcg->rxe;
> >>>>         struct rxe_mca *mca, *tmp;
> >>>> -       unsigned long flags;
> >>>>
> >>>> -       spin_lock_irqsave(&rxe->mcg_lock, flags);
> >>>> +       spin_lock_bh(&rxe->mcg_lock);
> >>>>         list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
> >>>>                 if (mca->qp == qp) {
> >>>>                         __rxe_cleanup_mca(mca, mcg);
> >>>> @@ -421,13 +415,13 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
> >>>>                         if (atomic_read(&mcg->qp_num) <= 0)
> >>>>                                 __rxe_destroy_mcg(mcg);
> >>>>
> >>>> -                       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
> >>>> +                       spin_unlock_bh(&rxe->mcg_lock);
> >>>>                         return 0;
> >>>>                 }
> >>>>         }
> >>>>
> >>>>         /* we didn't find the qp on the list */
> >>>> -       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
> >>>> +       spin_unlock_bh(&rxe->mcg_lock);
> >>>>         return -EINVAL;
> >>>>  }
> >>>>
> >>>> --
> >>>> 2.32.0
> >>>>
> >>
>
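For reference, the locking rule behind the patch can be shown with a
minimal, hypothetical sketch (the demo_* names below are invented for
illustration and are not rxe code): a spinlock shared between task
context and softirq context must disable bottom halves on every
acquisition path, or lockdep reports exactly the kind of inconsistent
usage described above.

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(demo_lock);

    /* Task-context path (the role rxe_mcast.c plays after the patch):
     * the _bh variant keeps a softirq on this CPU from interrupting
     * the critical section and deadlocking on the same lock.
     */
    static void demo_task_side(void)
    {
            spin_lock_bh(&demo_lock);
            /* ... touch state shared with the receive path ... */
            spin_unlock_bh(&demo_lock);
    }

    /* Softirq-context path (the role rxe_recv.c plays): using the
     * same _bh variant gives lockdep one consistent usage class for
     * demo_lock instead of a mix of _irqsave and _bh.
     */
    static void demo_softirq_side(void)
    {
            spin_lock_bh(&demo_lock);
            /* ... deliver a multicast packet ... */
            spin_unlock_bh(&demo_lock);
    }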
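The closing remark in the commit message about pool->xa.xa_lock refers
to converting the read side to RCU so the remaining lock users can stay
a plain spin_lock. A minimal sketch of that pattern, again with an
invented demo_* name rather than the real rxe pool API:

    #include <linux/rcupdate.h>
    #include <linux/xarray.h>

    /* Look up an object by index without taking xa_lock. xa_load()
     * is safe to call inside an RCU read-side critical section; a
     * real implementation would also take a reference on the object
     * before dropping the RCU read lock so it cannot be freed while
     * still in use.
     */
    static void *demo_pool_lookup(struct xarray *xa, unsigned long index)
    {
            void *obj;

            rcu_read_lock();
            obj = xa_load(xa, index);
            rcu_read_unlock();

            return obj;
    }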