Hi Mikail, > On Jun 12, 2018, at 9:08 AM, m.malygin@xxxxxxxxx wrote: > > From: Mikhail Malygin <m.malygin@xxxxxxxxx> > > This patch addresses issue causing spinlock recursion in qla_target.c: > 1. qlt_handle_login takes vha->hw->tgt.sess_lock, then calls qlt_schedule_sess_for_deletion > where it tries to take spinlock again. We had posted patch to serialize session deletion with following patches https://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git/commit/?id=1ae634eb28533b82f9777a47c1ade44cb8c0182b https://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git/commit/?id=d8630bb95f46ea118dede63bd75533faa64f9612 https://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git/commit/?id=d8630bb95f46ea118dede63bd75533faa64f9612 However, this patch looks like introduced regression, https://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git/commit/?id=1c6cacf4ea6c04a58a0e3057f5ed60c24a4ffeff can you use work_lock as it was before the change and see if that helps with both issue 1 and 2. something like this diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 7ed47800c660..d649f85d9657 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1239,16 +1239,16 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess) return; } - spin_lock_irqsave(&ha->tgt.sess_lock, flags); if (sess->deleted == QLA_SESS_DELETED) sess->logout_on_delete = 0; + spin_lock_irqsave(&sess->vha->work_lock, flags); if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + spin_unlock_irqrestore(&sess->vha->work_lock, flags); return; } sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + spin_unlock_irqrestore(&sess->vha->work_lock, flags); sess->disc_state = DSC_DELETE_PEND; > 2. qlt_reset takes the same lock, then calls qlt_schedule_sess_for_deleteion via qlt_clear_tgt_db > > Stacktrace for qlt_handle_login > > BUG: spinlock lockup suspected on CPU#0, swapper/0/0 > lock: 0xc00000c07aa8bec0, .magic: dead4ead, .owner: swapper/0/0, .owner_cpu: 0 > CPU: 0 PID: 0 Comm: swapper/0 Tainted: G OE NX 4.4.132-ttln.24-debug #1 > Call Trace: > [c00000dfff6d7830] [c0000000008060c0] dump_stack+0xb0/0xf0 (unreliable) > [c00000dfff6d7870] [c0000000007ff6ec] spin_dump+0xa8/0xc4 > [c00000dfff6d78e0] [c000000000128320] do_raw_spin_lock+0x140/0x1d0 > [c00000dfff6d7920] [c0000000007f7354] _raw_spin_lock_irqsave+0x34/0x50 > [c00000dfff6d7950] [d00000001edf3220] qlt_schedule_sess_for_deletion+0x90/0x250 [qla2xxx] > [c00000dfff6d79c0] [d00000001edf6b08] qlt_find_sess_invalidate_other+0x1d8/0x230 [qla2xxx] > [c00000dfff6d7a70] [d00000001edf710c] qlt_handle_login+0x5ac/0x760 [qla2xxx] > [c00000dfff6d7b10] [d00000001edf7ccc] qlt_handle_imm_notify+0xa0c/0x10b0 [qla2xxx] > [c00000dfff6d7c00] [d00000001edf85f0] qlt_24xx_atio_pkt+0x280/0x400 [qla2xxx] > [c00000dfff6d7ca0] [d00000001edfa9d8] qlt_24xx_process_atio_queue+0x368/0x7d0 [qla2xxx] > [c00000dfff6d7d80] [d00000001edfb898] qla83xx_msix_atio_q+0x58/0x90 [qla2xxx] > [c00000dfff6d7dc0] [c000000000133cd0] __handle_irq_event_percpu+0xa0/0x2f0 > [c00000dfff6d7e80] [c000000000133f5c] handle_irq_event_percpu+0x3c/0x90 > [c00000dfff6d7ec0] [c000000000134018] handle_irq_event+0x68/0xb0 > [c00000dfff6d7f00] [c000000000139278] handle_fasteoi_irq+0xf8/0x260 > [c00000dfff6d7f40] [c000000000132e80] generic_handle_irq+0x50/0x80 > [c00000dfff6d7f60] [c000000000014c44] __do_irq+0x84/0x1d0 > [c00000dfff6d7f90] [c000000000027924] call_do_irq+0x14/0x24 > [c000000000f13a20] [c000000000014e30] do_IRQ+0xa0/0x120 > [c000000000f13a70] [c000000000002694] hardware_interrupt_common+0x114/0x180 > --- interrupt: 501 at snooze_loop+0xc4/0x1a0 > LR = snooze_loop+0x16c/0x1a0 > [c000000000f13d60] [c00000000063b41c] nap_loop+0x5c/0x120 (unreliable) > [c000000000f13da0] [c000000000637f9c] cpuidle_enter_state+0xbc/0x3d0 > [c000000000f13e00] [c00000000011db10] call_cpuidle+0x50/0x80 > [c000000000f13e20] [c00000000011e138] cpu_startup_entry+0x388/0x490 > [c000000000f13ee0] [c00000000000c260] rest_init+0xb0/0xd0 > [c000000000f13f00] [c000000000aa4070] start_kernel+0x55c/0x578 > [c000000000f13f90] [c000000000008e6c] start_here_common+0x20/0xb4 > nvme nvme0: I/O 782 QID 9 timeout, completion polled > nvme nvme0: I/O 99 QID 12 timeout, completion polled > nvme nvme0: I/O 925 QID 4 timeout, completio > > > Stacktrace for qlt_reset: > > BUG: spinlock recursion on CPU#0, swapper/0/0 > lock: 0xc00000207d5ffec0, .magic: dead4ead, .owner: swapper/0/0, .owner_cpu: 0 > CPU: 0 PID: 0 Comm: swapper/0 Tainted: G OE NX 4.4.132-ttln.25-debug #1 > Call Trace: > [c000003fff71b8d0] [c0000000008060c0] dump_stack+0xb0/0xf0 (unreliable) > [c000003fff71b910] [c0000000007ff6ec] spin_dump+0xa8/0xc4 > [c000003fff71b980] [c0000000001283a4] do_raw_spin_lock+0x1c4/0x1d0 > [c000003fff71b9c0] [c0000000007f7354] _raw_spin_lock_irqsave+0x34/0x50 > [c000003fff71b9f0] [d0000000128933b4] qlt_schedule_sess_for_deletion+0x44/0x80 [qla2xxx] > [c000003fff71ba30] [d000000012893454] qlt_clear_tgt_db+0x64/0x90 [qla2xxx] > [c000003fff71ba60] [d000000012893604] qlt_reset+0x184/0x1f0 [qla2xxx] > [c000003fff71bb10] [d000000012897a2c] qlt_handle_imm_notify+0x74c/0x10b0 [qla2xxx] > [c000003fff71bc00] [d000000012898610] qlt_24xx_atio_pkt+0x280/0x400 [qla2xxx] > [c000003fff71bca0] [d00000001289a9f8] qlt_24xx_process_atio_queue+0x368/0x7d0 [qla2xxx] > [c000003fff71bd80] [d00000001289b8b8] qla83xx_msix_atio_q+0x58/0x90 [qla2xxx] > [c000003fff71bdc0] [c000000000133cd0] __handle_irq_event_percpu+0xa0/0x2f0 > [c000003fff71be80] [c000000000133f5c] handle_irq_event_percpu+0x3c/0x90 > [c000003fff71bec0] [c000000000134018] handle_irq_event+0x68/0xb0 > [c000003fff71bf00] [c000000000139278] handle_fasteoi_irq+0xf8/0x260 > [c000003fff71bf40] [c000000000132e80] generic_handle_irq+0x50/0x80 > [c000003fff71bf60] [c000000000014c44] __do_irq+0x84/0x1d0 > [c000003fff71bf90] [c000000000027924] call_do_irq+0x14/0x24 > [c000000000f13a40] [c000000000014e30] do_IRQ+0xa0/0x120 > [c000000000f13a90] [c000000000002694] hardware_interrupt_common+0x114/0x180 > --- interrupt: 501 at arch_local_irq_restore+0x5c/0x90 > LR = arch_local_irq_restore+0x40/0x90 > [c000000000f13d80] [c000000000f10000] init_thread_union+0x0/0x2000 (unreliable) > [c000000000f13da0] [c000000000637fec] cpuidle_enter_state+0x10c/0x3d0 > [c000000000f13e00] [c00000000011db10] call_cpuidle+0x50/0x80 > [c000000000f13e20] [c00000000011e138] cpu_startup_entry+0x388/0x490 > [c000000000f13ee0] [c00000000000c260] rest_init+0xb0/0xd0 > [c000000000f13f00] [c000000000aa4070] start_kernel+0x55c/0x578 > [c000000000f13f90] [c000000000008e6c] start_here_common+0x20/0xb4 > > Steps to reproduce: > 1. Configure qla card as target and export lun > 2. Connect it to FC switch using both ports. > 3. Connect initiator to the switch using both ports. > 4a. Switch the initiator ports to reproduce the first issue > 4b. Switch the target ports on switch to reproduce the second one > > Signed-off-by: Mikhail Malygin <m.malygin@xxxxxxxxx> > > Fixed qla_reset > --- > drivers/scsi/qla2xxx/qla_target.c | 27 +++++++++++++++++---------- > 1 file changed, 17 insertions(+), 10 deletions(-) > > diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c > index 025dc2d3f3de..75f9c648255a 100644 > --- a/drivers/scsi/qla2xxx/qla_target.c > +++ b/drivers/scsi/qla2xxx/qla_target.c > @@ -1227,11 +1227,9 @@ static void qla24xx_chk_fcp_state(struct fc_port *sess) > } > } > > -void qlt_schedule_sess_for_deletion(struct fc_port *sess) > +static void qlt_schedule_sess_for_deletion_locked(struct fc_port *sess) > { > struct qla_tgt *tgt = sess->tgt; > - struct qla_hw_data *ha = sess->vha->hw; > - unsigned long flags; > > if (sess->disc_state == DSC_DELETE_PEND) > return; > @@ -1247,16 +1245,13 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess) > return; > } > > - spin_lock_irqsave(&ha->tgt.sess_lock, flags); > if (sess->deleted == QLA_SESS_DELETED) > sess->logout_on_delete = 0; > > if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { > - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); > return; > } > sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; > - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); > > sess->disc_state = DSC_DELETE_PEND; > > @@ -1269,6 +1264,16 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess) > WARN_ON(!queue_work(sess->vha->hw->wq, &sess->del_work)); > } > > +void qlt_schedule_sess_for_deletion(struct fc_port *sess) > +{ > + struct qla_hw_data *ha = sess->vha->hw; > + unsigned long flags; > + > + spin_lock_irqsave(&ha->tgt.sess_lock, flags); > + qlt_schedule_sess_for_deletion_locked(sess); > + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); > +} > + > static void qlt_clear_tgt_db(struct qla_tgt *tgt) > { > struct fc_port *sess; > @@ -1276,7 +1281,7 @@ static void qlt_clear_tgt_db(struct qla_tgt *tgt) > > list_for_each_entry(sess, &vha->vp_fcports, list) { > if (sess->se_sess) > - qlt_schedule_sess_for_deletion(sess); > + qlt_schedule_sess_for_deletion_locked(sess); > } > > /* At this point tgt could be already dead */ > @@ -1513,7 +1518,9 @@ int qlt_stop_phase1(struct qla_tgt *tgt) > */ > mutex_lock(&vha->vha_tgt.tgt_mutex); > tgt->tgt_stop = 1; > + spin_lock_irqsave(&ha->tgt.sess_lock, flags); > qlt_clear_tgt_db(tgt); > + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); > mutex_unlock(&vha->vha_tgt.tgt_mutex); > mutex_unlock(&qla_tgt_mutex); > > @@ -4527,7 +4534,7 @@ qlt_find_sess_invalidate_other(scsi_qla_host_t *vha, uint64_t wwn, > * might have cleared it when requested this session > * deletion, so don't touch it > */ > - qlt_schedule_sess_for_deletion(other_sess); > + qlt_schedule_sess_for_deletion_locked(other_sess); > } else { > /* > * Another wwn used to have our s_id/loop_id > @@ -4540,7 +4547,7 @@ qlt_find_sess_invalidate_other(scsi_qla_host_t *vha, uint64_t wwn, > other_sess->keep_nport_handle = 1; > if (other_sess->disc_state != DSC_DELETED) > *conflict_sess = other_sess; > - qlt_schedule_sess_for_deletion(other_sess); > + qlt_schedule_sess_for_deletion_locked(other_sess); > } > continue; > } > @@ -4554,7 +4561,7 @@ qlt_find_sess_invalidate_other(scsi_qla_host_t *vha, uint64_t wwn, > > /* Same loop_id but different s_id > * Ok to kill and logout */ > - qlt_schedule_sess_for_deletion(other_sess); > + qlt_schedule_sess_for_deletion_locked(other_sess); > } > } > > -- > 2.15.1 (Apple Git-101) > Thanks, - Himanshu