Hi Jinpu, Thanks for the review. We will make changes accordingly in V2 patch set commit message. Thanks, Ajish -----Original Message----- From: Jinpu Wang <jinpu.wang@xxxxxxxxx> Sent: Monday, August 30, 2021 01:52 PM To: Ajish Koshy - I30923 <Ajish.Koshy@xxxxxxxxxxxxx> Cc: Linux SCSI Mailinglist <linux-scsi@xxxxxxxxxxxxxxx>; Vasanthalakshmi Tharmarajan - I30664 <Vasanthalakshmi.Tharmarajan@xxxxxxxxxxxxx>; Viswas G - I30667 <Viswas.G@xxxxxxxxxxxxx>; Ruksar Devadi - I52327 <Ruksar.devadi@xxxxxxxxxxxxx>; Ashokkumar N - X53535 <Ashokkumar.N@xxxxxxxxxxxxx>; Jinpu Wang <jinpu.wang@xxxxxxxxxxxxxxx> Subject: Re: [PATCH 2/4] scsi: pm80xx: fix lockup due to commit <1f02beff224e> [You don't often get email from jinpu.wang@xxxxxxxxx. Learn why this is important at http://aka.ms/LearnAboutSenderIdentification.] EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe On Mon, Aug 23, 2021 at 9:28 AM Ajish Koshy <Ajish.Koshy@xxxxxxxxxxxxx> wrote: > > Upstream commit <1f02beff224e> introduced a lock per outbound queue, > where the driver before that was using a global lock for all outbound > queues. While processing the IO responses and events, driver takes the > outbound queue spinlock and later it is supposed to release the same > spin lock in > pm8001_ccb_task_free_done() before calling command done(). > Since the older code was using a global lock, > pm8001_ccb_task_free_done() was also releasing the global spin lock. > With the commit <1f02beff224e>, > pm8001_ccb_task_free_done() remains the same and it was still using > the global lock. > > So when driver completes a SATA command,the global spinlock will be in > a locked state. > mpi_sata_completion()->spin_lock(&pm8001_ha->lock); > > Later when driver gets a scsi command for SATA drive, > pm8001_task_exec() tries to acquire the global lock and leads to > lockup crash. > > Signed-off-by: Ajish Koshy <Ajish.Koshy@xxxxxxxxxxxxx> > Signed-off-by: Viswas G <Viswas.G@xxxxxxxxxxxxx> The code looks correct, just please refer the commit with standard way eg: 1f02beff224e ("scsi: pm80xx: Remove global lock from outbound queue processing") Please also add the fixes tag as above, so the stable tree can pick it up. Thanks! > --- > drivers/scsi/pm8001/pm8001_sas.h | 3 +- > drivers/scsi/pm8001/pm80xx_hwi.c | 53 ++++++++++++++++++++++++++------ > 2 files changed, 45 insertions(+), 11 deletions(-) > > diff --git a/drivers/scsi/pm8001/pm8001_sas.h > b/drivers/scsi/pm8001/pm8001_sas.h > index 1a016a421280..3274d88a9ccc 100644 > --- a/drivers/scsi/pm8001/pm8001_sas.h > +++ b/drivers/scsi/pm8001/pm8001_sas.h > @@ -458,6 +458,7 @@ struct outbound_queue_table { > __le32 producer_index; > u32 consumer_idx; > spinlock_t oq_lock; > + unsigned long lock_flags; > }; > struct pm8001_hba_memspace { > void __iomem *memvirtaddr; > @@ -740,9 +741,7 @@ pm8001_ccb_task_free_done(struct pm8001_hba_info > *pm8001_ha, { > pm8001_ccb_task_free(pm8001_ha, task, ccb, ccb_idx); > smp_mb(); /*in order to force CPU ordering*/ > - spin_unlock(&pm8001_ha->lock); > task->task_done(task); > - spin_lock(&pm8001_ha->lock); > } > > #endif > diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c > b/drivers/scsi/pm8001/pm80xx_hwi.c > index cec932f830b8..1ae2f5c6042c 100644 > --- a/drivers/scsi/pm8001/pm80xx_hwi.c > +++ b/drivers/scsi/pm8001/pm80xx_hwi.c > @@ -2379,7 +2379,8 @@ static void mpi_ssp_event(struct pm8001_hba_info > *pm8001_ha, void *piomb) > > /*See the comments for mpi_ssp_completion */ static void > -mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) > +mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, > + struct outbound_queue_table *circularQ, void *piomb) > { > struct sas_task *t; > struct pm8001_ccb_info *ccb; > @@ -2616,7 +2617,11 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) > IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS); > ts->resp = SAS_TASK_UNDELIVERED; > ts->stat = SAS_QUEUE_FULL; > + spin_unlock_irqrestore(&circularQ->oq_lock, > + circularQ->lock_flags); > pm8001_ccb_task_free_done(pm8001_ha, t, ccb, > tag); > + spin_lock_irqsave(&circularQ->oq_lock, > + circularQ->lock_flags); > return; > } > break; > @@ -2632,7 +2637,11 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) > IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS); > ts->resp = SAS_TASK_UNDELIVERED; > ts->stat = SAS_QUEUE_FULL; > + spin_unlock_irqrestore(&circularQ->oq_lock, > + circularQ->lock_flags); > pm8001_ccb_task_free_done(pm8001_ha, t, ccb, > tag); > + spin_lock_irqsave(&circularQ->oq_lock, > + circularQ->lock_flags); > return; > } > break; > @@ -2656,7 +2665,11 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) > IO_OPEN_CNX_ERROR_STP_RESOURCES_BUSY); > ts->resp = SAS_TASK_UNDELIVERED; > ts->stat = SAS_QUEUE_FULL; > + spin_unlock_irqrestore(&circularQ->oq_lock, > + circularQ->lock_flags); > pm8001_ccb_task_free_done(pm8001_ha, t, ccb, > tag); > + spin_lock_irqsave(&circularQ->oq_lock, > + circularQ->lock_flags); > return; > } > break; > @@ -2727,7 +2740,11 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) > IO_DS_NON_OPERATIONAL); > ts->resp = SAS_TASK_UNDELIVERED; > ts->stat = SAS_QUEUE_FULL; > + spin_unlock_irqrestore(&circularQ->oq_lock, > + circularQ->lock_flags); > pm8001_ccb_task_free_done(pm8001_ha, t, ccb, > tag); > + spin_lock_irqsave(&circularQ->oq_lock, > + circularQ->lock_flags); > return; > } > break; > @@ -2747,7 +2764,11 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) > IO_DS_IN_ERROR); > ts->resp = SAS_TASK_UNDELIVERED; > ts->stat = SAS_QUEUE_FULL; > + spin_unlock_irqrestore(&circularQ->oq_lock, > + circularQ->lock_flags); > pm8001_ccb_task_free_done(pm8001_ha, t, ccb, > tag); > + spin_lock_irqsave(&circularQ->oq_lock, > + circularQ->lock_flags); > return; > } > break; > @@ -2785,12 +2806,17 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) > pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); > } else { > spin_unlock_irqrestore(&t->task_state_lock, flags); > + spin_unlock_irqrestore(&circularQ->oq_lock, > + circularQ->lock_flags); > pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag); > + spin_lock_irqsave(&circularQ->oq_lock, > + circularQ->lock_flags); > } > } > > /*See the comments for mpi_ssp_completion */ -static void > mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb) > +static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, > + struct outbound_queue_table *circularQ, void *piomb) > { > struct sas_task *t; > struct task_status_struct *ts; @@ -2890,7 +2916,11 @@ static > void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb) > IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS); > ts->resp = SAS_TASK_COMPLETE; > ts->stat = SAS_QUEUE_FULL; > + spin_unlock_irqrestore(&circularQ->oq_lock, > + circularQ->lock_flags); > pm8001_ccb_task_free_done(pm8001_ha, t, ccb, > tag); > + spin_lock_irqsave(&circularQ->oq_lock, > + circularQ->lock_flags); > return; > } > break; > @@ -3002,7 +3032,11 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb) > pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); > } else { > spin_unlock_irqrestore(&t->task_state_lock, flags); > + spin_unlock_irqrestore(&circularQ->oq_lock, > + circularQ->lock_flags); > pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag); > + spin_lock_irqsave(&circularQ->oq_lock, > + circularQ->lock_flags); > } > } > > @@ -3906,7 +3940,8 @@ static int ssp_coalesced_comp_resp(struct pm8001_hba_info *pm8001_ha, > * @pm8001_ha: our hba card information > * @piomb: IO message buffer > */ > -static void process_one_iomb(struct pm8001_hba_info *pm8001_ha, void > *piomb) > +static void process_one_iomb(struct pm8001_hba_info *pm8001_ha, > + struct outbound_queue_table *circularQ, void *piomb) > { > __le32 pHeader = *(__le32 *)piomb; > u32 opc = (u32)((le32_to_cpu(pHeader)) & 0xFFF); @@ -3948,11 > +3983,11 @@ static void process_one_iomb(struct pm8001_hba_info *pm8001_ha, void *piomb) > break; > case OPC_OUB_SATA_COMP: > pm8001_dbg(pm8001_ha, MSG, "OPC_OUB_SATA_COMP\n"); > - mpi_sata_completion(pm8001_ha, piomb); > + mpi_sata_completion(pm8001_ha, circularQ, piomb); > break; > case OPC_OUB_SATA_EVENT: > pm8001_dbg(pm8001_ha, MSG, "OPC_OUB_SATA_EVENT\n"); > - mpi_sata_event(pm8001_ha, piomb); > + mpi_sata_event(pm8001_ha, circularQ, piomb); > break; > case OPC_OUB_SSP_EVENT: > pm8001_dbg(pm8001_ha, MSG, "OPC_OUB_SSP_EVENT\n"); @@ > -4121,7 +4156,6 @@ static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec) > void *pMsg1 = NULL; > u8 bc; > u32 ret = MPI_IO_STATUS_FAIL; > - unsigned long flags; > u32 regval; > > if (vec == (pm8001_ha->max_q_num - 1)) { @@ -4138,7 +4172,7 @@ > static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec) > } > } > circularQ = &pm8001_ha->outbnd_q_tbl[vec]; > - spin_lock_irqsave(&circularQ->oq_lock, flags); > + spin_lock_irqsave(&circularQ->oq_lock, circularQ->lock_flags); > do { > /* spurious interrupt during setup if kexec-ing and > * driver doing a doorbell access w/ the pre-kexec oq > @@ -4149,7 +4183,8 @@ static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec) > ret = pm8001_mpi_msg_consume(pm8001_ha, circularQ, &pMsg1, &bc); > if (MPI_IO_STATUS_SUCCESS == ret) { > /* process the outbound message */ > - process_one_iomb(pm8001_ha, (void *)(pMsg1 - 4)); > + process_one_iomb(pm8001_ha, circularQ, > + (void *)(pMsg1 - 4)); > /* free the message from the outbound circular buffer */ > pm8001_mpi_msg_free_set(pm8001_ha, pMsg1, > circularQ, > bc); @@ -4164,7 +4199,7 @@ static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec) > break; > } > } while (1); > - spin_unlock_irqrestore(&circularQ->oq_lock, flags); > + spin_unlock_irqrestore(&circularQ->oq_lock, > + circularQ->lock_flags); > return ret; > } > > -- > 2.27.0 >