> > The following unlikely races can be triggered by the completion path > (ufshcd_trc_handler()): > - After the UTRLCNR register has been read from interrupt context and > before it is cleared, the UFS error handler reads the UTRLCNR register. > Hold the SCSI host lock until the UTRLCNR register has been cleared to > prevent that this register is accessed from another CPU before it has > been cleared. > - After the doorbell register has been read and before outstanding_reqs > is cleared, the error handler reads the doorbell register. This can also > result in double completions. Fix this by clearing outstanding_reqs > before calling ufshcd_transfer_req_compl(). > > Due to this change ufshcd_trc_handler() no longer updates > outstanding_reqs > atomically. Hence protect all other outstanding_reqs changes with the SCSI > host lock. But isn't the whole point of REG_UTP_TRANSFER_REQ_LIST_COMPL is to eliminate the host lock As a source of contention? > > This patch is a performance improvement because it reduces the number of > atomic operations in the hot path (test_and_clear_bit()). Both Can & Stanley reported a performance improvement of RR with "Optimize host lock..". Can those short numerical studies can be repeated with this patch? Thanks, Avri > > See also commit a45f937110fa ("scsi: ufs: Optimize host lock on transfer > requests send/compl paths"). > > Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx> > Cc: Stanley Chu <stanley.chu@xxxxxxxxxxxx> > Cc: Can Guo <cang@xxxxxxxxxxxxxx> > Cc: Asutosh Das <asutoshd@xxxxxxxxxxxxxx> > Cc: Avri Altman <avri.altman@xxxxxxx> > Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx> > --- > drivers/scsi/ufs/ufshcd.c | 36 +++++++++++++++++------------------- > 1 file changed, 17 insertions(+), 19 deletions(-) > > diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c > index 83a32b71240e..996b95ab74aa 100644 > --- a/drivers/scsi/ufs/ufshcd.c > +++ b/drivers/scsi/ufs/ufshcd.c > @@ -2088,6 +2088,7 @@ static inline > void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag) > { > struct ufshcd_lrb *lrbp = &hba->lrb[task_tag]; > + unsigned long flags; > > lrbp->issue_time_stamp = ktime_get(); > lrbp->compl_time_stamp = ktime_set(0, 0); > @@ -2096,19 +2097,12 @@ void ufshcd_send_command(struct ufs_hba > *hba, unsigned int task_tag) > ufshcd_clk_scaling_start_busy(hba); > if (unlikely(ufshcd_should_inform_monitor(hba, lrbp))) > ufshcd_start_monitor(hba, lrbp); > - if (ufshcd_has_utrlcnr(hba)) { > - set_bit(task_tag, &hba->outstanding_reqs); > - ufshcd_writel(hba, 1 << task_tag, > - REG_UTP_TRANSFER_REQ_DOOR_BELL); > - } else { > - unsigned long flags; > > - spin_lock_irqsave(hba->host->host_lock, flags); > - set_bit(task_tag, &hba->outstanding_reqs); > - ufshcd_writel(hba, 1 << task_tag, > - REG_UTP_TRANSFER_REQ_DOOR_BELL); > - spin_unlock_irqrestore(hba->host->host_lock, flags); > - } > + spin_lock_irqsave(hba->host->host_lock, flags); > + __set_bit(task_tag, &hba->outstanding_reqs); > + spin_unlock_irqrestore(hba->host->host_lock, flags); > + > + ufshcd_writel(hba, 1 << task_tag, > REG_UTP_TRANSFER_REQ_DOOR_BELL); > /* Make sure that doorbell is committed immediately */ > wmb(); > } > @@ -2890,7 +2884,9 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba > *hba, > * we also need to clear the outstanding_request > * field in hba > */ > - clear_bit(lrbp->task_tag, &hba->outstanding_reqs); > + spin_lock_irqsave(hba->host->host_lock, flags); > + __clear_bit(lrbp->task_tag, &hba->outstanding_reqs); > + spin_unlock_irqrestore(hba->host->host_lock, flags); > } > > return err; > @@ -5197,8 +5193,6 @@ static void ufshcd_transfer_req_compl(struct > ufs_hba *hba, > bool update_scaling = false; > > for_each_set_bit(index, &completed_reqs, hba->nutrs) { > - if (!test_and_clear_bit(index, &hba->outstanding_reqs)) > - continue; > lrbp = &hba->lrb[index]; > lrbp->compl_time_stamp = ktime_get(); > cmd = lrbp->cmd; > @@ -5241,6 +5235,7 @@ static void ufshcd_transfer_req_compl(struct > ufs_hba *hba, > static irqreturn_t ufshcd_trc_handler(struct ufs_hba *hba, bool use_utrlcnr) > { > unsigned long completed_reqs; > + unsigned long flags; > > /* Resetting interrupt aggregation counters first and reading the > * DOOR_BELL afterward allows us to handle all the completed requests. > @@ -5253,6 +5248,7 @@ static irqreturn_t ufshcd_trc_handler(struct > ufs_hba *hba, bool use_utrlcnr) > !(hba->quirks & UFSHCI_QUIRK_SKIP_RESET_INTR_AGGR)) > ufshcd_reset_intr_aggr(hba); > > + spin_lock_irqsave(hba->host->host_lock, flags); > if (use_utrlcnr) { > completed_reqs = ufshcd_readl(hba, > REG_UTP_TRANSFER_REQ_LIST_COMPL); > @@ -5260,14 +5256,16 @@ static irqreturn_t ufshcd_trc_handler(struct > ufs_hba *hba, bool use_utrlcnr) > ufshcd_writel(hba, completed_reqs, > REG_UTP_TRANSFER_REQ_LIST_COMPL); > } else { > - unsigned long flags; > u32 tr_doorbell; > > - spin_lock_irqsave(hba->host->host_lock, flags); > tr_doorbell = ufshcd_readl(hba, > REG_UTP_TRANSFER_REQ_DOOR_BELL); > - completed_reqs = tr_doorbell ^ hba->outstanding_reqs; > - spin_unlock_irqrestore(hba->host->host_lock, flags); > + completed_reqs = ~tr_doorbell & hba->outstanding_reqs; > } > + WARN_ONCE(completed_reqs & ~hba->outstanding_reqs, > + "completed: %#lx; outstanding: %#lx\n", completed_reqs, > + hba->outstanding_reqs); > + hba->outstanding_reqs &= ~completed_reqs; > + spin_unlock_irqrestore(hba->host->host_lock, flags); > > if (completed_reqs) { > ufshcd_transfer_req_compl(hba, completed_reqs);