One more small comment. Thanks, Avri > > > > The following unlikely races can be triggered by the completion path > > (ufshcd_trc_handler()): > > - After the UTRLCNR register has been read from interrupt context and > > before it is cleared, the UFS error handler reads the UTRLCNR register. > > Hold the SCSI host lock until the UTRLCNR register has been cleared to > > prevent that this register is accessed from another CPU before it has > > been cleared. > > - After the doorbell register has been read and before outstanding_reqs > > is cleared, the error handler reads the doorbell register. This can also > > result in double completions. Fix this by clearing outstanding_reqs > > before calling ufshcd_transfer_req_compl(). > > > > Due to this change ufshcd_trc_handler() no longer updates > > outstanding_reqs > > atomically. Hence protect all other outstanding_reqs changes with the SCSI > > host lock. > But isn't the whole point of using REG_UTP_TRANSFER_REQ_LIST_COMPL is to > eliminate the host lock > As a source of contention? > > > > > This patch is a performance improvement because it reduces the number > of > > atomic operations in the hot path (test_and_clear_bit()). > Both Can & Stanley reported a performance improvement of RR with > "Optimize host lock..". > Can those short numerical studies can be repeated with this patch? Please use rq_affinity = 2 this time. Thanks, Avri > > Thanks, > Avri > > > > > See also commit a45f937110fa ("scsi: ufs: Optimize host lock on transfer > > requests send/compl paths"). > > > > Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx> > > Cc: Stanley Chu <stanley.chu@xxxxxxxxxxxx> > > Cc: Can Guo <cang@xxxxxxxxxxxxxx> > > Cc: Asutosh Das <asutoshd@xxxxxxxxxxxxxx> > > Cc: Avri Altman <avri.altman@xxxxxxx> > > Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx> > > --- > > drivers/scsi/ufs/ufshcd.c | 36 +++++++++++++++++------------------- > > 1 file changed, 17 insertions(+), 19 deletions(-) > > > > diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c > > index 83a32b71240e..996b95ab74aa 100644 > > --- a/drivers/scsi/ufs/ufshcd.c > > +++ b/drivers/scsi/ufs/ufshcd.c > > @@ -2088,6 +2088,7 @@ static inline > > void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag) > > { > > struct ufshcd_lrb *lrbp = &hba->lrb[task_tag]; > > + unsigned long flags; > > > > lrbp->issue_time_stamp = ktime_get(); > > lrbp->compl_time_stamp = ktime_set(0, 0); > > @@ -2096,19 +2097,12 @@ void ufshcd_send_command(struct ufs_hba > > *hba, unsigned int task_tag) > > ufshcd_clk_scaling_start_busy(hba); > > if (unlikely(ufshcd_should_inform_monitor(hba, lrbp))) > > ufshcd_start_monitor(hba, lrbp); > > - if (ufshcd_has_utrlcnr(hba)) { > > - set_bit(task_tag, &hba->outstanding_reqs); > > - ufshcd_writel(hba, 1 << task_tag, > > - REG_UTP_TRANSFER_REQ_DOOR_BELL); > > - } else { > > - unsigned long flags; > > > > - spin_lock_irqsave(hba->host->host_lock, flags); > > - set_bit(task_tag, &hba->outstanding_reqs); > > - ufshcd_writel(hba, 1 << task_tag, > > - REG_UTP_TRANSFER_REQ_DOOR_BELL); > > - spin_unlock_irqrestore(hba->host->host_lock, flags); > > - } > > + spin_lock_irqsave(hba->host->host_lock, flags); > > + __set_bit(task_tag, &hba->outstanding_reqs); > > + spin_unlock_irqrestore(hba->host->host_lock, flags); > > + > > + ufshcd_writel(hba, 1 << task_tag, > > REG_UTP_TRANSFER_REQ_DOOR_BELL); > > /* Make sure that doorbell is committed immediately */ > > wmb(); > > } > > @@ -2890,7 +2884,9 @@ static int ufshcd_wait_for_dev_cmd(struct > ufs_hba > > *hba, > > * we also need to clear the outstanding_request > > * field in hba > > */ > > - clear_bit(lrbp->task_tag, &hba->outstanding_reqs); > > + spin_lock_irqsave(hba->host->host_lock, flags); > > + __clear_bit(lrbp->task_tag, &hba->outstanding_reqs); > > + spin_unlock_irqrestore(hba->host->host_lock, flags); > > } > > > > return err; > > @@ -5197,8 +5193,6 @@ static void ufshcd_transfer_req_compl(struct > > ufs_hba *hba, > > bool update_scaling = false; > > > > for_each_set_bit(index, &completed_reqs, hba->nutrs) { > > - if (!test_and_clear_bit(index, &hba->outstanding_reqs)) > > - continue; > > lrbp = &hba->lrb[index]; > > lrbp->compl_time_stamp = ktime_get(); > > cmd = lrbp->cmd; > > @@ -5241,6 +5235,7 @@ static void ufshcd_transfer_req_compl(struct > > ufs_hba *hba, > > static irqreturn_t ufshcd_trc_handler(struct ufs_hba *hba, bool > use_utrlcnr) > > { > > unsigned long completed_reqs; > > + unsigned long flags; > > > > /* Resetting interrupt aggregation counters first and reading the > > * DOOR_BELL afterward allows us to handle all the completed > requests. > > @@ -5253,6 +5248,7 @@ static irqreturn_t ufshcd_trc_handler(struct > > ufs_hba *hba, bool use_utrlcnr) > > !(hba->quirks & UFSHCI_QUIRK_SKIP_RESET_INTR_AGGR)) > > ufshcd_reset_intr_aggr(hba); > > > > + spin_lock_irqsave(hba->host->host_lock, flags); > > if (use_utrlcnr) { > > completed_reqs = ufshcd_readl(hba, > > REG_UTP_TRANSFER_REQ_LIST_COMPL); > > @@ -5260,14 +5256,16 @@ static irqreturn_t ufshcd_trc_handler(struct > > ufs_hba *hba, bool use_utrlcnr) > > ufshcd_writel(hba, completed_reqs, > > REG_UTP_TRANSFER_REQ_LIST_COMPL); > > } else { > > - unsigned long flags; > > u32 tr_doorbell; > > > > - spin_lock_irqsave(hba->host->host_lock, flags); > > tr_doorbell = ufshcd_readl(hba, > > REG_UTP_TRANSFER_REQ_DOOR_BELL); > > - completed_reqs = tr_doorbell ^ hba->outstanding_reqs; > > - spin_unlock_irqrestore(hba->host->host_lock, flags); > > + completed_reqs = ~tr_doorbell & hba->outstanding_reqs; > > } > > + WARN_ONCE(completed_reqs & ~hba->outstanding_reqs, > > + "completed: %#lx; outstanding: %#lx\n", completed_reqs, > > + hba->outstanding_reqs); > > + hba->outstanding_reqs &= ~completed_reqs; > > + spin_unlock_irqrestore(hba->host->host_lock, flags); > > > > if (completed_reqs) { > > ufshcd_transfer_req_compl(hba, completed_reqs);