On 7/16/21 9:54 AM, Bart Van Assche wrote:
On 7/16/21 9:26 AM, Asutosh Das (asd) wrote:
I agree. We saw substantial improvement with RR and RW too with the
'Optimize host lock change'.
Recent UFS driver patches introduced three changes:
(1) Use the UTRLCNR register instead of the doorbell register in the
completion path.
(2) Use atomic instructions instead of the host lock for updating the
outstanding_reqs structure member.
(3) Reduce lock contention on the SCSI host lock.
My patch preserves (3) so it should preserve the performance
improvements that are the result of eliminating lock contention for
outstanding_reqs updates.
For clarity, this is the patch for which I reported a 1% performance improvement:
Subject: [PATCH] ufs: Fix a race in the completion path
The following unlikely races can be triggered by the completion path
(ufshcd_trc_handler()):
- After the UTRLCNR register has been read from interrupt context and
before it is cleared, the UFS error handler reads the UTRLCNR register.
Hold the SCSI host lock until the UTRLCNR register has been cleared to
prevent that this register is accessed from another CPU before it has
been cleared.
- After the doorbell register has been read and before outstanding_reqs
is cleared, the error handler reads the doorbell register. This can also
result in double completions. Fix this by clearing outstanding_reqs
before calling ufshcd_transfer_req_compl().
Due to this change ufshcd_trc_handler() no longer updates outstanding_reqs
atomically. Hence protect all other outstanding_reqs changes with the SCSI
host lock.
This patch is a performance improvement because it reduces the number of
atomic operations in the hot path (test_and_clear_bit()).
See also commit a45f937110fa ("scsi: ufs: Optimize host lock on transfer
requests send/compl paths").
Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx>
Cc: Stanley Chu <stanley.chu@xxxxxxxxxxxx>
Cc: Can Guo <cang@xxxxxxxxxxxxxx>
Cc: Asutosh Das <asutoshd@xxxxxxxxxxxxxx>
Cc: Avri Altman <avri.altman@xxxxxxx>
Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
---
drivers/scsi/ufs/ufshcd.c | 52 ++++++++++++++++++---------------------
drivers/scsi/ufs/ufshcd.h | 2 ++
2 files changed, 26 insertions(+), 28 deletions(-)
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 0cb84a744dad..7b8d3928fed8 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2088,6 +2088,7 @@ static inline
void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag)
{
struct ufshcd_lrb *lrbp = &hba->lrb[task_tag];
+ unsigned long flags;
lrbp->issue_time_stamp = ktime_get();
lrbp->compl_time_stamp = ktime_set(0, 0);
@@ -2096,19 +2097,12 @@ void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag)
ufshcd_clk_scaling_start_busy(hba);
if (unlikely(ufshcd_should_inform_monitor(hba, lrbp)))
ufshcd_start_monitor(hba, lrbp);
- if (ufshcd_has_utrlcnr(hba)) {
- set_bit(task_tag, &hba->outstanding_reqs);
- ufshcd_writel(hba, 1 << task_tag,
- REG_UTP_TRANSFER_REQ_DOOR_BELL);
- } else {
- unsigned long flags;
- spin_lock_irqsave(hba->host->host_lock, flags);
- set_bit(task_tag, &hba->outstanding_reqs);
- ufshcd_writel(hba, 1 << task_tag,
- REG_UTP_TRANSFER_REQ_DOOR_BELL);
- spin_unlock_irqrestore(hba->host->host_lock, flags);
- }
+ spin_lock_irqsave(&hba->outstanding_lock, flags);
+ __set_bit(task_tag, &hba->outstanding_reqs);
+ spin_unlock_irqrestore(&hba->outstanding_lock, flags);
+
+ ufshcd_writel(hba, 1 << task_tag, REG_UTP_TRANSFER_REQ_DOOR_BELL);
/* Make sure that doorbell is committed immediately */
wmb();
}
@@ -2890,7 +2884,9 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
* we also need to clear the outstanding_request
* field in hba
*/
- clear_bit(lrbp->task_tag, &hba->outstanding_reqs);
+ spin_lock_irqsave(&hba->outstanding_lock, flags);
+ __clear_bit(lrbp->task_tag, &hba->outstanding_reqs);
+ spin_unlock_irqrestore(&hba->outstanding_lock, flags);
}
return err;
@@ -5197,8 +5193,6 @@ static void ufshcd_transfer_req_compl(struct ufs_hba *hba,
bool update_scaling = false;
for_each_set_bit(index, &completed_reqs, hba->nutrs) {
- if (!test_and_clear_bit(index, &hba->outstanding_reqs))
- continue;
lrbp = &hba->lrb[index];
lrbp->compl_time_stamp = ktime_get();
cmd = lrbp->cmd;
@@ -5241,6 +5235,7 @@ static void ufshcd_transfer_req_compl(struct ufs_hba *hba,
static irqreturn_t ufshcd_trc_handler(struct ufs_hba *hba, bool use_utrlcnr)
{
unsigned long completed_reqs = 0;
+ unsigned long flags;
/* Resetting interrupt aggregation counters first and reading the
* DOOR_BELL afterward allows us to handle all the completed requests.
@@ -5253,24 +5248,24 @@ static irqreturn_t ufshcd_trc_handler(struct ufs_hba *hba, bool use_utrlcnr)
!(hba->quirks & UFSHCI_QUIRK_SKIP_RESET_INTR_AGGR))
ufshcd_reset_intr_aggr(hba);
+ spin_lock_irqsave(&hba->outstanding_lock, flags);
if (use_utrlcnr) {
- u32 utrlcnr;
-
- utrlcnr = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_LIST_COMPL);
- if (utrlcnr) {
- ufshcd_writel(hba, utrlcnr,
+ completed_reqs = ufshcd_readl(hba,
+ REG_UTP_TRANSFER_REQ_LIST_COMPL);
+ if (completed_reqs)
+ ufshcd_writel(hba, completed_reqs,
REG_UTP_TRANSFER_REQ_LIST_COMPL);
- completed_reqs = utrlcnr;
- }
} else {
- unsigned long flags;
u32 tr_doorbell;
- spin_lock_irqsave(hba->host->host_lock, flags);
tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
- completed_reqs = tr_doorbell ^ hba->outstanding_reqs;
- spin_unlock_irqrestore(hba->host->host_lock, flags);
+ completed_reqs = ~tr_doorbell & hba->outstanding_reqs;
}
+ WARN_ONCE(completed_reqs & ~hba->outstanding_reqs,
+ "completed: %#lx; outstanding: %#lx\n", completed_reqs,
+ hba->outstanding_reqs);
+ hba->outstanding_reqs &= ~completed_reqs;
+ spin_unlock_irqrestore(&hba->outstanding_lock, flags);
if (completed_reqs) {
ufshcd_transfer_req_compl(hba, completed_reqs);
@@ -9357,10 +9352,11 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle)
hba = shost_priv(host);
hba->host = host;
hba->dev = dev;
- *hba_handle = hba;
hba->dev_ref_clk_freq = REF_CLK_FREQ_INVAL;
-
INIT_LIST_HEAD(&hba->clk_list_head);
+ spin_lock_init(&hba->outstanding_lock);
+
+ *hba_handle = hba;
out_error:
return err;
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index f8766e8f3cac..e47a796bc114 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -695,6 +695,7 @@ struct ufs_hba_monitor {
* @lrb: local reference block
* @cmd_queue: Used to allocate command tags from hba->host->tag_set.
* @outstanding_tasks: Bits representing outstanding task requests
+ * @outstanding_lock: Protects @outstanding_reqs.
* @outstanding_reqs: Bits representing outstanding transfer requests
* @capabilities: UFS Controller Capabilities
* @nutrs: Transfer Request Queue depth supported by controller
@@ -781,6 +782,7 @@ struct ufs_hba {
struct ufshcd_lrb *lrb;
unsigned long outstanding_tasks;
+ spinlock_t outstanding_lock;
unsigned long outstanding_reqs;
u32 capabilities;