Bart,

We look forward to UFSHCD polling support. I did a review and a test; comments are inline below.

On Fri, 2021-11-19 at 11:57 -0800, Bart Van Assche wrote:
> The time spent in io_schedule() and also the interrupt latency are
> significant when submitting direct I/O to a UFS device. Hence this patch
> that implements polling support. User space software can enable polling
> by passing the RWF_HIPRI flag to the preadv2() system call or the
> IORING_SETUP_IOPOLL flag to the io_uring interface.
>
> Although the block layer supports to partition the tag space for
> interrupt-based completions (HCTX_TYPE_DEFAULT) purposes and polling
> (HCTX_TYPE_POLL), the choice has been made to use the same hardware
> queue for both hctx types because partitioning the tag space would
> negatively affect performance.
>
> On my test setup this patch increases IOPS from 2736 to 22000 (8x)
> for the following test:
>
> for hipri in 0 1; do
>     fio --ioengine=io_uring --iodepth=1 --rw=randread \
>         --runtime=60 --time_based=1 --direct=1 --name=qd1 \
>         --filename=/dev/block/sda --ioscheduler=none --gtod_reduce=1 \
>         --norandommap --hipri=$hipri
> done

For 4 KB random read, direct I/O, and iodepth=1, we did not see an IOPS
improvement from this patch. Maybe the test case above is not sufficient.

> Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
> ---
>  drivers/scsi/ufs/ufshcd.c | 85 ++++++++++++++++++++++++++++++---------
>  1 file changed, 65 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 9cf4a22f1950..14043d74389d 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -2629,6 +2629,36 @@ static inline bool is_device_wlun(struct scsi_device *sdev)
>  		ufshcd_upiu_wlun_to_scsi_wlun(UFS_UPIU_UFS_DEVICE_WLUN);
>  }
>
> +/*
> + * Associate the UFS controller queue with the default and poll HCTX types.
> + * Initialize the mq_map[] arrays.
> + */
> +static int ufshcd_map_queues(struct Scsi_Host *shost)
> +{
> +	int i, ret;
> +
> +	for (i = 0; i < shost->nr_maps; i++) {
> +		struct blk_mq_queue_map *map = &shost->tag_set.map[i];
> +
> +		switch (i) {
> +		case HCTX_TYPE_DEFAULT:
> +		case HCTX_TYPE_POLL:
> +			map->nr_queues = 1;
> +			break;
> +		case HCTX_TYPE_READ:
> +			map->nr_queues = 0;
> +			break;
> +		default:
> +			WARN_ON_ONCE(true);
> +		}
> +		map->queue_offset = 0;
> +		ret = blk_mq_map_queues(map);
> +		WARN_ON_ONCE(ret);
> +	}
> +
> +	return 0;
> +}
> +
>  static void ufshcd_init_lrb(struct ufs_hba *hba, struct ufshcd_lrb *lrb, int i)
>  {
>  	struct utp_transfer_cmd_desc *cmd_descp = hba->ucdl_base_addr;
> @@ -2664,7 +2694,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
>  	struct ufshcd_lrb *lrbp;
>  	int err = 0;
>
> -	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
> +	WARN_ONCE(tag < 0 || tag >= hba->nutrs, "Invalid tag %d\n", tag);
>
>  	/*
>  	 * Allows the UFS error handler to wait for prior ufshcd_queuecommand()
> @@ -2925,7 +2955,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
>  	}
>  	req = scsi_cmd_to_rq(scmd);
>  	tag = req->tag;
> -	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
> +	WARN_ONCE(tag < 0 || tag >= hba->nutrs, "Invalid tag %d\n", tag);

Probably this change should not be in this patch?

>  	/*
>  	 * Start the request such that blk_mq_tag_idle() is called when the
>  	 * device management request finishes.
> @@ -5272,6 +5302,31 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
>  	}
>  }
>
> +/*
> + * Returns > 0 if one or more commands have been completed or 0 if no
> + * requests have been completed.
> + */
> +static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num)
> +{
> +	struct ufs_hba *hba = shost_priv(shost);
> +	unsigned long completed_reqs, flags;
> +	u32 tr_doorbell;
> +
> +	spin_lock_irqsave(&hba->outstanding_lock, flags);
> +	tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
> +	completed_reqs = ~tr_doorbell & hba->outstanding_reqs;
> +	WARN_ONCE(completed_reqs & ~hba->outstanding_reqs,
> +		  "completed: %#lx; outstanding: %#lx\n", completed_reqs,
> +		  hba->outstanding_reqs);
> +	hba->outstanding_reqs &= ~completed_reqs;
> +	spin_unlock_irqrestore(&hba->outstanding_lock, flags);
> +
> +	if (completed_reqs)
> +		__ufshcd_transfer_req_compl(hba, completed_reqs);
> +
> +	return completed_reqs;
> +}
> +
>  /**
>   * ufshcd_transfer_req_compl - handle SCSI and query command completion
>   * @hba: per adapter instance
> @@ -5282,9 +5337,6 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
>   */
>  static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
>  {
> -	unsigned long completed_reqs, flags;
> -	u32 tr_doorbell;
> -
>  	/* Resetting interrupt aggregation counters first and reading the
>  	 * DOOR_BELL afterward allows us to handle all the completed requests.
>  	 * In order to prevent other interrupts starvation the DB is read once
> @@ -5299,21 +5351,9 @@ static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
>  	if (ufs_fail_completion())
>  		return IRQ_HANDLED;
>
> -	spin_lock_irqsave(&hba->outstanding_lock, flags);
> -	tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
> -	completed_reqs = ~tr_doorbell & hba->outstanding_reqs;
> -	WARN_ONCE(completed_reqs & ~hba->outstanding_reqs,
> -		  "completed: %#lx; outstanding: %#lx\n", completed_reqs,
> -		  hba->outstanding_reqs);
> -	hba->outstanding_reqs &= ~completed_reqs;
> -	spin_unlock_irqrestore(&hba->outstanding_lock, flags);
> +	ufshcd_poll(hba->host, 0);
>
> -	if (completed_reqs) {
> -		__ufshcd_transfer_req_compl(hba, completed_reqs);
> -		return IRQ_HANDLED;
> -	} else {
> -		return IRQ_NONE;
> -	}
> +	return IRQ_HANDLED;
>  }

With this change ufshcd_transfer_req_compl() will never return IRQ_NONE,
but ufshcd_intr() needs that return value to detect fake (spurious)
interrupts. See the sketch at the end of this mail.

>  int __ufshcd_write_ee_control(struct ufs_hba *hba, u32 ee_ctrl_mask)
> @@ -6564,6 +6604,8 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba,
>  	spin_lock_irqsave(host->host_lock, flags);
>
>  	task_tag = req->tag;
> +	WARN_ONCE(task_tag < 0 || task_tag >= hba->nutmrs, "Invalid tag %d\n",
> +		  task_tag);
>  	hba->tmf_rqs[req->tag] = req;
>  	treq->upiu_req.req_header.dword_0 |= cpu_to_be32(task_tag);
>
> @@ -6705,7 +6747,7 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
>  	}
>  	req = scsi_cmd_to_rq(scmd);
>  	tag = req->tag;
> -	WARN_ONCE(tag < 0, "Invalid tag %d\n", tag);
> +	WARN_ONCE(tag < 0 || tag >= hba->nutrs, "Invalid tag %d\n", tag);
>  	/*
>  	 * Start the request such that blk_mq_tag_idle() is called when the
>  	 * device management request finishes.
> @@ -8147,7 +8189,9 @@ static struct scsi_host_template ufshcd_driver_template = {
>  	.module = THIS_MODULE,
>  	.name = UFSHCD,
>  	.proc_name = UFSHCD,
> +	.map_queues = ufshcd_map_queues,
>  	.queuecommand = ufshcd_queuecommand,
> +	.mq_poll = ufshcd_poll,
>  	.slave_alloc = ufshcd_slave_alloc,
>  	.slave_configure = ufshcd_slave_configure,
>  	.slave_destroy = ufshcd_slave_destroy,
> @@ -9437,6 +9481,7 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle)
>  		err = -ENOMEM;
>  		goto out_error;
>  	}
> +	host->nr_maps = 3;

I don't understand why HCTX_MAX_TYPES is not used directly here; 3 is
already the maximum. See the snippet at the end of this mail.

>  	hba = shost_priv(host);
>  	hba->host = host;
>  	hba->dev = dev;
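
Regarding the IRQ_NONE comment above: one possible way (an untested
sketch, just to illustrate the idea) would be to propagate the
ufshcd_poll() result, since ufshcd_poll() returns the number of completed
requests:

static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
{
	/* ... interrupt aggregation reset and fault injection as before ... */
	if (ufs_fail_completion())
		return IRQ_HANDLED;

	/*
	 * Report IRQ_NONE when no request completed so that ufshcd_intr()
	 * can still detect fake/spurious interrupts.
	 */
	return ufshcd_poll(hba->host, 0) ? IRQ_HANDLED : IRQ_NONE;
}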
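
And for the nr_maps assignment: I would use the enum constant from
include/linux/blk-mq.h instead of the magic number, assuming that is what
is meant here, e.g.:

	host->nr_maps = HCTX_MAX_TYPES;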