> > The time spent in io_schedule() is significant when submitting direct I/O to a > UFS device. Hence this patch that implements polling support. > User space software can enable polling by passing the RWF_HIPRI flag to the > preadv2() system call or the IORING_SETUP_IOPOLL flag to the io_uring > interface. Do you expect that this will fix, or at least to some extent, the queue drainage that is so noticeable in direct benchmarks? Can you share some early estimations? Also I think this should be a separate patch as well. > > Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx> > --- > drivers/scsi/ufs/ufshcd.c | 45 +++++++++++++++++++++++---------------- > 1 file changed, 27 insertions(+), 18 deletions(-) > > diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index > 36df89e8a575..70f128f12445 100644 > --- a/drivers/scsi/ufs/ufshcd.c > +++ b/drivers/scsi/ufs/ufshcd.c > @@ -5250,6 +5250,31 @@ static void __ufshcd_transfer_req_compl(struct > ufs_hba *hba, > } > } > > +/* > + * Returns > 0 if one or more commands have been completed or 0 if no > + * requests have been completed. > + */ > +static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num) > +{ > + struct ufs_hba *hba = shost_priv(shost); > + unsigned long completed_reqs, flags; > + u32 tr_doorbell; > + > + spin_lock_irqsave(&hba->outstanding_lock, flags); > + tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); > + completed_reqs = ~tr_doorbell & hba->outstanding_reqs; > + WARN_ONCE(completed_reqs & ~hba->outstanding_reqs, > + "completed: %#lx; outstanding: %#lx\n", completed_reqs, > + hba->outstanding_reqs); > + hba->outstanding_reqs &= ~completed_reqs; > + spin_unlock_irqrestore(&hba->outstanding_lock, flags); > + > + if (completed_reqs) > + __ufshcd_transfer_req_compl(hba, completed_reqs); > + > + return completed_reqs; > +} > + > /** > * ufshcd_transfer_req_compl - handle SCSI and query command completion > * @hba: per adapter instance > @@ -5260,9 +5285,6 @@ static void __ufshcd_transfer_req_compl(struct > ufs_hba *hba, > */ > static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba) { > - unsigned long completed_reqs, flags; > - u32 tr_doorbell; > - > /* Resetting interrupt aggregation counters first and reading the > * DOOR_BELL afterward allows us to handle all the completed requests. > * In order to prevent other interrupts starvation the DB is read once @@ - > 5277,21 +5299,7 @@ static irqreturn_t ufshcd_transfer_req_compl(struct > ufs_hba *hba) > if (ufs_fail_completion()) > return IRQ_HANDLED; > > - spin_lock_irqsave(&hba->outstanding_lock, flags); > - tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); > - completed_reqs = ~tr_doorbell & hba->outstanding_reqs; > - WARN_ONCE(completed_reqs & ~hba->outstanding_reqs, > - "completed: %#lx; outstanding: %#lx\n", completed_reqs, > - hba->outstanding_reqs); > - hba->outstanding_reqs &= ~completed_reqs; > - spin_unlock_irqrestore(&hba->outstanding_lock, flags); > - > - if (completed_reqs) { > - __ufshcd_transfer_req_compl(hba, completed_reqs); > - return IRQ_HANDLED; > - } else { > - return IRQ_NONE; > - } > + return ufshcd_poll(hba->host, 0) ? IRQ_HANDLED : IRQ_NONE; > } > > int __ufshcd_write_ee_control(struct ufs_hba *hba, u32 ee_ctrl_mask) @@ - > 8112,6 +8120,7 @@ static struct scsi_host_template ufshcd_driver_template = > { > .name = UFSHCD, > .proc_name = UFSHCD, > .queuecommand = ufshcd_queuecommand, > + .mq_poll = ufshcd_poll, Did you consider to use some form blk_mq_tagset_busy_iter, And return nutrs - busy? Thanks, Avri > .slave_alloc = ufshcd_slave_alloc, > .slave_configure = ufshcd_slave_configure, > .slave_destroy = ufshcd_slave_destroy,