It isn't necessary to check the host depth in scsi_queue_rq() any more since it has been respected by blk-mq before calling scsi_queue_rq() via getting driver tag. Lots of LUNs may attach to same host, and per-host IOPS may reach millions level, so we should avoid to this expensive atomic operations on the hostwide counter in IO path. This patch implemens scsi_host_busy() via blk_mq_tagset_busy_iter() for reading the count of busy IOs for scsi_mq. It is observed that IOPS is increased by 15% in IO test on scsi_debug (32 LUNs, 32 submit queues, 1024 can_queue, libaio/dio) in one dual-socket system. Cc: Omar Sandoval <osandov@xxxxxx>, Cc: "Martin K. Petersen" <martin.petersen@xxxxxxxxxx>, Cc: James Bottomley <james.bottomley@xxxxxxxxxxxxxxxxxxxxx>, Cc: Christoph Hellwig <hch@xxxxxx>, Cc: Don Brace <don.brace@xxxxxxxxxxxxx> Cc: Kashyap Desai <kashyap.desai@xxxxxxxxxxxx> Cc: Mike Snitzer <snitzer@xxxxxxxxxx> Cc: Hannes Reinecke <hare@xxxxxxx> Cc: Laurence Oberman <loberman@xxxxxxxxxx> Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> --- drivers/scsi/hosts.c | 24 +++++++++++++++++++++++- drivers/scsi/scsi_lib.c | 23 +++++++++++++++++------ 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 69beb30205f1..ad56e2b10ac8 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -564,13 +564,35 @@ struct Scsi_Host *scsi_host_get(struct Scsi_Host *shost) } EXPORT_SYMBOL(scsi_host_get); +struct scsi_host_mq_in_flight { + int cnt; +}; + +static void scsi_host_check_in_flight(struct request *rq, void *data, + bool reserved) +{ + struct scsi_host_mq_in_flight *in_flight = data; + + if (blk_mq_request_started(rq)) + in_flight->cnt++; +} + /** * scsi_host_busy - Return the host busy counter * @shost: Pointer to Scsi_Host to inc. **/ int scsi_host_busy(struct Scsi_Host *shost) { - return atomic_read(&shost->host_busy); + struct scsi_host_mq_in_flight in_flight = { + .cnt = 0, + }; + + if (!shost->use_blk_mq) + return atomic_read(&shost->host_busy); + + blk_mq_tagset_busy_iter(&shost->tag_set, scsi_host_check_in_flight, + &in_flight); + return in_flight.cnt; } EXPORT_SYMBOL(scsi_host_busy); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 0dfec0dedd5e..dc437c642934 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -345,7 +345,8 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost) unsigned long flags; rcu_read_lock(); - atomic_dec(&shost->host_busy); + if (!shost->use_blk_mq) + atomic_dec(&shost->host_busy); if (unlikely(scsi_host_in_recovery(shost))) { spin_lock_irqsave(shost->host_lock, flags); if (shost->host_failed || shost->host_eh_scheduled) @@ -444,7 +445,12 @@ static inline bool scsi_target_is_busy(struct scsi_target *starget) static inline bool scsi_host_is_busy(struct Scsi_Host *shost) { - if (shost->can_queue > 0 && + /* + * blk-mq can handle host queue busy efficiently via host-wide driver + * tag allocation + */ + + if (!shost->use_blk_mq && shost->can_queue > 0 && atomic_read(&shost->host_busy) >= shost->can_queue) return true; if (atomic_read(&shost->host_blocked) > 0) @@ -1539,9 +1545,12 @@ static inline int scsi_host_queue_ready(struct request_queue *q, if (scsi_host_in_recovery(shost)) return 0; - busy = atomic_inc_return(&shost->host_busy) - 1; + if (!shost->use_blk_mq) + busy = atomic_inc_return(&shost->host_busy) - 1; + else + busy = 0; if (atomic_read(&shost->host_blocked) > 0) { - if (busy) + if (busy || scsi_host_busy(shost)) goto starved; /* @@ -1555,7 +1564,7 @@ static inline int scsi_host_queue_ready(struct request_queue *q, "unblocking host at zero depth\n")); } - if (shost->can_queue > 0 && busy >= shost->can_queue) + if (!shost->use_blk_mq && shost->can_queue > 0 && busy >= shost->can_queue) goto starved; if (shost->host_self_blocked) goto starved; @@ -1641,7 +1650,9 @@ static void scsi_kill_request(struct request *req, struct request_queue *q) * with the locks as normal issue path does. */ atomic_inc(&sdev->device_busy); - atomic_inc(&shost->host_busy); + + if (!shost->use_blk_mq) + atomic_inc(&shost->host_busy); if (starget->can_queue > 0) atomic_inc(&starget->target_busy); -- 2.9.5