The scsi host template struct is quite big, and the following three fields are needed in the SCSI IO path: - queuecommand - commit_rqs - cmd_size Cache them in the scsi host instance, so that we can avoid fetching the big scsi host template instance in the IO path. A 40% IOPS boost can be observed in my scsi_debug performance test after applying this change. Cc: Sumanesh Samanta <sumanesh.samanta@xxxxxxxxxxxx> Cc: Ewan D . Milne <emilne@xxxxxxxxxx> Cc: Hannes Reinecke <hare@xxxxxxx> Cc: Bart Van Assche <bart.vanassche@xxxxxxx> Cc: Christoph Hellwig <hch@xxxxxx> Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> --- drivers/scsi/hosts.c | 4 ++++ drivers/scsi/scsi_lib.c | 10 +++++----- include/scsi/scsi_host.h | 11 ++++++++++- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 1d669e47b692..8012c1db092e 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -466,6 +466,10 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) if (sht->virt_boundary_mask) shost->virt_boundary_mask = sht->virt_boundary_mask; + shost->cmd_size = sht->cmd_size; + shost->queuecommand = sht->queuecommand; + shost->commit_rqs = sht->commit_rqs; + device_initialize(&shost->shost_gendev); dev_set_name(&shost->shost_gendev, "host%d", shost->host_no); shost->shost_gendev.bus = &scsi_bus_type; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e7fbf3a9a6aa..f4243ae1d4a9 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1148,7 +1148,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd) in_flight = test_bit(SCMD_STATE_INFLIGHT, &cmd->state); /* zero out the cmd, except for the embedded scsi_request */ memset((char *)cmd + sizeof(cmd->req), 0, - sizeof(*cmd) - sizeof(cmd->req) + dev->host->hostt->cmd_size); + sizeof(*cmd) - sizeof(cmd->req) + dev->host->cmd_size); cmd->device = dev; cmd->sense_buffer = buf; @@ -1547,7 +1547,7 @@ static int scsi_dispatch_cmd(struct 
scsi_cmnd *cmd) } trace_scsi_dispatch_cmd_start(cmd); - rtn = host->hostt->queuecommand(host, cmd); + rtn = host->queuecommand(host, cmd); if (rtn) { trace_scsi_dispatch_cmd_error(cmd, rtn); if (rtn != SCSI_MLQUEUE_DEVICE_BUSY && @@ -1584,7 +1584,7 @@ static blk_status_t scsi_mq_prep_fn(struct request *req) cmd->tag = req->tag; cmd->prot_op = SCSI_PROT_NORMAL; - sg = (void *)cmd + sizeof(struct scsi_cmnd) + shost->hostt->cmd_size; + sg = (void *)cmd + sizeof(struct scsi_cmnd) + shost->cmd_size; cmd->sdb.table.sgl = sg; if (scsi_host_get_prot(shost)) { @@ -1752,7 +1752,7 @@ static int scsi_mq_init_request(struct blk_mq_tag_set *set, struct request *rq, if (scsi_host_get_prot(shost)) { sg = (void *)cmd + sizeof(struct scsi_cmnd) + - shost->hostt->cmd_size; + shost->cmd_size; cmd->prot_sdb = (void *)sg + scsi_mq_inline_sgl_size(shost); } @@ -1844,7 +1844,7 @@ static void scsi_commit_rqs(struct blk_mq_hw_ctx *hctx) struct scsi_device *sdev = q->queuedata; struct Scsi_Host *shost = sdev->host; - shost->hostt->commit_rqs(shost, hctx->queue_num); + shost->commit_rqs(shost, hctx->queue_num); } static const struct blk_mq_ops scsi_mq_ops = { diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index f577647bf5f2..ccd5b9a5de2a 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -522,7 +522,16 @@ struct Scsi_Host { */ struct list_head __devices; struct list_head __targets; - + + /* + * Cache the three fields from scsi_host_template, so that + * the big host template instance needn't be fetched in + * the IO path. A big IOPS boost can be observed this way. + */ + unsigned int cmd_size; + int (*queuecommand)(struct Scsi_Host *, struct scsi_cmnd *); + void (*commit_rqs)(struct Scsi_Host *, u16); + struct list_head starved_list; spinlock_t default_lock; -- 2.20.1