Re: [PATCH] scsi: avoid to fetch scsi host template instance in IO path

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 28/02/2020 09:33, Ming Lei wrote:
scsi host template struct is quite big, and the following three
fields are needed in SCSI IO path:

- queuecommand
- commit_rqs
- cmd_size

Would it have been nearly as good to reorganise Scsi host template structure to ensure that these are adjacent?

I say nearly, as it avoids the shost->hostt read.


Cache them into scsi host intance, so that we can avoid to fetch
big scsi host template instance in IO path.

40% IOPS boost can be observed in my scsi_debug performance test after
applying this change.

Cc: Sumanesh Samanta <sumanesh.samanta@xxxxxxxxxxxx>
Cc: Ewan D . Milne <emilne@xxxxxxxxxx>
Cc: Hannes Reinecke <hare@xxxxxxx>
Cc: Bart Van Assche <bart.vanassche@xxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
---
  drivers/scsi/hosts.c     |  4 ++++
  drivers/scsi/scsi_lib.c  | 10 +++++-----
  include/scsi/scsi_host.h | 11 ++++++++++-
  3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 1d669e47b692..8012c1db092e 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -466,6 +466,10 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
  	if (sht->virt_boundary_mask)
  		shost->virt_boundary_mask = sht->virt_boundary_mask;
+ shost->cmd_size = sht->cmd_size;
+	shost->queuecommand = sht->queuecommand;
+	shost->commit_rqs = sht->commit_rqs;
+
  	device_initialize(&shost->shost_gendev);
  	dev_set_name(&shost->shost_gendev, "host%d", shost->host_no);
  	shost->shost_gendev.bus = &scsi_bus_type;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index e7fbf3a9a6aa..f4243ae1d4a9 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1148,7 +1148,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
  	in_flight = test_bit(SCMD_STATE_INFLIGHT, &cmd->state);
  	/* zero out the cmd, except for the embedded scsi_request */
  	memset((char *)cmd + sizeof(cmd->req), 0,
-		sizeof(*cmd) - sizeof(cmd->req) + dev->host->hostt->cmd_size);
+		sizeof(*cmd) - sizeof(cmd->req) + dev->host->cmd_size);
cmd->device = dev;
  	cmd->sense_buffer = buf;
@@ -1547,7 +1547,7 @@ static int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
  	}
trace_scsi_dispatch_cmd_start(cmd);
-	rtn = host->hostt->queuecommand(host, cmd);
+	rtn = host->queuecommand(host, cmd);
  	if (rtn) {
  		trace_scsi_dispatch_cmd_error(cmd, rtn);
  		if (rtn != SCSI_MLQUEUE_DEVICE_BUSY &&
@@ -1584,7 +1584,7 @@ static blk_status_t scsi_mq_prep_fn(struct request *req)
  	cmd->tag = req->tag;
  	cmd->prot_op = SCSI_PROT_NORMAL;
- sg = (void *)cmd + sizeof(struct scsi_cmnd) + shost->hostt->cmd_size;
+	sg = (void *)cmd + sizeof(struct scsi_cmnd) + shost->cmd_size;
  	cmd->sdb.table.sgl = sg;
if (scsi_host_get_prot(shost)) {
@@ -1752,7 +1752,7 @@ static int scsi_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
if (scsi_host_get_prot(shost)) {
  		sg = (void *)cmd + sizeof(struct scsi_cmnd) +
-			shost->hostt->cmd_size;
+			shost->cmd_size;
  		cmd->prot_sdb = (void *)sg + scsi_mq_inline_sgl_size(shost);
  	}
@@ -1844,7 +1844,7 @@ static void scsi_commit_rqs(struct blk_mq_hw_ctx *hctx)
  	struct scsi_device *sdev = q->queuedata;
  	struct Scsi_Host *shost = sdev->host;
- shost->hostt->commit_rqs(shost, hctx->queue_num);
+	shost->commit_rqs(shost, hctx->queue_num);
  }
static const struct blk_mq_ops scsi_mq_ops = {
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index f577647bf5f2..ccd5b9a5de2a 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -522,7 +522,16 @@ struct Scsi_Host {
  	 */
  	struct list_head	__devices;
  	struct list_head	__targets;
-	
+
+	/*
+	 * cacahe the three fields from scsi_host_template, so that
+	 * the big host template  instance needn't to be fetched in
+	 * IO path. Big IOPS boost can be observed by this way.
+	 */
+	unsigned int cmd_size;

any reason not to add cacheline alignment? And I think function pointers will be at least same size as unsigned int, so it may make more sense to put cmd_size last.

+	int (*queuecommand)(struct Scsi_Host *, struct scsi_cmnd *);
+	void (*commit_rqs)(struct Scsi_Host *, u16);
+
  	struct list_head	starved_list;
spinlock_t default_lock;





[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]

  Powered by Linux