Re: [PATCH 1/2] scsi_host: add support for request batching

Paolo Bonzini <pbonzini@xxxxxxxxxx> · Wed, 19 Jun 2019 12:31:58 +0200

On 19/06/19 10:11, Hannes Reinecke wrote:
> On 5/30/19 1:28 PM, Paolo Bonzini wrote:
>> This allows a list of requests to be issued, with the LLD only writing
>> the hardware doorbell when necessary, after the last request was prepared.
>> This is more efficient if we have lists of requests to issue, particularly
>> on virtualized hardware, where writing the doorbell is more expensive than
>> on real hardware.
>>
>> The use case for this is plugged IO, where blk-mq flushes a batch of
>> requests all at once.
>>
>> The API is the same as for blk-mq, just with blk-mq concepts tweaked to
>> fit the SCSI subsystem API: the "last" flag in blk_mq_queue_data becomes
>> a flag in scsi_cmnd, while the queue_num in the commit_rqs callback is
>> extracted from the hctx and passed as a parameter.
>>
>> The only complication is that blk-mq uses different plugging heuristics
>> depending on whether commit_rqs is present or not.  So we have two
>> different sets of blk_mq_ops and pick one depending on whether the
>> scsi_host template uses commit_rqs or not.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
>> ---
>>  drivers/scsi/scsi_lib.c  | 37 ++++++++++++++++++++++++++++++++++---
>>  include/scsi/scsi_cmnd.h |  1 +
>>  include/scsi/scsi_host.h | 16 ++++++++++++++--
>>  3 files changed, 49 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
>> index 601b9f1de267..eb4e67d02bfe 100644
>> --- a/drivers/scsi/scsi_lib.c
>> +++ b/drivers/scsi/scsi_lib.c
>> @@ -1673,10 +1673,11 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
>>  		blk_mq_start_request(req);
>>  	}
>>  
>> +	cmd->flags &= SCMD_PRESERVED_FLAGS;
>>  	if (sdev->simple_tags)
>>  		cmd->flags |= SCMD_TAGGED;
>> -	else
>> -		cmd->flags &= ~SCMD_TAGGED;
>> +	if (bd->last)
>> +		cmd->flags |= SCMD_LAST;
>>  
>>  	scsi_init_cmd_errh(cmd);
>>  	cmd->scsi_done = scsi_mq_done;
>> @@ -1807,10 +1808,37 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
>>  }
>>  EXPORT_SYMBOL_GPL(__scsi_init_queue);
>>  
>> +static const struct blk_mq_ops scsi_mq_ops_no_commit = {
>> +	.get_budget	= scsi_mq_get_budget,
>> +	.put_budget	= scsi_mq_put_budget,
>> +	.queue_rq	= scsi_queue_rq,
>> +	.complete	= scsi_softirq_done,
>> +	.timeout	= scsi_timeout,
>> +#ifdef CONFIG_BLK_DEBUG_FS
>> +	.show_rq	= scsi_show_rq,
>> +#endif
>> +	.init_request	= scsi_mq_init_request,
>> +	.exit_request	= scsi_mq_exit_request,
>> +	.initialize_rq_fn = scsi_initialize_rq,
>> +	.busy		= scsi_mq_lld_busy,
>> +	.map_queues	= scsi_map_queues,
>> +};
>> +
>> +
>> +static void scsi_commit_rqs(struct blk_mq_hw_ctx *hctx)
>> +{
>> +	struct request_queue *q = hctx->queue;
>> +	struct scsi_device *sdev = q->queuedata;
>> +	struct Scsi_Host *shost = sdev->host;
>> +
>> +	shost->hostt->commit_rqs(shost, hctx->queue_num);
>> +}
>> +
>>  static const struct blk_mq_ops scsi_mq_ops = {
>>  	.get_budget	= scsi_mq_get_budget,
>>  	.put_budget	= scsi_mq_put_budget,
>>  	.queue_rq	= scsi_queue_rq,
>> +	.commit_rqs	= scsi_commit_rqs,
>>  	.complete	= scsi_softirq_done,
>>  	.timeout	= scsi_timeout,
>>  #ifdef CONFIG_BLK_DEBUG_FS
>> @@ -1845,7 +1873,10 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
>>  		cmd_size += sizeof(struct scsi_data_buffer) + sgl_size;
>>  
>>  	memset(&shost->tag_set, 0, sizeof(shost->tag_set));
>> -	shost->tag_set.ops = &scsi_mq_ops;
>> +	if (shost->hostt->commit_rqs)
>> +		shost->tag_set.ops = &scsi_mq_ops;
>> +	else
>> +		shost->tag_set.ops = &scsi_mq_ops_no_commit;
>>  	shost->tag_set.nr_hw_queues = shost->nr_hw_queues ? : 1;
>>  	shost->tag_set.queue_depth = shost->can_queue;
>>  	shost->tag_set.cmd_size = cmd_size;
>> diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
>> index 76ed5e4acd38..91bd749a02f7 100644
>> --- a/include/scsi/scsi_cmnd.h
>> +++ b/include/scsi/scsi_cmnd.h
>> @@ -57,6 +57,7 @@ struct scsi_pointer {
>>  #define SCMD_TAGGED		(1 << 0)
>>  #define SCMD_UNCHECKED_ISA_DMA	(1 << 1)
>>  #define SCMD_INITIALIZED	(1 << 2)
>> +#define SCMD_LAST		(1 << 3)
>>  /* flags preserved across unprep / reprep */
>>  #define SCMD_PRESERVED_FLAGS	(SCMD_UNCHECKED_ISA_DMA | SCMD_INITIALIZED)
>>  
>> diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
>> index 2b539a1b3f62..28f1c9177cd2 100644
>> --- a/include/scsi/scsi_host.h
>> +++ b/include/scsi/scsi_host.h
>> @@ -80,8 +80,10 @@ struct scsi_host_template {
>>  	 * command block to the LLDD.  When the driver finished
>>  	 * processing the command the done callback is invoked.
>>  	 *
>> -	 * If queuecommand returns 0, then the HBA has accepted the
>> -	 * command.  The done() function must be called on the command
>> +	 * If queuecommand returns 0, then the driver has accepted the
>> +	 * command.  It must also push it to the HBA if the scsi_cmnd
>> +	 * flag SCMD_LAST is set, or if the driver does not implement
>> +	 * commit_rqs.  The done() function must be called on the command
>>  	 * when the driver has finished with it. (you may call done on the
>>  	 * command before queuecommand returns, but in this case you
>>  	 * *must* return 0 from queuecommand).
>> @@ -109,6 +111,16 @@ struct scsi_host_template {
>>  	 */
>>  	int (* queuecommand)(struct Scsi_Host *, struct scsi_cmnd *);
>>  
>> +	/*
>> +	 * The commit_rqs function is used to trigger a hardware
>> +	 * doorbell after some requests have been queued with
>> +	 * queuecommand, when an error is encountered before sending
>> +	 * the request with SCMD_LAST set.
>> +	 *
>> +	 * STATUS: OPTIONAL
>> +	 */
>> +	void (*commit_rqs)(struct Scsi_Host *, u16);
>> +
>>  	/*
>>  	 * This is an error handling strategy routine.  You don't need to
>>  	 * define one of these if you don't want to - there is a default
>>
> I'm a bit unsure if 'bd->last' is always set; it's quite obvious that
> it's present if set, but what about requests with 'bd->last == false' ?
> Is there a guarantee that they will _always_ be followed with a request
> with bd->last == true?
> And if so, is there a guarantee that this request is part of the same batch?

It's complicated.  A request with bd->last == false _will_ always be
followed by a request with bd->last == true in the same batch.  However,
due to e.g. errors it may be possible that the last request is not sent.
 In that case, the block layer sends commit_rqs, as documented in the
comment above, to flush the requests that have been sent already.

So, a driver that obeys bd->last (or SCMD_LAST) but does not implement
commit_rqs is bound to have bugs, which is why this patch was not split
further.

Makes sense?

Paolo

> Aside from it: I think it's a good idea to match the '->last' setting
> onto the SCMD_LAST flag; I would even go so far and make this an
> independent patch.

> Once to above points are cleared, that is.
> 
> But if that one is in, why do we need to have the separate 'commit_rqs'
> callback?
> Can't we let the driver decide to issue a doorbell kick (or whatever the
> driver decides to do there)?
> If we ensure that the SCMD_LAST flag is always set for the end of a
> batch (even if this batch consists only of one request), the driver
> simply can evaluate the flag and do its actions.
> Why do we need a new callback here?
> 
> Cheers,
> 
> Hannes
>