Don't burden the common block code with with specifics of the libata DMA draining mechanism. Instead move most of the code to the scsi midlayer. That also means the nr_phys_segments adjustments in the blk-mq fast path can go away entirely, given that SCSI never looks at nr_phys_segments after mapping the request to a scatterlist. Signed-off-by: Christoph Hellwig <hch@xxxxxx> wip --- block/blk-merge.c | 14 -------------- block/blk-mq.c | 11 ----------- block/blk-settings.c | 37 ------------------------------------ drivers/ata/libata-scsi.c | 28 ++++++++++----------------- drivers/scsi/scsi_lib.c | 39 +++++++++++++++++++++++++++++++++----- include/linux/blkdev.h | 7 ------- include/linux/libata.h | 2 ++ include/scsi/scsi_device.h | 3 +++ include/scsi/scsi_host.h | 7 +++++++ 9 files changed, 56 insertions(+), 92 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index ee618cdb141e..25f5a5e00ee6 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -539,20 +539,6 @@ int __blk_rq_map_sg(struct request_queue *q, struct request *rq, rq->extra_len += pad_len; } - if (q->dma_drain_size && q->dma_drain_needed(rq)) { - if (op_is_write(req_op(rq))) - memset(q->dma_drain_buffer, 0, q->dma_drain_size); - - sg_unmark_end(*last_sg); - *last_sg = sg_next(*last_sg); - sg_set_page(*last_sg, virt_to_page(q->dma_drain_buffer), - q->dma_drain_size, - ((unsigned long)q->dma_drain_buffer) & - (PAGE_SIZE - 1)); - nsegs++; - rq->extra_len += q->dma_drain_size; - } - if (*last_sg) sg_mark_end(*last_sg); diff --git a/block/blk-mq.c b/block/blk-mq.c index 8e56884fd2e9..28ad7e1e850b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -667,15 +667,6 @@ void blk_mq_start_request(struct request *rq) blk_add_timer(rq); WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT); - if (q->dma_drain_size && blk_rq_bytes(rq)) { - /* - * Make sure space for the drain appears. We know we can do - * this because max_hw_segments has been adjusted to be one - * fewer than the device can handle. - */ - rq->nr_phys_segments++; - } - #ifdef CONFIG_BLK_DEV_INTEGRITY if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE) q->integrity.profile->prepare_fn(rq); @@ -695,8 +686,6 @@ static void __blk_mq_requeue_request(struct request *rq) if (blk_mq_request_started(rq)) { WRITE_ONCE(rq->state, MQ_RQ_IDLE); rq->rq_flags &= ~RQF_TIMED_OUT; - if (q->dma_drain_size && blk_rq_bytes(rq)) - rq->nr_phys_segments--; } } diff --git a/block/blk-settings.c b/block/blk-settings.c index 14397b4c4b53..2ab1967b9716 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -651,43 +651,6 @@ void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask) } EXPORT_SYMBOL(blk_queue_update_dma_pad); -/** - * blk_queue_dma_drain - Set up a drain buffer for excess dma. - * @q: the request queue for the device - * @dma_drain_needed: fn which returns non-zero if drain is necessary - * @buf: physically contiguous buffer - * @size: size of the buffer in bytes - * - * Some devices have excess DMA problems and can't simply discard (or - * zero fill) the unwanted piece of the transfer. They have to have a - * real area of memory to transfer it into. The use case for this is - * ATAPI devices in DMA mode. If the packet command causes a transfer - * bigger than the transfer size some HBAs will lock up if there - * aren't DMA elements to contain the excess transfer. What this API - * does is adjust the queue so that the buf is always appended - * silently to the scatterlist. - * - * Note: This routine adjusts max_hw_segments to make room for appending - * the drain buffer. If you call blk_queue_max_segments() after calling - * this routine, you must set the limit to one fewer than your device - * can support otherwise there won't be room for the drain buffer. - */ -int blk_queue_dma_drain(struct request_queue *q, - dma_drain_needed_fn *dma_drain_needed, - void *buf, unsigned int size) -{ - if (queue_max_segments(q) < 2) - return -EINVAL; - /* make room for appending the drain */ - blk_queue_max_segments(q, queue_max_segments(q) - 1); - q->dma_drain_needed = dma_drain_needed; - q->dma_drain_buffer = buf; - q->dma_drain_size = size; - - return 0; -} -EXPORT_SYMBOL_GPL(blk_queue_dma_drain); - /** * blk_queue_segment_boundary - set boundary rules for segment merging * @q: the request queue for the device diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 36e588d88b95..feb13b8f93d7 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1017,16 +1017,11 @@ void ata_scsi_sdev_config(struct scsi_device *sdev) * RETURNS: * 1 if ; otherwise, 0. */ -static int atapi_drain_needed(struct request *rq) +bool ata_scsi_dma_need_drain(struct request *rq) { - if (likely(!blk_rq_is_passthrough(rq))) - return 0; - - if (!blk_rq_bytes(rq) || op_is_write(req_op(rq))) - return 0; - return atapi_cmd_type(scsi_req(rq)->cmd[0]) == ATAPI_MISC; } +EXPORT_SYMBOL_GPL(ata_scsi_dma_need_drain); int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev) { @@ -1039,21 +1034,21 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev) blk_queue_max_hw_sectors(q, dev->max_sectors); if (dev->class == ATA_DEV_ATAPI) { - void *buf; - sdev->sector_size = ATA_SECT_SIZE; /* set DMA padding */ blk_queue_update_dma_pad(q, ATA_DMA_PAD_SZ - 1); - /* configure draining */ - buf = kmalloc(ATAPI_MAX_DRAIN, q->bounce_gfp | GFP_KERNEL); - if (!buf) { + /* make room for appending the drain */ + blk_queue_max_segments(q, queue_max_segments(q) - 1); + + sdev->dma_drain_len = ATAPI_MAX_DRAIN; + sdev->dma_drain_buf = kmalloc(sdev->dma_drain_len, + q->bounce_gfp | GFP_KERNEL); + if (!sdev->dma_drain_buf) { ata_dev_err(dev, "drain buffer allocation failed\n"); return -ENOMEM; } - - blk_queue_dma_drain(q, atapi_drain_needed, buf, ATAPI_MAX_DRAIN); } else { sdev->sector_size = ata_id_logical_sector_size(dev->id); sdev->manage_start_stop = 1; @@ -1135,7 +1130,6 @@ EXPORT_SYMBOL_GPL(ata_scsi_slave_config); void ata_scsi_slave_destroy(struct scsi_device *sdev) { struct ata_port *ap = ata_shost_to_port(sdev->host); - struct request_queue *q = sdev->request_queue; unsigned long flags; struct ata_device *dev; @@ -1152,9 +1146,7 @@ void ata_scsi_slave_destroy(struct scsi_device *sdev) } spin_unlock_irqrestore(ap->lock, flags); - kfree(q->dma_drain_buffer); - q->dma_drain_buffer = NULL; - q->dma_drain_size = 0; + kfree(sdev->dma_drain_buf); } EXPORT_SYMBOL_GPL(ata_scsi_slave_destroy); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 274dd3ffa66b..b561c6dbda6b 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -978,6 +978,14 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) scsi_io_completion_action(cmd, result); } +static inline bool scsi_cmd_needs_dma_drain(struct scsi_device *sdev, + struct request *rq) +{ + return sdev->dma_drain_len && blk_rq_is_passthrough(rq) && + !op_is_write(req_op(rq)) && + sdev->host->hostt->dma_need_drain(rq); +} + /* * Function: scsi_init_io() * @@ -991,26 +999,47 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) */ blk_status_t scsi_init_io(struct scsi_cmnd *cmd) { + struct scsi_device *sdev = cmd->device; struct request *rq = cmd->request; + unsigned short nr_segs = blk_rq_nr_phys_segments(rq); + struct scatterlist *last_sg = NULL; blk_status_t ret; + bool need_drain = scsi_cmd_needs_dma_drain(sdev, rq); int count; - if (WARN_ON_ONCE(!blk_rq_nr_phys_segments(rq))) + if (WARN_ON_ONCE(!nr_segs)) return BLK_STS_IOERR; + /* + * Make sure there is space for the drain. The driver must adjust + * max_hw_segments to be prepared for this. + */ + if (need_drain) + nr_segs++; + /* * If sg table allocation fails, requeue request later. */ - if (unlikely(sg_alloc_table_chained(&cmd->sdb.table, - blk_rq_nr_phys_segments(rq), cmd->sdb.table.sgl, - SCSI_INLINE_SG_CNT))) + if (unlikely(sg_alloc_table_chained(&cmd->sdb.table, nr_segs, + cmd->sdb.table.sgl, SCSI_INLINE_SG_CNT))) return BLK_STS_RESOURCE; /* * Next, walk the list, and fill in the addresses and sizes of * each segment. */ - count = blk_rq_map_sg(rq->q, rq, cmd->sdb.table.sgl); + count = __blk_rq_map_sg(rq->q, rq, cmd->sdb.table.sgl, &last_sg); + + if (need_drain) { + sg_unmark_end(last_sg); + last_sg = sg_next(last_sg); + sg_set_buf(last_sg, sdev->dma_drain_buf, sdev->dma_drain_len); + sg_mark_end(last_sg); + + rq->extra_len += sdev->dma_drain_len; + count++; + } + BUG_ON(count > cmd->sdb.table.nents); cmd->sdb.table.nents = count; cmd->sdb.length = blk_rq_payload_bytes(rq); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 496dc9491026..8e4726bce498 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -288,7 +288,6 @@ struct blk_queue_ctx; typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); struct bio_vec; -typedef int (dma_drain_needed_fn)(struct request *); enum blk_eh_timer_return { BLK_EH_DONE, /* drivers has completed the command */ @@ -397,7 +396,6 @@ struct request_queue { struct rq_qos *rq_qos; make_request_fn *make_request_fn; - dma_drain_needed_fn *dma_drain_needed; const struct blk_mq_ops *mq_ops; @@ -467,8 +465,6 @@ struct request_queue { */ unsigned long nr_requests; /* Max # of requests */ - unsigned int dma_drain_size; - void *dma_drain_buffer; unsigned int dma_pad_mask; unsigned int dma_alignment; @@ -1097,9 +1093,6 @@ extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); -extern int blk_queue_dma_drain(struct request_queue *q, - dma_drain_needed_fn *dma_drain_needed, - void *buf, unsigned int size); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); extern void blk_queue_dma_alignment(struct request_queue *, int); diff --git a/include/linux/libata.h b/include/linux/libata.h index cffa4714bfa8..af832852e620 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1092,6 +1092,7 @@ extern int ata_scsi_ioctl(struct scsi_device *dev, unsigned int cmd, #define ATA_SCSI_COMPAT_IOCTL /* empty */ #endif extern int ata_scsi_queuecmd(struct Scsi_Host *h, struct scsi_cmnd *cmd); +bool ata_scsi_dma_need_drain(struct request *rq); extern int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *dev, unsigned int cmd, void __user *arg); extern bool ata_link_online(struct ata_link *link); @@ -1387,6 +1388,7 @@ extern struct device_attribute *ata_common_sdev_attrs[]; .ioctl = ata_scsi_ioctl, \ ATA_SCSI_COMPAT_IOCTL \ .queuecommand = ata_scsi_queuecmd, \ + .dma_need_drain = ata_scsi_dma_need_drain, \ .can_queue = ATA_DEF_QUEUE, \ .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ .this_id = ATA_SHT_THIS_ID, \ diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index c3cba2aaf934..bc5909033d13 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -229,6 +229,9 @@ struct scsi_device { struct scsi_device_handler *handler; void *handler_data; + size_t dma_drain_len; + void *dma_drain_buf; + unsigned char access_state; struct mutex state_mutex; enum scsi_device_state sdev_state; diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 822e8cda8d9b..46ef8cccc982 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -270,6 +270,13 @@ struct scsi_host_template { */ int (* map_queues)(struct Scsi_Host *shost); + /* + * Check if scatterlists need to be padded for DMA draining. + * + * Status: OPTIONAL + */ + bool (* dma_need_drain)(struct request *rq); + /* * This function determines the BIOS parameters for a given * harddisk. These tend to be numbers that are made up by -- 2.25.1