On 5/24/21 9:25 PM, Damien Le Moal wrote:
A target map method requesting the requeue of a bio with
DM_MAPIO_REQUEUE or completing it with DM_ENDIO_REQUEUE can cause
unaligned write errors if the bio is a write operation targeting a
sequential zone. If a zoned target request such a requeue, warn about
it and kill the IO.
The function dm_is_zone_write() is introduced to detect write operations
to zoned targets.
This change does not affect the target drivers supporting zoned devices
and exposing a zoned device, namely dm-crypt, dm-linear and dm-flakey as
none of these targets ever request a requeue.
Signed-off-by: Damien Le Moal <damien.lemoal@xxxxxxx>
Reviewed-by: Hannes Reinecke <hare@xxxxxxx>
---
drivers/md/dm-zone.c | 17 +++++++++++++++++
drivers/md/dm.c | 18 +++++++++++++++---
drivers/md/dm.h | 5 +++++
3 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index b42474043249..edc3bbb45637 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -104,6 +104,23 @@ int dm_report_zones(struct block_device *bdev, sector_t start, sector_t sector,
}
EXPORT_SYMBOL_GPL(dm_report_zones);
+bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
+{
+ struct request_queue *q = md->queue;
+
+ if (!blk_queue_is_zoned(q))
+ return false;
+
+ switch (bio_op(bio)) {
+ case REQ_OP_WRITE_ZEROES:
+ case REQ_OP_WRITE_SAME:
+ case REQ_OP_WRITE:
+ return !op_is_flush(bio->bi_opf) && bio_sectors(bio);
+ default:
+ return false;
+ }
+}
+
void dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
{
if (!blk_queue_is_zoned(q))
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 45d2dc2ee844..4426019a89cc 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -846,11 +846,15 @@ static void dec_pending(struct dm_io *io, blk_status_t error)
* Target requested pushing back the I/O.
*/
spin_lock_irqsave(&md->deferred_lock, flags);
- if (__noflush_suspending(md))
+ if (__noflush_suspending(md) &&
+ !WARN_ON_ONCE(dm_is_zone_write(md, bio)))
/* NOTE early return due to BLK_STS_DM_REQUEUE below */
bio_list_add_head(&md->deferred, io->orig_bio);
else
- /* noflush suspend was interrupted. */
+ /*
+ * noflush suspend was interrupted or this is
+ * a write to a zoned target.
+ */
io->status = BLK_STS_IOERR;
spin_unlock_irqrestore(&md->deferred_lock, flags);
}
@@ -947,7 +951,15 @@ static void clone_endio(struct bio *bio)
int r = endio(tio->ti, bio, &error);
switch (r) {
case DM_ENDIO_REQUEUE:
- error = BLK_STS_DM_REQUEUE;
+ /*
+ * Requeuing writes to a sequential zone of a zoned
+ * target will break the sequential write pattern:
+ * fail such IO.
+ */
+ if (WARN_ON_ONCE(dm_is_zone_write(md, bio)))
+ error = BLK_STS_IOERR;
+ else
+ error = BLK_STS_DM_REQUEUE;
fallthrough;
case DM_ENDIO_DONE:
break;
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index fdf1536a4b62..39c243258e24 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -107,8 +107,13 @@ void dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q);
#ifdef CONFIG_BLK_DEV_ZONED
int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
+bool dm_is_zone_write(struct mapped_device *md, struct bio *bio);
#else
#define dm_blk_report_zones NULL
+static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
+{
+ return false;
+}
#endif
/*-----------------------------------------------------------------
Reviewed-by: Himanshu Madhani <himanshu.madhani@xxxxxxxxxx>
--
Himanshu Madhani Oracle Linux Engineering