[PATCH v16 05/26] blk-zoned: Fix a deadlock triggered by unaligned writes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



If the queue is filled with unaligned writes then the following
deadlock occurs:

Call Trace:
 <TASK>
 __schedule+0x8cc/0x2190
 schedule+0xdd/0x2b0
 blk_queue_enter+0x2ce/0x4f0
 blk_mq_alloc_request+0x303/0x810
 scsi_execute_cmd+0x3f4/0x7b0
 sd_zbc_do_report_zones+0x19e/0x4c0
 sd_zbc_report_zones+0x304/0x920
 disk_zone_wplug_handle_error+0x237/0x920
 disk_zone_wplugs_work+0x17e/0x430
 process_one_work+0xdd0/0x1490
 worker_thread+0x5eb/0x1010
 kthread+0x2e5/0x3b0
 ret_from_fork+0x3a/0x80
 </TASK>

Fix this deadlock by removing the disk->fops->report_zones() call and by
deriving the write pointer information from successfully completed zoned
writes.

Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
---
 block/blk-zoned.c | 69 +++++++++++++++++++----------------------------
 block/blk.h       |  4 ++-
 2 files changed, 31 insertions(+), 42 deletions(-)

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index b570b773e65f..284820b29285 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -56,6 +56,8 @@ static const char *const zone_cond_name[] = {
  * @zone_no: The number of the zone the plug is managing.
  * @wp_offset: The zone write pointer location relative to the start of the zone
  *             as a number of 512B sectors.
+ * @wp_offset_compl: End offset for completed zoned writes as a number of 512
+ *		     byte sectors.
  * @bio_list: The list of BIOs that are currently plugged.
  * @bio_work: Work struct to handle issuing of plugged BIOs
  * @rcu_head: RCU head to free zone write plugs with an RCU grace period.
@@ -69,6 +71,7 @@ struct blk_zone_wplug {
 	unsigned int		flags;
 	unsigned int		zone_no;
 	unsigned int		wp_offset;
+	unsigned int		wp_offset_compl;
 	struct bio_list		bio_list;
 	struct work_struct	bio_work;
 	struct rcu_head		rcu_head;
@@ -531,6 +534,7 @@ static struct blk_zone_wplug *disk_get_and_lock_zone_wplug(struct gendisk *disk,
 	zwplug->flags = 0;
 	zwplug->zone_no = zno;
 	zwplug->wp_offset = sector & (disk->queue->limits.chunk_sectors - 1);
+	zwplug->wp_offset_compl = 0;
 	bio_list_init(&zwplug->bio_list);
 	INIT_WORK(&zwplug->bio_work, blk_zone_wplug_bio_work);
 	zwplug->disk = disk;
@@ -1226,6 +1230,7 @@ void blk_zone_write_plug_bio_endio(struct bio *bio)
 	struct gendisk *disk = bio->bi_bdev->bd_disk;
 	struct blk_zone_wplug *zwplug =
 		disk_get_zone_wplug(disk, bio->bi_iter.bi_sector);
+	unsigned int end_sector;
 	unsigned long flags;
 
 	if (WARN_ON_ONCE(!zwplug))
@@ -1243,11 +1248,24 @@ void blk_zone_write_plug_bio_endio(struct bio *bio)
 		bio->bi_opf |= REQ_OP_ZONE_APPEND;
 	}
 
-	/*
-	 * If the BIO failed, mark the plug as having an error to trigger
-	 * recovery.
-	 */
-	if (bio->bi_status != BLK_STS_OK) {
+	if (bio->bi_status == BLK_STS_OK) {
+		switch (bio_op(bio)) {
+		case REQ_OP_WRITE:
+		case REQ_OP_ZONE_APPEND:
+		case REQ_OP_WRITE_ZEROES:
+			end_sector = bdev_offset_from_zone_start(disk->part0,
+				     bio->bi_iter.bi_sector + bio_sectors(bio));
+			if (end_sector > zwplug->wp_offset_compl)
+				zwplug->wp_offset_compl = end_sector;
+			break;
+		default:
+			break;
+		}
+	} else {
+		/*
+		 * If the BIO failed, mark the plug as having an error to
+		 * trigger recovery.
+		 */
 		spin_lock_irqsave(&zwplug->lock, flags);
 		disk_zone_wplug_set_error(disk, zwplug);
 		spin_unlock_irqrestore(&zwplug->lock, flags);
@@ -1388,30 +1406,10 @@ static unsigned int blk_zone_wp_offset(struct blk_zone *zone)
 	}
 }
 
-static int blk_zone_wplug_report_zone_cb(struct blk_zone *zone,
-					 unsigned int idx, void *data)
-{
-	struct blk_zone *zonep = data;
-
-	*zonep = *zone;
-	return 0;
-}
-
 static void disk_zone_wplug_handle_error(struct gendisk *disk,
 					 struct blk_zone_wplug *zwplug)
 {
-	sector_t zone_start_sector =
-		bdev_zone_sectors(disk->part0) * zwplug->zone_no;
-	unsigned int noio_flag;
-	struct blk_zone zone;
 	unsigned long flags;
-	int ret;
-
-	/* Get the current zone information from the device. */
-	noio_flag = memalloc_noio_save();
-	ret = disk->fops->report_zones(disk, zone_start_sector, 1,
-				       blk_zone_wplug_report_zone_cb, &zone);
-	memalloc_noio_restore(noio_flag);
 
 	spin_lock_irqsave(&zwplug->lock, flags);
 
@@ -1425,19 +1423,8 @@ static void disk_zone_wplug_handle_error(struct gendisk *disk,
 
 	zwplug->flags &= ~BLK_ZONE_WPLUG_ERROR;
 
-	if (ret != 1) {
-		/*
-		 * We failed to get the zone information, meaning that something
-		 * is likely really wrong with the device. Abort all remaining
-		 * plugged BIOs as otherwise we could endup waiting forever on
-		 * plugged BIOs to complete if there is a queue freeze on-going.
-		 */
-		disk_zone_wplug_abort(zwplug);
-		goto unplug;
-	}
-
 	/* Update the zone write pointer offset. */
-	zwplug->wp_offset = blk_zone_wp_offset(&zone);
+	zwplug->wp_offset = zwplug->wp_offset_compl;
 	disk_zone_wplug_abort_unaligned(disk, zwplug);
 
 	/* Restart BIO submission if we still have any BIO left. */
@@ -1446,7 +1433,6 @@ static void disk_zone_wplug_handle_error(struct gendisk *disk,
 		goto unlock;
 	}
 
-unplug:
 	zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED;
 	if (disk_should_remove_zone_wplug(disk, zwplug))
 		disk_remove_zone_wplug(disk, zwplug);
@@ -1978,7 +1964,7 @@ EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
 static void queue_zone_wplug_show(struct blk_zone_wplug *zwplug,
 				  struct seq_file *m)
 {
-	unsigned int zwp_wp_offset, zwp_flags;
+	unsigned int zwp_wp_offset, zwp_wp_offset_compl, zwp_flags;
 	unsigned int zwp_zone_no, zwp_ref;
 	unsigned int zwp_bio_list_size;
 	unsigned long flags;
@@ -1988,14 +1974,15 @@ static void queue_zone_wplug_show(struct blk_zone_wplug *zwplug,
 	zwp_flags = zwplug->flags;
 	zwp_ref = refcount_read(&zwplug->ref);
 	zwp_wp_offset = zwplug->wp_offset;
+	zwp_wp_offset_compl = zwplug->wp_offset_compl;
 	zwp_bio_list_size = bio_list_size(&zwplug->bio_list);
 	spin_unlock_irqrestore(&zwplug->lock, flags);
 
 	bool all_zwr_inserted = blk_zone_all_zwr_inserted(zwplug);
 
-	seq_printf(m, "zone_no %u flags 0x%x ref %u wp_offset %u bio_list_size %u all_zwr_inserted %d\n",
+	seq_printf(m, "zone_no %u flags 0x%x ref %u wp_offset %u wp_offset_compl %u bio_list_size %u all_zwr_inserted %d\n",
 		   zwp_zone_no, zwp_flags, zwp_ref, zwp_wp_offset,
-		   zwp_bio_list_size, all_zwr_inserted);
+		   zwp_wp_offset_compl, zwp_bio_list_size, all_zwr_inserted);
 }
 
 int queue_zone_wplugs_show(void *data, struct seq_file *m)
diff --git a/block/blk.h b/block/blk.h
index be945db6298d..88a6e258eafe 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -470,8 +470,10 @@ static inline void blk_zone_update_request_bio(struct request *rq,
 	 * the original BIO sector so that blk_zone_write_plug_bio_endio() can
 	 * lookup the zone write plug.
 	 */
-	if (req_op(rq) == REQ_OP_ZONE_APPEND || bio_zone_write_plugging(bio))
+	if (req_op(rq) == REQ_OP_ZONE_APPEND || bio_zone_write_plugging(bio)) {
 		bio->bi_iter.bi_sector = rq->__sector;
+		bio->bi_iter.bi_size = rq->__data_len;
+	}
 }
 
 void blk_zone_write_plug_requeue_request(struct request *rq);




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux