Re: [PATCH 1/2] block: fix surprise removal for drivers calling blk_set_queue_dying

Markus Blöchl <Markus.Bloechl@xxxxxxxxxxxxx> · Wed, 16 Feb 2022 16:49:50 +0100

On Wed, Feb 16, 2022 at 04:09:00PM +0100, Christoph Hellwig wrote:
> Various block drivers call blk_set_queue_dying to mark a disk as dead due
> to surprise removal events, but since commit 8e141f9eb803 that doesn't
> work given that the GD_DEAD flag needs to be set to stop I/O.
> 
> Replace the driver calls to blk_set_queue_dying with a new (and properly
> documented) blk_mark_disk_dead API, and fold blk_set_queue_dying into the
> only remaining caller.
> 
> Fixes: 8e141f9eb803 ("block: drain file system I/O on del_gendisk")
> Reported-by: Markus Blöchl <markus.bloechl@xxxxxxxxxxxxx>
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> ---
>  block/blk-core.c                  | 18 +++++++++++++-----
>  drivers/block/mtip32xx/mtip32xx.c |  2 +-
>  drivers/block/rbd.c               |  2 +-
>  drivers/block/xen-blkfront.c      |  2 +-
>  drivers/md/dm.c                   |  2 +-
>  drivers/nvme/host/core.c          |  2 +-
>  drivers/nvme/host/multipath.c     |  2 +-
>  include/linux/blkdev.h            |  3 ++-
>  8 files changed, 21 insertions(+), 12 deletions(-)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index d93e3bb9a769b..15d5c5ba5bbe5 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -284,12 +284,19 @@ void blk_queue_start_drain(struct request_queue *q)
>  	wake_up_all(&q->mq_freeze_wq);
>  }
>  
> -void blk_set_queue_dying(struct request_queue *q)
> +/**
> + * blk_set_disk_dead - mark a disk as dead
> + * @disk: disk to mark as dead
> + *
> + * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O
> + * to this disk.
> + */
> +void blk_mark_disk_dead(struct gendisk *disk)
>  {
> -	blk_queue_flag_set(QUEUE_FLAG_DYING, q);
> -	blk_queue_start_drain(q);
> +	set_bit(GD_DEAD, &disk->state);
> +	blk_queue_start_drain(disk->queue);
>  }
> -EXPORT_SYMBOL_GPL(blk_set_queue_dying);
> +EXPORT_SYMBOL_GPL(blk_mark_disk_dead);

I might have missed something here, but assuming I am a driver which
employs multiple different queues, some with a disk attached to them,
some without (Is that possible? The admin queue e.g.?)
and I just lost my connection and want to notify everything below me
that their connection is dead.
Would I really want to kill disk queues differently from non-disk
queues?

How is the admin queue killed? Is it even?

>  
>  /**
>   * blk_cleanup_queue - shutdown a request queue
> @@ -308,7 +315,8 @@ void blk_cleanup_queue(struct request_queue *q)
>  	WARN_ON_ONCE(blk_queue_registered(q));
>  
>  	/* mark @q DYING, no new request or merges will be allowed afterwards */
> -	blk_set_queue_dying(q);
> +	blk_queue_flag_set(QUEUE_FLAG_DYING, q);
> +	blk_queue_start_drain(q);
>  
>  	blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
>  	blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
> diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
> index e6005c2323281..2b588b62cbbb2 100644
> --- a/drivers/block/mtip32xx/mtip32xx.c
> +++ b/drivers/block/mtip32xx/mtip32xx.c
> @@ -4112,7 +4112,7 @@ static void mtip_pci_remove(struct pci_dev *pdev)
>  			"Completion workers still active!\n");
>  	}
>  
> -	blk_set_queue_dying(dd->queue);
> +	blk_mark_disk_dead(dd->disk);

This driver is weird, I did find are reliably hint that dd->disk always
exists here. At least mtip_block_remove() has an extra check for that.

It also only set QUEUE_FLAG_DEAD if it detects a surprise removal and
not QUEUE_FLAG_DYING.

>  	set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);
>  
>  	/* Clean up the block layer. */
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 4203cdab8abfd..b844432bad20b 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -7185,7 +7185,7 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
>  		 * IO to complete/fail.
>  		 */
>  		blk_mq_freeze_queue(rbd_dev->disk->queue);
> -		blk_set_queue_dying(rbd_dev->disk->queue);
> +		blk_mark_disk_dead(rbd_dev->disk);
>  	}
>  
>  	del_gendisk(rbd_dev->disk);
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index ccd0dd0c6b83c..ca71a0585333f 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -2126,7 +2126,7 @@ static void blkfront_closing(struct blkfront_info *info)
>  
>  	/* No more blkif_request(). */
>  	blk_mq_stop_hw_queues(info->rq);
> -	blk_set_queue_dying(info->rq);
> +	blk_mark_disk_dead(info->gd);
>  	set_capacity(info->gd, 0);
>  
>  	for_each_rinfo(info, rinfo, i) {
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index dcbd6d201619d..997ace47bbd54 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -2077,7 +2077,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
>  	set_bit(DMF_FREEING, &md->flags);
>  	spin_unlock(&_minor_lock);
>  
> -	blk_set_queue_dying(md->queue);
> +	blk_mark_disk_dead(md->disk);
>  
>  	/*
>  	 * Take suspend_lock so that presuspend and postsuspend methods
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 79005ea1a33e3..469f23186159c 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -4574,7 +4574,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
>  	if (test_and_set_bit(NVME_NS_DEAD, &ns->flags))
>  		return;
>  
> -	blk_set_queue_dying(ns->queue);
> +	blk_mark_disk_dead(ns->disk);
>  	nvme_start_ns_queue(ns);
>  
>  	set_capacity_and_notify(ns->disk, 0);
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index f8bf6606eb2fc..ff775235534cf 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -848,7 +848,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
>  {
>  	if (!head->disk)
>  		return;
> -	blk_set_queue_dying(head->disk->queue);
> +	blk_mark_disk_dead(head->disk);
>  	/* make sure all pending bios are cleaned up */
>  	kblockd_schedule_work(&head->requeue_work);
>  	flush_work(&head->requeue_work);
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index f35aea98bc351..16b47035e4b06 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -748,7 +748,8 @@ extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
>  
>  bool __must_check blk_get_queue(struct request_queue *);
>  extern void blk_put_queue(struct request_queue *);
> -extern void blk_set_queue_dying(struct request_queue *);
> +
> +void blk_mark_disk_dead(struct gendisk *disk);
>  
>  #ifdef CONFIG_BLOCK
>  /*
> -- 
> 2.30.2
>