When zone is reset or finished, disk_zone_wplug_set_wp_offset() is called to update the zone write plug write pointer offset and to clear the zone error state (BLK_ZONE_WPLUG_ERROR flag) if it is set. However, this processing is missing dropping the reference to the zone write plug that was taken in disk_zone_wplug_set_error() when the error flag was first set. Furthermore, the error state handling must release the zone write plug lock to first execute a report zones command. When the report zone races with a reset or finish operation that clears the error, we can end up decrementing the zone write plug reference count twice: once in disk_zone_wplug_set_wp_offset() for the reset/finish operation and one more time in disk_zone_wplugs_work() once disk_zone_wplug_handle_error() completes. Fix this by introducing disk_zone_wplug_clear_error() as the symmetric function of disk_zone_wplug_set_error(). disk_zone_wplug_clear_error() decrements the zone write plug reference count obtained in disk_zone_wplug_set_error() only if the error handling has not started yet, that is, only if disk_zone_wplugs_work() has not yet taken the zone write plug off the error list. This ensure that either disk_zone_wplug_clear_error() or disk_zone_wplugs_work() drop the zone write plug reference count. Fixes: dd291d77cc90 ("block: Introduce zone write plugging") Signed-off-by: Damien Le Moal <dlemoal@xxxxxxxxxx> --- block/blk-zoned.c | 74 ++++++++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 26 deletions(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index e92ae0729cf8..9bded29592e0 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -658,6 +658,53 @@ static void disk_zone_wplug_abort_unaligned(struct gendisk *disk, bio_list_merge(&zwplug->bio_list, &bl); } +static inline void disk_zone_wplug_set_error(struct gendisk *disk, + struct blk_zone_wplug *zwplug) +{ + if (!(zwplug->flags & BLK_ZONE_WPLUG_ERROR)) { + unsigned long flags; + + /* + * At this point, we already have a reference on the zone write + * plug. However, since we are going to add the plug to the disk + * zone write plugs work list, increase its reference count. + * This reference will be dropped in disk_zone_wplugs_work() + * once the error state is handled, or + * in disk_zone_wplug_clear_error() if the zone is reset or + * finished. + */ + zwplug->flags |= BLK_ZONE_WPLUG_ERROR; + atomic_inc(&zwplug->ref); + + spin_lock_irqsave(&disk->zone_wplugs_lock, flags); + list_add_tail(&zwplug->link, &disk->zone_wplugs_err_list); + spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); + } +} + +static inline void disk_zone_wplug_clear_error(struct gendisk *disk, + struct blk_zone_wplug *zwplug) +{ + if (zwplug->flags & BLK_ZONE_WPLUG_ERROR) { + unsigned long flags; + + /* + * We are racing with the error handling work which drops + * the reference on the zone write plug after handling the error + * state. So remove the plug from the error list and drop its + * reference count only if the error handling has not yet + * started, that is, if the zone write plug is still listed. + */ + spin_lock_irqsave(&disk->zone_wplugs_lock, flags); + if (!list_empty(&zwplug->link)) { + list_del_init(&zwplug->link); + zwplug->flags &= ~BLK_ZONE_WPLUG_ERROR; + disk_put_zone_wplug(zwplug); + } + spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); + } +} + /* * Set a zone write plug write pointer offset to either 0 (zone reset case) * or to the zone size (zone finish case). This aborts all plugged BIOs, which @@ -691,12 +738,7 @@ static void disk_zone_wplug_set_wp_offset(struct gendisk *disk, * in a good state. So clear the error flag and decrement the * error count if we were in error state. */ - if (zwplug->flags & BLK_ZONE_WPLUG_ERROR) { - zwplug->flags &= ~BLK_ZONE_WPLUG_ERROR; - spin_lock(&disk->zone_wplugs_lock); - list_del_init(&zwplug->link); - spin_unlock(&disk->zone_wplugs_lock); - } + disk_zone_wplug_clear_error(disk, zwplug); /* * The zone write plug now has no BIO plugged: remove it from the @@ -885,26 +927,6 @@ void blk_zone_write_plug_attempt_merge(struct request *req) spin_unlock_irqrestore(&zwplug->lock, flags); } -static inline void disk_zone_wplug_set_error(struct gendisk *disk, - struct blk_zone_wplug *zwplug) -{ - if (!(zwplug->flags & BLK_ZONE_WPLUG_ERROR)) { - unsigned long flags; - - /* - * Increase the plug reference count. The reference will be - * dropped in disk_zone_wplugs_work() once the error state - * is handled. - */ - zwplug->flags |= BLK_ZONE_WPLUG_ERROR; - atomic_inc(&zwplug->ref); - - spin_lock_irqsave(&disk->zone_wplugs_lock, flags); - list_add_tail(&zwplug->link, &disk->zone_wplugs_err_list); - spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); - } -} - /* * Check and prepare a BIO for submission by incrementing the write pointer * offset of its zone write plug and changing zone append operations into -- 2.44.0