On 3/10/25 07:28, Benjamin Marzinski wrote: > If dm_table_set_restrictions() fails while swapping tables, > device-mapper will continue using the previous table. It must be sure to > leave the mapped_device in it's previous state on failure. Otherwise > device-mapper could end up using the old table with settings from the > unused table. > > Do not update the mapped device in dm_set_zones_restrictions(). Wait > till after dm_table_set_restrictions() is sure to succeed to update the > md zoned settings. Do the same with the dax settings, and if > dm_revalidate_zones() fails, restore the original queue limits. > > Signed-off-by: Benjamin Marzinski <bmarzins@xxxxxxxxxx> > --- > drivers/md/dm-table.c | 24 ++++++++++++++++-------- > drivers/md/dm-zone.c | 26 ++++++++++++++++++-------- > drivers/md/dm.h | 1 + > 3 files changed, 35 insertions(+), 16 deletions(-) > > diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c > index 0ef5203387b2..4003e84af11d 100644 > --- a/drivers/md/dm-table.c > +++ b/drivers/md/dm-table.c > @@ -1836,6 +1836,7 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, > struct queue_limits *limits) > { > int r; > + struct queue_limits old_limits; > > if (!dm_table_supports_nowait(t)) > limits->features &= ~BLK_FEAT_NOWAIT; > @@ -1862,16 +1863,11 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, > if (dm_table_supports_flush(t)) > limits->features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA; > > - if (dm_table_supports_dax(t, device_not_dax_capable)) { > + if (dm_table_supports_dax(t, device_not_dax_capable)) > limits->features |= BLK_FEAT_DAX; > - if (dm_table_supports_dax(t, device_not_dax_synchronous_capable)) > - set_dax_synchronous(t->md->dax_dev); > - } else > + else > limits->features &= ~BLK_FEAT_DAX; > > - if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL)) > - dax_write_cache(t->md->dax_dev, true); > - > /* For a zoned table, setup the zone related queue attributes. */ > if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && > (limits->features & BLK_FEAT_ZONED)) { > @@ -1883,6 +1879,7 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, > if (dm_table_supports_atomic_writes(t)) > limits->features |= BLK_FEAT_ATOMIC_WRITES; > > + old_limits = q->limits; I am not sure this is safe to do like this since the user may be simultaneously changing attributes, which would result in the old_limits struct being in an inconsistent state. So shouldn't we hold q->limits_lock here ? We probably want a queue_limits_get() helper for that though. > r = queue_limits_set(q, limits); ...Or, we could modify queue_limits_set() to also return the old limit struct under the q limits_lock. That maybe easier. > if (r) > return r; > @@ -1894,10 +1891,21 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, > if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && > (limits->features & BLK_FEAT_ZONED)) { > r = dm_revalidate_zones(t, q); > - if (r) > + if (r) { > + queue_limits_set(q, &old_limits); > return r; > + } > } > > + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) > + dm_finalize_zone_settings(t, limits); > + > + if (dm_table_supports_dax(t, device_not_dax_synchronous_capable)) > + set_dax_synchronous(t->md->dax_dev); > + > + if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL)) > + dax_write_cache(t->md->dax_dev, true); > + > dm_update_crypto_profile(q, t); > return 0; > } > diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c > index 20edd3fabbab..681058feb63b 100644 > --- a/drivers/md/dm-zone.c > +++ b/drivers/md/dm-zone.c > @@ -340,12 +340,8 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, > * mapped device queue as needing zone append emulation. > */ > WARN_ON_ONCE(queue_is_mq(q)); > - if (dm_table_supports_zone_append(t)) { > - clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); > - } else { > - set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); > + if (!dm_table_supports_zone_append(t)) > lim->max_hw_zone_append_sectors = 0; > - } > > /* > * Determine the max open and max active zone limits for the mapped > @@ -383,9 +379,6 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, > lim->zone_write_granularity = 0; > lim->chunk_sectors = 0; > lim->features &= ~BLK_FEAT_ZONED; > - clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); > - md->nr_zones = 0; > - disk->nr_zones = 0; > return 0; > } > > @@ -408,6 +401,23 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, > return 0; > } > > +void dm_finalize_zone_settings(struct dm_table *t, struct queue_limits *lim) > +{ > + struct mapped_device *md = t->md; > + > + if (lim->features & BLK_FEAT_ZONED) { > + if (dm_table_supports_zone_append(t)) > + clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); > + else > + set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); > + } else { > + clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); > + md->nr_zones = 0; > + md->disk->nr_zones = 0; > + } > +} > + > + > /* > * IO completion callback called from clone_endio(). > */ > diff --git a/drivers/md/dm.h b/drivers/md/dm.h > index a0a8ff119815..e5d3a9f46a91 100644 > --- a/drivers/md/dm.h > +++ b/drivers/md/dm.h > @@ -102,6 +102,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t); > int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, > struct queue_limits *lim); > int dm_revalidate_zones(struct dm_table *t, struct request_queue *q); > +void dm_finalize_zone_settings(struct dm_table *t, struct queue_limits *lim); > void dm_zone_endio(struct dm_io *io, struct bio *clone); > #ifdef CONFIG_BLK_DEV_ZONED > int dm_blk_report_zones(struct gendisk *disk, sector_t sector, -- Damien Le Moal Western Digital Research