Re: [PATCH 03/11] zbd: finish zones with remainder smaller than minimum write block size

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, 2022-10-21 at 15:34 +0900, Shin'ichiro Kawasaki wrote:
> When zonemode is zbd and block size is not divisor of zone size, write
> target zone selection does not work as expected. When the write is
> random write and the device has max open zone limit, the random write is
> repeated to the zones selected up to the max open zone limit. All writes
> are repeated only to the zones. When the write is sequential write,
> write is done only for the first zone. The cause of such unexpected zone
> selection is current write target zone selection logic. It selects write
> target zones within open zones. When block size is not divisor of zone
> size, the selected open zone has only remainder of writable blocks
> smaller than the block size. Fio reset such zone after zone selection
> and continue writing to it. This zone reset is required not to exceed
> the limit of max_open_zones option or max_active_zone limit of the zoned
> device, but it does not simulate the workload.
> 
> To avoid the zone reset and unexpected write to same zone, fix write
> target zone handling of zones with remainder smaller than write block
> size. Do not reset but finish such zone not to exceed the max_open_zones
> option and max_active_zone limit. Then choose the next to the finished
> zone as the write target. To implement this, add helper functions
> zbd_zone_remainder_not_writable() and zbd_finish_zone().
> 
> Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@xxxxxxx>
> ---
>  zbd.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 78 insertions(+)
> 
> diff --git a/zbd.c b/zbd.c
> index 627fb968..55c5c751 100644
> --- a/zbd.c
> +++ b/zbd.c
> @@ -70,6 +70,15 @@ static inline uint64_t zbd_zone_capacity_end(const struct
> fio_zone_info *z)
>         return z->start + z->capacity;
>  }
>  
> +/**
> + * zbd_zone_remainder - Return bytes can be written to the zone.
> + * @z: zone info pointer.
> + */
> +static inline uint64_t zbd_zone_remainder(struct fio_zone_info *z)
> +{
> +       return zbd_zone_capacity_end(z) - z->wp;
> +}
> +

This new helper has a very useful semantics. You can get some more mileage
out of it by using it elsewhere to clean up the code. I tried to add these
changes -

diff --git a/zbd.c b/zbd.c
index 55c5c751..a19b3a34 100644
--- a/zbd.c
+++ b/zbd.c
@@ -92,8 +92,7 @@ static bool zbd_zone_full(const struct fio_file *f, struct
fio_zone_info *z,
 {
 	assert((required & 511) == 0);
 
-	return z->has_wp &&
-		z->wp + required > zbd_zone_capacity_end(z);
+	return z->has_wp && required > zbd_zone_remainder(z);
 }
 
 static void zone_lock(struct thread_data *td, const struct fio_file *f,
@@ -487,7 +486,7 @@ static bool zbd_open_zone(struct thread_data *td, const
struct fio_file *f,
 		 * already in-flight, handle it as a full zone instead of an
 		 * open zone.
 		 */
-		if (z->wp >= zbd_zone_capacity_end(z))
+		if (!zbd_zone_remainder(z))
 			res = false;
 		goto out;
 	}
@@ -1415,7 +1414,7 @@ found_candidate_zone:
 	/* Both z->mutex and zbdi->mutex are held. */
 
 examine_zone:
-	if (z->wp + min_bs <= zbd_zone_capacity_end(z)) {
+	if (zbd_zone_remainder(z) >= min_bs) {
 		pthread_mutex_unlock(&zbdi->mutex);
 		goto out;
 	}
@@ -1480,7 +1479,7 @@ retry:
 		z = zbd_get_zone(f, zone_idx);
 
 		zone_lock(td, f, z);
-		if (z->wp + min_bs <= zbd_zone_capacity_end(z))
+		if (zbd_zone_remainder(z) >= min_bs)
 			goto out;
 		pthread_mutex_lock(&zbdi->mutex);
 	}

and these seem to be ok. If you decide to include them, you may want to put the
introduction of zbd_zone_remainder() into a separate patch.

>  /**
>   * zbd_zone_full - verify whether a minimum number of bytes remain in a zone
>   * @f: file pointer.
> @@ -322,6 +331,44 @@ static void zbd_close_zone(struct thread_data *td, const
> struct fio_file *f,
>         z->open = 0;
>  }
>  
> +/**
> + * zbd_finish_zone - finish the specified zone
> + * @td: FIO thread data.
> + * @f: FIO file for which to finish a zone
> + * @z: Zone to finish.
> + *
> + * Finish the zone at @offset with open or close status.
> + */
> +static int zbd_finish_zone(struct thread_data *td, struct fio_file *f,
> +                          struct fio_zone_info *z)
> +{
> +       uint64_t offset = z->start;
> +       uint64_t length = f->zbd_info->zone_size;
> +       int ret = 0;
> +
> +       switch (f->zbd_info->model) {
> +       case ZBD_HOST_AWARE:
> +       case ZBD_HOST_MANAGED:
> +               if (td->io_ops && td->io_ops->finish_zone)
> +                       ret = td->io_ops->finish_zone(td, f, offset, length);
> +               else
> +                       ret = blkzoned_finish_zone(td, f, offset, length);
> +               break;
> +       default:
> +               break;
> +       }
> +
> +       if (ret < 0) {
> +               td_verror(td, errno, "finish zone failed");
> +               log_err("%s: finish zone at sector %"PRIu64" failed (%d).\n",
> +                       f->file_name, offset >> 9, errno);
> +       } else {
> +               z->wp = (z+1)->start;
> +       }
> +
> +       return ret;
> +}
> +
>  /**
>   * zbd_reset_zones - Reset a range of zones.
>   * @td: fio thread data.
> @@ -1941,6 +1988,33 @@ enum io_u_action zbd_adjust_block(struct thread_data
> *td, struct io_u *io_u)
>                         goto eof;
>                 }
>  
> +retry:
> +               if (zbd_zone_remainder(zb) > 0 &&
> +                   zbd_zone_remainder(zb) < min_bs) {

You can avoid using this label by putting all the code until "goto retry" into a
while loop -

		while (zbd_zone_remainder(zb) > 0 &&
			zbd_zone_remainder(zb) < min_bs) {


> +                       pthread_mutex_lock(&f->zbd_info->mutex);
> +                       zbd_close_zone(td, f, zb);
> +                       pthread_mutex_unlock(&f->zbd_info->mutex);
> +                       dprint(FD_ZBD,
> +                              "%s: finish zone %d\n",
> +                              f->file_name, zbd_zone_idx(f, zb));
> +                       io_u_quiesce(td);
> +                       zbd_finish_zone(td, f, zb);
> +                       if (zbd_zone_idx(f, zb) + 1 >= f->max_zone) {
> +                               if (!td_random(td))
> +                                       goto eof;
> +                       }
> +                       zone_unlock(zb);
> +
> +                       /* choose next zone with write pointer */

"Find the next write pointer zone" sounds a bit better here

> +                       do {
> +                               zb++;
> +                               if (zbd_zone_idx(f, zb) >= f->max_zone)
> +                                       zb = zbd_get_zone(f, f->min_zone);
> +                       } while (!zb->has_wp);
> +
> +                       zone_lock(td, f, zb);
> +               }
> +
>                 if (!zbd_open_zone(td, f, zb)) {
>                         zone_unlock(zb);
>                         zb = zbd_convert_to_open_zone(td, io_u);
> @@ -1951,6 +2025,10 @@ enum io_u_action zbd_adjust_block(struct thread_data
> *td, struct io_u *io_u)
>                         }
>                 }
>  
> +               if (zbd_zone_remainder(zb) > 0 &&
> +                   zbd_zone_remainder(zb) < min_bs)
> +                       goto retry;

If you use the while loop, the if above becomes unnecessary

> +
>                 /* Check whether the zone reset threshold has been exceeded */
>                 if (td->o.zrf.u.f) {
>                         if (zbdi->wp_sectors_with_data >= f->io_size * td-
> >o.zrt.u.f &&





[Index of Archives]     [Linux Kernel]     [Linux SCSI]     [Linux IDE]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux