Would creating new profiles for all the I/O patterns be particularly difficult? I'm sure you're much more familiar with the FIO codebase than I am, but it seems to me that all you'd need to do for randoms is move the logic from the zbc_adjust_block cases upstream into the various methods called by get_off_from_method(), or possibly modify the existing methods to work differently when working on a ZBC drive. For sequentials it seems like you'd just have to move the logic into get_next_seq_offset(). It also seems to me that it might be better to have get_next_block() pick a valid area to begin with. The main benefit to doing this that I can see would be to allow much more control over the number of open zones, which I think will be of particular interest in testing ZBC drive performance. Additionally, it might be worthwhile to have an option allows the workload to pick a new zone instead of resetting the write pointer of a zone when writing to a full zone. This would also be made easier with a more upstream approach, because you wouldn't need to retry and get a new offset, you could just avoid full zones entirely. Or you could keep track of which zones are open and add/replace open zones as necessary. Phillip On Thu, Mar 15, 2018 at 10:43 AM, Bart Van Assche <Bart.VanAssche@xxxxxxx> wrote: > On Thu, 2018-03-15 at 16:30 +0000, Sitsofe Wheeler wrote: > >> On 15 March 2018 at 16:15, Kris Davis <Kris.Davis@xxxxxxx> wrote: > >> > Despite the desire to have fio work with Host Managed devices independent of the engine, a Host Managed device operation is different than a traditional block device, and the kernel also has to > >> > manage it differently. Thus, I would still recommend creating a new IO engine for use with Host Managed devices, we would not want the additional overhead associated with SMR to impact the > >> > standard aio engine. We have mostly used the fio SG engine along with external operations in testing of SMR. > >> > > >> > Here is the way we envision a new fio engine might work with Host Managed devices: > >> > >> It's definitely going to need something special. I think last time > >> round (https://urldefense.proofpoint.com/v2/url?u=https-3A__www.spinics.net_lists_fio_msg06646.html&d=DwIGaQ&c=IGDlg0lD0b-nebmJJ0Kp8A&r=eNMOVQH16Aa4ThAFVwj-O7goG7k06cW3W6DO_yXnzSg&m=icSXCK6t72RPOJKtqT2YyV-6X8g2HQnSmGSFsvlJgOo&s=ti4Mozyfv9Huy4ZNAs9GDPnvDhaiz40m16tWRS4cOVM&e= ) I suggested a > >> profile but perhaps that won't be enough. I doubt an ioengine would be > >> enough because you're going to have interact with the next offset code > >> etc. unless you're going to fake I/Os done to "wrong" regions... > > > > Hello Sitsofe, > > > > Adding support for ZBC drives as a profile has a significant disadvantage, > > namely that the different I/O patterns (sequential read, sequential write, > > random read, random write, ...) all have to be reimplemented. That's why I'm > > considering to add ZBC support by modifying what get_next_block() produces. > > Can you have a look at the (barely tested) patch below? > > > > Thanks, > > > > Bart. > > > > > > diff --git a/Makefile b/Makefile > > index 8f4871c63528..44dd7f3439f6 100644 > > --- a/Makefile > > +++ b/Makefile > > @@ -145,6 +145,9 @@ endif > > ifdef CONFIG_LIBPMEM > > SOURCE += engines/libpmem.c > > endif > > +ifdef HAVE_LINUX_BLKZONED_H > > + SOURCE += zbc.c > > +endif > > > > ifeq ($(CONFIG_TARGET_OS), Linux) > > SOURCE += diskutil.c fifo.c blktrace.c cgroup.c trim.c engines/sg.c \ > > diff --git a/configure b/configure > > index 4442a1cc0d75..688e86201320 100755 > > --- a/configure > > +++ b/configure > > @@ -2104,6 +2104,27 @@ if compile_prog "" "" "valgrind_dev"; then > > fi > > print_config "Valgrind headers" "$valgrind_dev" > > > > +########################################## > > +# <linux/blkzoned.h> probe > > +if test "$linux_blkzoned_h" != "yes" ; then > > + linux_blkzoned_h="no" > > +fi > > +cat > $TMPC << EOF > > +#include <linux/blkzoned.h> > > +int main(int argc, char **argv) > > +{ > > + return 0; > > +} > > +EOF > > +if compile_prog "" "" "linux_blkzoned_h"; then > > + linux_blkzoned_h="yes" > > +fi > > +print_config "<linux/blkzoned.h>" "$linux_blkzoned_h" > > +if test "$linux_blkzoned_h" = "yes" ; then > > + output_sym "HAVE_LINUX_BLKZONED_H" > > +fi > > + > > +########################################## > > # check march=armv8-a+crc+crypto > > if test "$march_armv8_a_crc_crypto" != "yes" ; then > > march_armv8_a_crc_crypto="no" > > diff --git a/debug.h b/debug.h > > index b8718ddc225f..c69c8079beda 100644 > > --- a/debug.h > > +++ b/debug.h > > @@ -24,6 +24,7 @@ enum { > > FD_COMPRESS, > > FD_STEADYSTATE, > > FD_HELPERTHREAD, > > + FD_ZBC, > > FD_DEBUG_MAX, > > }; > > > > diff --git a/file.h b/file.h > > index 8fd34b136c23..42b304629824 100644 > > --- a/file.h > > +++ b/file.h > > @@ -10,6 +10,9 @@ > > #include "lib/lfsr.h" > > #include "lib/gauss.h" > > > > +/* Forward declarations */ > > +struct zoned_block_device_info; > > + > > /* > > * The type of object we are working on > > */ > > @@ -97,6 +100,11 @@ struct fio_file { > > uint64_t file_offset; > > uint64_t io_size; > > > > + /* > > + * Zoned device information > > + */ > > + struct zoned_block_device_info *zbd_info; > > + > > /* > > * Track last end and last start of IO for a given data direction > > */ > > diff --git a/filesetup.c b/filesetup.c > > index 7cbce1327f8f..d981c61f5b7f 100644 > > --- a/filesetup.c > > +++ b/filesetup.c > > @@ -16,6 +16,7 @@ > > #include "hash.h" > > #include "lib/axmap.h" > > #include "rwlock.h" > > +#include "zbc.h" > > > > #ifdef CONFIG_LINUX_FALLOCATE > > #include <linux/falloc.h> > > @@ -773,6 +774,9 @@ static int get_file_sizes(struct thread_data *td) > > */ > > if (f->real_file_size == -1ULL && td->o.size) > > f->real_file_size = td->o.size / td->o.nr_files; > > + > > + if (f->filetype == FIO_TYPE_BLOCK) > > + zbc_init_zone_info(f); > > } > > > > return err; > > @@ -1165,7 +1169,9 @@ done: > > td->done = 1; > > > > td_restore_runstate(td, old_state); > > - return 0; > > + > > + return zbc_verify_options(); > > + > > err_offset: > > log_err("%s: you need to specify valid offset=\n", o->name); > > err_out: > > diff --git a/init.c b/init.c > > index e47e5384119b..3ea5ea57e3a1 100644 > > --- a/init.c > > +++ b/init.c > > @@ -2266,6 +2266,10 @@ const struct debug_level debug_levels[] = { > > .help = "Helper thread logging", > > .shift = FD_HELPERTHREAD, > > }, > > + { .name = "zbc", > > + .help = "Zoned Block Device logging", > > + .shift = FD_ZBC, > > + }, > > { .name = NULL, }, > > }; > > > > diff --git a/io_u.c b/io_u.c > > index 01b36938d1b5..ba4dbb9d8ecc 100644 > > --- a/io_u.c > > +++ b/io_u.c > > @@ -14,6 +14,7 @@ > > #include "err.h" > > #include "lib/pow2.h" > > #include "minmax.h" > > +#include "zbc.h" > > > > struct io_completion_data { > > int nr; /* input */ > > @@ -558,6 +559,9 @@ static int get_next_offset(struct thread_data *td, struct io_u *io_u, > > if (get_next_block(td, io_u, ddir, rw_seq_hit, is_random)) > > return 1; > > > > + if (zbc_adjust_block(td, io_u)) > > + return 1; > > + > > if (io_u->offset >= f->io_size) { > > dprint(FD_IO, "get_next_offset: offset %llu >= io_size %llu\n", > > (unsigned long long) io_u->offset, > > diff --git a/ioengines.c b/ioengines.c > > index 965581aa4157..a04a977cca9f 100644 > > --- a/ioengines.c > > +++ b/ioengines.c > > @@ -19,6 +19,7 @@ > > > > #include "fio.h" > > #include "diskutil.h" > > +#include "zbc.h" > > > > static FLIST_HEAD(engine_list); > > > > @@ -320,6 +321,8 @@ int td_io_queue(struct thread_data *td, struct io_u *io_u) > > } > > > > ret = td->io_ops->queue(td, io_u); > > + if (ret < FIO_Q_BUSY) > > + zbc_update_wp(td, io_u); > > > > unlock_file(td, io_u->file); > > > > diff --git a/zbc.c b/zbc.c > > new file mode 100644 > > index 000000000000..57e1981ee35d > > --- /dev/null > > +++ b/zbc.c > > @@ -0,0 +1,604 @@ > > +/* > > + * Copyright (C) 2018 Western Digital Corporation or its affiliates. > > + * > > + * This file is released under the GPL. > > + */ > > + > > +#include <errno.h> > > +#include <string.h> > > +#include <stdlib.h> > > +#include <dirent.h> > > +#include <fcntl.h> > > +#include <sys/ioctl.h> > > +#include <unistd.h> > > +#include <linux/blkzoned.h> > > +#include "file.h" > > +#include "fio.h" > > +#include "log.h" > > +#include "zbc.h" > > + > > +/* Return the name of the first entry in a directory */ > > +static char *get_first_dirent(const char *dir_path) > > +{ > > + char *res = NULL; > > + struct dirent *e; > > + DIR *d; > > + > > + d = opendir(dir_path); > > + if (!d) > > + return NULL; > > + while ((e = readdir(d))) { > > + /* Skip "." and ".." */ > > + if (e->d_name[0] == '.') > > + continue; > > + res = strdup(e->d_name); > > + break; > > + } > > + closedir(d); > > + > > + return res; > > +} > > + > > +/* > > + * Convert a block device name into a SCSI device path, e.g. /dev/sdc into > > + * /sys/class/scsi_device/0:0:0:0. > > + */ > > +static int bdev_to_scsi(char **scsi_id, const char *bdev) > > +{ > > + char *bdev_path = NULL, *bdev2 = NULL; > > + struct dirent *e; > > + bool matches; > > + DIR *d; > > + int res = 0; > > + > > + if (strncmp(bdev, "/dev/", 5) != 0) > > + return -EINVAL; > > + d = opendir("/sys/class/scsi_device"); > > + if (!d) > > + return -ENOMEM; > > + while ((e = readdir(d))) { > > + /* Skip "." and ".." */ > > + if (e->d_name[0] == '.') > > + continue; > > + free(bdev_path); > > + bdev_path = NULL; > > + res = -ENOMEM; > > + if (asprintf(&bdev_path, > > + "/sys/class/scsi_device/%s/device/block", > > + e->d_name) < 0) > > + break; > > + bdev2 = get_first_dirent(bdev_path); > > + matches = bdev2 && strcmp(bdev2, bdev + 5) == 0; > > + free(bdev2); > > + if (matches) { > > + *scsi_id = strdup(e->d_name); > > + res = 0; > > + break; > > + } > > + res = -ENOENT; > > + } > > + closedir(d); > > + > > + free(bdev_path); > > + > > + return res; > > +} > > + > > +/* > > + * Get the SCSI device type from VPD page 0x80. That device type is called the > > + * "peripheral device type" in the SCSI SPC-5 standard. Returns -ENXIO if and > > + * only if the device is not a SCSI device. > > + */ > > +static int get_scsi_device_type(const char *bdev) > > +{ > > + char *scsi_id = NULL, *vpd_pg80_path = NULL; > > + uint8_t vpd_pg80[8]; > > + int vpd_fd; > > + int ret; > > + > > + ret = bdev_to_scsi(&scsi_id, bdev); > > + if (ret < 0 && ret != -ENOENT) > > + return ret; > > + if (scsi_id == NULL) > > + return -ENXIO; > > + dprint(FD_ZBC, "Block device %s has SCSI ID %s\n", bdev, scsi_id); > > + ret = -ENOMEM; > > + if (asprintf(&vpd_pg80_path, > > + "/sys/class/scsi_device/%s/device/vpd_pg80", scsi_id) < 0) > > + goto out; > > + > > + vpd_fd = open(vpd_pg80_path, O_RDONLY); > > + if (vpd_fd < 0) > > + goto out; > > + ret = read(vpd_fd, vpd_pg80, sizeof(vpd_pg80)); > > + close(vpd_fd); > > + if (ret < sizeof(vpd_pg80)) > > + goto out; > > + > > + ret = vpd_pg80[0] & 0x1f; > > + > > +out: > > + free(vpd_pg80_path); > > + free(scsi_id); > > + return ret; > > +} > > + > > +/** > > + * zbc_reset_zone - reset the write pointer of one or more zones > > + * @f: FIO file associated with the disk for which to reset write pointers > > + * @sector: First sector for which to reset the write pointer in units of 512 > > + * bytes. > > + * @nr_sectors: Number of sectors to reset the write pointer of. > > + */ > > +static int zbc_reset_zone(const struct fio_file *f, uint64_t sector, > > + uint64_t nr_sectors) > > +{ > > + struct blk_zone_range zr = { > > + .sector = sector, > > + .nr_sectors = nr_sectors > > + }; > > + int ret; > > + > > + ret = ioctl(f->fd, BLKRESETZONE, &zr); > > + if (ret < 0) > > + log_err("%s: resetting wp for %lu sectors at sector %lu failed (%d).\n", > > + f->file_name, nr_sectors, sector, errno); > > + return ret; > > +} > > + > > +/* > > + * Read zone information into @buf starting from sector @start_sector. > > + * @fd is a file descriptor that refers to a block device and @bufsz is the > > + * size of @buf. > > + */ > > +static int read_zone_info(int fd, uint64_t start_sector, > > + void *buf, unsigned int bufsz) > > +{ > > + struct blk_zone_report *hdr = buf; > > + > > + if (bufsz < sizeof(*hdr)) > > + return -EINVAL; > > + > > + memset(hdr, 0, sizeof(*hdr)); > > + > > + hdr->nr_zones = (bufsz - sizeof(*hdr)) / sizeof(struct blk_zone); > > + hdr->sector = start_sector; > > + return ioctl(fd, BLKREPORTZONE, hdr); > > +} > > + > > +/* > > + * Initialize f->zbd_info. > > + */ > > +int zbc_init_zone_info(struct fio_file *f) > > +{ > > + const unsigned int bufsz = sizeof(struct blk_zone_report) + > > + 32768 * sizeof(struct blk_zone); > > + unsigned int nr_zones; > > + struct blk_zone_report *hdr; > > + const struct blk_zone *z; > > + struct fio_zone_info *p; > > + uint64_t zone_size, start_sector; > > + struct zoned_block_device_info *zbd_info = NULL; > > + void *buf; > > + int fd, i, j, ret = -ENOMEM; > > + > > + ret = get_scsi_device_type(f->file_name); > > + if (ret == -ENXIO) { > > + dprint(FD_ZBC, "%s: not a SCSI device\n", f->file_name); > > + ret = 0; > > + goto out; > > + } > > + if (ret < 0) > > + log_info("fio: unable to determine device type for %s.\n", > > + f->file_name); > > + > > + dprint(FD_ZBC, "Block device %s has SCSI device type %#x\n", > > + f->file_name, ret); > > + > > + if (ret != 0x14 /* ZBC */) { > > + ret = 0; > > + goto out; > > + } > > + > > + dprint(FD_ZBC, "Reading zone information for device %s\n", > > + f->file_name); > > + > > + buf = malloc(bufsz); > > + if (!buf) > > + goto out; > > + > > + fd = open(f->file_name, O_RDONLY | O_LARGEFILE); > > + if (fd < 0) { > > + ret = -errno; > > + goto free; > > + } > > + > > + ret = read_zone_info(fd, 0, buf, bufsz); > > + if (ret < 0) { > > + log_info("fio: BLKREPORTZONE(%lu) failed for %s (%d).\n", > > + 0UL, f->file_name, errno); > > + goto close; > > + } > > + hdr = buf; > > + if (hdr->nr_zones < 1) { > > + log_info("fio: %s has invalid zone information.\n", > > + f->file_name); > > + goto close; > > + } > > + z = (void *)(hdr + 1); > > + zone_size = z->len; > > + nr_zones = (f->real_file_size >> 9) / zone_size; > > + > > + dprint(FD_ZBC, "Device %s has %d zones of size %lu\n", f->file_name, > > + nr_zones, zone_size); > > + > > + zbd_info = calloc(1, sizeof(*zbd_info) + > > + (nr_zones + 1) * sizeof(zbd_info->zone_info[0])); > > + ret = -ENOMEM; > > + if (!zbd_info) > > + goto close; > > + p = &zbd_info->zone_info[0]; > > + for (start_sector = 0, j = 0; j < nr_zones;) { > > + z = (void *)(hdr + 1); > > + for (i = 0; i < hdr->nr_zones; i++, j++, z++, p++) { > > + p->start = z->start; > > + p->wp = z->wp; > > + p->type = z->type; > > + if (j > 0 && p->start != p[-1].start + zone_size) { > > + log_info("%s: invalid zone data\n", > > + f->file_name); > > + ret = -EINVAL; > > + goto close; > > + } > > + } > > + z--; > > + start_sector = z->start + z->len; > > + if (j >= nr_zones) > > + break; > > + ret = read_zone_info(fd, start_sector, buf, bufsz); > > + if (ret < 0) { > > + log_info("fio: BLKREPORTZONE(%lu) failed for %s (%d).\n", > > + start_sector, f->file_name, errno); > > + goto close; > > + } > > + } > > + /* a sentinel */ > > + zbd_info->zone_info[nr_zones].start = start_sector; > > + > > + f->zbd_info = zbd_info; > > + f->zbd_info->zone_size = zone_size; > > + f->zbd_info->nr_zones = nr_zones; > > + zbd_info = NULL; > > + ret = 0; > > + > > +close: > > + free(zbd_info); > > + close(fd); > > +free: > > + free(buf); > > +out: > > + return ret; > > +} > > + > > +/** > > + * zbc_zone_idx - convert an offset into a zone number > > + * @td: thread data. > > + * @f: file pointer. > > + * @offset: offset in bytes. If this offset equals the disk size then the > > + * index of the sentinel is returned. > > + */ > > +static uint32_t zbc_zone_idx(const struct thread_data *td, > > + const struct fio_file *f, uint64_t offset) > > +{ > > + uint32_t zone_idx = (offset >> 9) / f->zbd_info->zone_size; > > + > > + assert(offset <= f->real_file_size); > > + assert(zone_idx <= f->zbd_info->nr_zones); > > + return zone_idx; > > +} > > + > > +static bool zone_full(const struct fio_file *f, const struct fio_zone_info *z) > > +{ > > + return z->wp >= z->start + f->zbd_info->zone_size; > > +} > > + > > +static bool is_valid_offset(const struct fio_file *f, uint64_t offset) > > +{ > > + return (uint64_t)(offset - f->file_offset) < f->io_size; > > +} > > + > > +/* Verify whether direct I/O is used for all ZBC drives. */ > > +static bool zbc_using_direct_io(void) > > +{ > > + struct thread_data *td; > > + struct fio_file *f; > > + int i, j; > > + > > + for_each_td(td, i) { > > + if (td->o.odirect) > > + continue; > > + for_each_file(td, f, j) { > > + if (f->zbd_info) > > + return false; > > + } > > + } > > + > > + return true; > > +} > > + > > +static bool other_job_writes_to(struct thread_data *const td, > > + struct fio_file *const f, > > + const int i, const int j) > > +{ > > + struct thread_data *td2; > > + struct fio_file *f2; > > + int k, m; > > + > > + for_each_td(td2, k) { > > + if ((td->o.td_ddir & (TD_DDIR_WRITE | TD_DDIR_TRIM)) == 0) > > + continue; > > + for_each_file(td2, f2, m) { > > + if (k == i && m == j) > > + continue; > > + if (f2->zbd_info && > > + strcmp(f->file_name, f2->file_name) == 0) > > + return true; > > + } > > + } > > + > > + return false; > > +} > > + > > +/* > > + * Check whether multiple ZBC write or trim jobs have been specified for the > > + * same drive. > > + * > > + * To do: refine this code such that locking is only required if multiple > > + * ZBC write or trim jobs have been specified for the same drive. > > + */ > > +static bool zbc_multiple_writers(void) > > +{ > > + struct thread_data *td; > > + struct fio_file *f; > > + int i, j; > > + > > + for_each_td(td, i) { > > + if ((td->o.td_ddir & (TD_DDIR_WRITE | TD_DDIR_TRIM)) == 0) > > + continue; > > + for_each_file(td, f, j) > > + if (f->zbd_info && > > + other_job_writes_to(td, f, i, j)) > > + return true; > > + } > > + > > + return false; > > +} > > + > > +static bool zbc_verify_sizes(void) > > +{ > > + const struct fio_zone_info *z; > > + struct thread_data *td; > > + struct fio_file *f; > > + uint64_t new_offset, new_start; > > + uint32_t zone_idx; > > + int i, j; > > + > > + for_each_td(td, i) { > > + for_each_file(td, f, j) { > > + if (!f->zbd_info) > > + continue; > > + zone_idx = zbc_zone_idx(td, f, f->file_offset); > > + z = &f->zbd_info->zone_info[zone_idx]; > > + if ((z->start << 9) != f->file_offset) { > > + new_offset = (z->start + > > + f->zbd_info->zone_size) << 9; > > + log_info("%s: rounded up offset from %lu to %lu\n", > > + f->file_name, f->file_offset, > > + new_offset); > > + f->io_size -= (new_offset - f->file_offset); > > + f->file_offset = new_offset; > > + } > > + zone_idx = zbc_zone_idx(td, f, f->file_offset + > > + f->io_size); > > + z = &f->zbd_info->zone_info[zone_idx]; > > + new_start = z->start << 9; > > + if (f->file_offset + f->io_size != new_start) { > > + if (new_start == f->file_offset) { > > + log_info("%s: io_size must be at least one zone\n", > > + f->file_name); > > + return false; > > + } > > + log_info("%s: rounded down io_size from %lu to %lu\n", > > + f->file_name, f->io_size, new_start); > > + f->io_size = new_start; > > + } > > + } > > + } > > + > > + return true; > > +} > > + > > +int zbc_verify_options(void) > > +{ > > + if (!zbc_using_direct_io()) { > > + log_err("Using direct I/O is mandatory for ZBC drives\n\n"); > > + return 1; > > + } > > + > > + if (zbc_multiple_writers()) { > > + log_err("Concurrent writing to ZBC disks is not supported\n\n"); > > + return 1; > > + } > > + > > + if (!zbc_verify_sizes()) > > + return 1; > > + > > + return 0; > > +} > > + > > +/** > > + * zbc_adjust_block - adjust the offset and length as necessary for ZBC drives > > + * @td: FIO thread data. > > + * @io_u: FIO I/O unit. > > + * > > + * Returns 0 if the I/O unit should be used and 1 if not. > > + */ > > +int zbc_adjust_block(const struct thread_data *td, struct io_u *io_u) > > +{ > > + const struct fio_file *f = io_u->file; > > + uint32_t zone_idx_b, zone_idx_e; > > + struct fio_zone_info *zb, *ze; > > + uint64_t offset, orig_o = io_u->offset; > > + uint32_t orig_len = io_u->buflen; > > + uint64_t delta; > > + > > + if (!f->zbd_info) > > + return 0; > > + > > + zone_idx_b = zbc_zone_idx(td, f, f->file_offset + io_u->offset); > > + zone_idx_e = zbc_zone_idx(td, f, f->file_offset + io_u->offset + > > + (io_u->buflen ? io_u->buflen - 1 : 0)); > > + zb = &f->zbd_info->zone_info[zone_idx_b]; > > + ze = &f->zbd_info->zone_info[zone_idx_e]; > > + > > + switch (io_u->ddir) { > > + case DDIR_READ: > > + /* > > + * From the ZBC spec: a read operation past the write pointer > > + * of a zone shall return logical block data set to the last > > + * initialization pattern that was set at manufacture time, > > + * by the FORMAT UNIT command or by the most recent SANITIZE > > + * command with the service action set to OVERWRITE. Hence, > > + * for random I/O, do not read past the write pointer. > > + */ > > + if (!td_random(td)) > > + return 0; > > + if (io_u->buflen > ((zb->wp - zb->start) << 9)) > > + return 1; > > + if (io_u->offset + io_u->buflen > (zb->wp << 9)) { > > + io_u->offset = (zb->wp << 9) - io_u->buflen; > > + dprint(FD_IO, > > + "changed write offset from %ld into %lld\n", > > + orig_o, io_u->offset); > > + } > > + return 0; > > + case DDIR_WRITE: > > + if (zb->type != BLK_ZONE_TYPE_SEQWRITE_REQ) > > + return 0; > > + /* Make writes occur at the write pointer */ > > + if (zone_full(f, zb)) { > > + if (zbc_reset_zone(f, zb->start, > > + f->zbd_info->zone_size) < 0) > > + return 1; > > + zb->wp = zb->start; > > + offset = zb->start << 9; > > + dprint(FD_IO, > > + "reset zone write pointer at offset %ld\n", > > + offset); > > + } else { > > + offset = zb->wp << 9; > > + } > > + if (!is_valid_offset(f, offset)) > > + return 1; > > + io_u->offset = offset - f->file_offset; > > + if (orig_o != io_u->offset) > > + dprint(FD_IO, > > + "changed write offset from %ld into %lld\n", > > + orig_o, io_u->offset); > > + /* Shrink write requests that cross zone boundaries. */ > > + if (zone_idx_b != zone_idx_e) { > > + io_u->buflen = ((zb->start + f->zbd_info->zone_size) > > + << 9) - (io_u->offset + f->file_offset); > > + dprint(FD_IO, "Changed length from %u into %lu\n", > > + orig_len, io_u->buflen); > > + } > > + return 0; > > + case DDIR_TRIM: > > + /* Align trims to zone boundaries. */ > > + if (zone_idx_b == zone_idx_e) { > > + if (zb->type == BLK_ZONE_TYPE_SEQWRITE_REQ && > > + io_u->buflen < f->zbd_info->zone_size) > > + io_u->buflen = 0; > > + } else { > > + if (zb->type == BLK_ZONE_TYPE_SEQWRITE_REQ) { > > + delta = f->zbd_info->zone_size - > > + (f->file_offset + io_u->offset - > > + (zb->start << 9)); > > + io_u->offset += delta; > > + io_u->buflen -= delta; > > + } > > + if (ze->type == BLK_ZONE_TYPE_SEQWRITE_REQ) > > + io_u->buflen -= (f->file_offset + > > + io_u->offset + io_u->buflen) - > > + (ze->start << 9); > > + } > > + dprint(FD_IO, "Changed trim range from %lu + %u into %llu + %lu (adjustments: offset + %llu; len - %lu)\n", > > + orig_o, orig_len, io_u->offset, io_u->buflen, > > + io_u->offset - orig_o, orig_len - io_u->buflen); > > + return 0; > > + case DDIR_SYNC: > > + case DDIR_DATASYNC: > > + case DDIR_SYNC_FILE_RANGE: > > + case DDIR_WAIT: > > + case DDIR_LAST: > > + case DDIR_INVAL: > > + return 0; > > + } > > + > > + assert(false); > > + return 1; > > +} > > + > > +/** > > + * zbc_update_wp - update the write pointer > > + * @td: thread data > > + * @io_u: I/O unit > > + * > > + * For write and trim operations, update the write pointer of all affected > > + * zones. > > + */ > > +void zbc_update_wp(struct thread_data *td, const struct io_u *io_u) > > +{ > > + struct zoned_block_device_info *zbd_info; > > + struct fio_zone_info *z; > > + uint32_t zone_idx; > > + uint64_t end; > > + > > + if (!io_u->file->zbd_info) > > + return; > > + > > + switch (io_u->ddir) { > > + case DDIR_READ: > > + case DDIR_SYNC: > > + case DDIR_DATASYNC: > > + case DDIR_SYNC_FILE_RANGE: > > + case DDIR_WAIT: > > + case DDIR_LAST: > > + case DDIR_INVAL: > > + return; > > + case DDIR_WRITE: > > + case DDIR_TRIM: > > + break; > > + } > > + > > + zbd_info = io_u->file->zbd_info; > > + zone_idx = zbc_zone_idx(td, io_u->file, io_u->offset); > > + end = (io_u->offset + io_u->buflen) >> 9; > > + for (z = &zbd_info->zone_info[zone_idx]; z->start < end; > > + z++, zone_idx++) { > > + assert(zone_idx < zbd_info->nr_zones); > > + if (z->type != BLK_ZONE_TYPE_SEQWRITE_REQ) > > + continue; > > + switch (io_u->ddir) { > > + case DDIR_WRITE: > > + z->wp = end; > > + break; > > + case DDIR_TRIM: > > + z->wp = z->start; > > + break; > > + default: > > + assert(false); > > + break; > > + } > > + } > > +} > > diff --git a/zbc.h b/zbc.h > > new file mode 100644 > > index 000000000000..de2a39551ca8 > > --- /dev/null > > +++ b/zbc.h > > @@ -0,0 +1,69 @@ > > +/* > > + * Copyright (C) 2018 Western Digital Corporation or its affiliates. > > + * > > + * This file is released under the GPL. > > + */ > > + > > +#ifndef FIO_ZBC_H > > +#define FIO_ZBC_H > > + > > +#include <inttypes.h> > > + > > +struct fio_file; > > + > > +/** > > + * struct fio_zone_info - information about a single ZBC zone > > + * @start: zone start in 512 byte units > > + * @wp: zone write pointer location in 512 byte units > > + * @type: zone type as defined by enum blk_zone_type > > + */ > > +struct fio_zone_info { > > + uint64_t start; > > + uint64_t wp; > > + uint8_t type; > > +}; > > + > > +/** > > + * zoned_block_device_info - zoned block device characteristics > > + * @zone_size: size of a single zone in units of 512 bytes > > + * @nr_zones: number of zones > > + * @zone_info: description of the individual zones > > + * > > + * Only devices for which all zones have the same size are supported. > > + * Note: if the capacity is not a multiple of the zone size then the last zone > > + * will be smaller than 'zone_size'. > > + */ > > +struct zoned_block_device_info { > > + uint64_t zone_size; > > + uint64_t nr_zones; > > + struct fio_zone_info zone_info[0]; > > +}; > > + > > +#ifdef HAVE_LINUX_BLKZONED_H > > +int zbc_init_zone_info(struct fio_file *f); > > +int zbc_verify_options(void); > > +int zbc_adjust_block(const struct thread_data *td, struct io_u *io_u); > > +void zbc_update_wp(struct thread_data *td, const struct io_u *io_u); > > +#else > > +static inline int zbc_init_zone_info(struct fio_file *f) > > +{ > > + return 0; > > +} > > + > > +static inline int zbc_verify_options(void) > > +{ > > + return 0; > > +} > > + > > +static inline int zbc_adjust_block(struct thread_data *td, struct io_u *io_u) > > +{ > > + return 0; > > +} > > + > > +static inline void zbc_update_wp(struct thread_data *td, > > + const struct io_u *io_u) > > +{ > > +} > > +#endif > > + > > +#endif /* FIO_ZBC_H */ > > > > > -- To unsubscribe from this list: send the line "unsubscribe fio" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html