On 4/16/22 05:17, Bart Van Assche wrote: > ZBC-2 allows host-managed disks to report gap zones. Another new feature > in ZBC-2 is support for constant zone starting LBA offsets. These two > features allow zoned disks to report a starting LBA offset that is a > power of two even if the number of logical blocks with data per zone is > not a power of two. I think this last sentence would be clearer phrased like this: These two features allow zoned disks to report zone start LBAs that are a power of two even if the number of logical blocks with data per zone is not a power of two. > > For zoned disks that report a constant zone starting LBA offset, hide > the gap zones from the block layer. Report the starting LBA offset as > zone size and report the number of logical blocks with data per zone as > the zone capacity. > > Signed-off-by: Damien Le Moal <damien.lemoal@xxxxxxxxxxxxxxxxxx> > [ bvanassche: Reworked this patch ] > Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx> > --- > drivers/scsi/sd.h | 5 +++ > drivers/scsi/sd_zbc.c | 84 +++++++++++++++++++++++++++++++++++---- > include/scsi/scsi_proto.h | 8 +++- > 3 files changed, 88 insertions(+), 9 deletions(-) > > diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h > index 2e983578a699..e0793e789fdc 100644 > --- a/drivers/scsi/sd.h > +++ b/drivers/scsi/sd.h > @@ -95,6 +95,11 @@ struct scsi_disk { > u32 zones_optimal_open; > u32 zones_optimal_nonseq; > u32 zones_max_open; > + /* > + * Either zero or a power of two. If not zero it means that the offset > + * between zone starting LBAs is constant. > + */ > + u32 zone_starting_lba_gran; > u32 *zones_wp_offset; > spinlock_t zones_wp_offset_lock; > u32 *rev_wp_offset; > diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c > index fe46b4b9913a..92eace611364 100644 > --- a/drivers/scsi/sd_zbc.c > +++ b/drivers/scsi/sd_zbc.c > @@ -37,7 +37,7 @@ static unsigned int sd_zbc_get_zone_wp_offset(struct blk_zone *zone) > case BLK_ZONE_COND_CLOSED: > return zone->wp - zone->start; > case BLK_ZONE_COND_FULL: > - return zone->len; > + return zone->capacity; > case BLK_ZONE_COND_EMPTY: > case BLK_ZONE_COND_OFFLINE: > case BLK_ZONE_COND_READONLY: > @@ -50,6 +50,12 @@ static unsigned int sd_zbc_get_zone_wp_offset(struct blk_zone *zone) > } > } > > +/* Whether or not a SCSI zone descriptor describes a gap zone. */ > +static bool sd_zbc_is_gap_zone(const u8 buf[64]) > +{ > + return (buf[0] & 0xf) == ZBC_ZONE_TYPE_GAP; > +} > + > /** > * sd_zbc_parse_report - Parse a SCSI zone descriptor > * @sdkp: SCSI disk pointer. > @@ -68,8 +74,12 @@ static int sd_zbc_parse_report(struct scsi_disk *sdkp, const u8 buf[64], > { > struct scsi_device *sdp = sdkp->device; > struct blk_zone zone = { 0 }; > + sector_t gran; > + u64 start_lba; > int ret; > > + if (WARN_ON_ONCE(sd_zbc_is_gap_zone(buf))) > + return -EINVAL; > zone.type = buf[0] & 0x0f; > zone.cond = (buf[1] >> 4) & 0xf; > if (buf[1] & 0x01) > @@ -79,9 +89,25 @@ static int sd_zbc_parse_report(struct scsi_disk *sdkp, const u8 buf[64], > > zone.len = logical_to_sectors(sdp, get_unaligned_be64(&buf[8])); > zone.capacity = zone.len; > - zone.start = logical_to_sectors(sdp, get_unaligned_be64(&buf[16])); > + start_lba = get_unaligned_be64(&buf[16]); > + zone.start = logical_to_sectors(sdp, start_lba); > + if (sdkp->zone_starting_lba_gran) { > + idx = start_lba >> ilog2(sdkp->zone_starting_lba_gran); > + gran = logical_to_sectors(sdp, sdkp->zone_starting_lba_gran); > + if (zone.capacity > zone.len || zone.len > gran) { > + sd_printk(KERN_ERR, sdkp, > + "Invalid zone for LBA %llu: zone capacity %llu; zone length %llu; granularity %llu\n", > + start_lba, zone.capacity, zone.len, gran); > + return -EINVAL; > + } > + /* > + * Change the zone length obtained from REPORT ZONES into the > + * zone starting LBA granularity. > + */ > + zone.len = gran; > + } > if (zone.cond == ZBC_ZONE_COND_FULL) > - zone.wp = zone.start + zone.len; > + zone.wp = zone.start + zone.capacity; > else > zone.wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24])); > > @@ -227,6 +253,7 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, > sdkp->capacity); > unsigned int nr, i; > unsigned char *buf; > + u64 zone_length, start_lba; > size_t offset, buflen = 0; > int zone_idx = 0; > int ret; > @@ -254,16 +281,33 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, > if (!nr) > break; > > - for (i = 0; i < nr && zone_idx < nr_zones; i++) { > + for (i = 0; i < nr && zone_idx < nr_zones; i++, zone_idx++) { > offset += 64; > + zone_length = get_unaligned_be64(&buf[offset + 8]); > + start_lba = get_unaligned_be64(&buf[offset + 16]); > + if (start_lba < sectors_to_logical(sdkp->device, sector) > + || start_lba + zone_length < start_lba) { > + sd_printk(KERN_ERR, sdkp, > + "Zone %d is invalid: %llu + %llu\n", > + zone_idx, start_lba, zone_length); > + ret = -EINVAL; > + goto out; > + } > + sector = logical_to_sectors(sdkp->device, start_lba + > + zone_length); > + if (sd_zbc_is_gap_zone(&buf[offset])) { > + if (sdkp->zone_starting_lba_gran) > + continue; > + sd_printk(KERN_ERR, sdkp, > + "Gap zone without constant LBA offsets\n"); > + ret = -EINVAL; > + goto out; > + } > ret = sd_zbc_parse_report(sdkp, buf + offset, zone_idx, > cb, data); > if (ret) > goto out; > - zone_idx++; > } > - > - sector += sd_zbc_zone_sectors(sdkp) * i; > } > > ret = zone_idx; > @@ -580,6 +624,8 @@ unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, > static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp, > unsigned char *buf) > { > + u64 zone_starting_lba_gran; > + u8 zone_alignment_method; > > if (scsi_get_vpd_page(sdkp->device, 0xb6, buf, 64)) { > sd_printk(KERN_NOTICE, sdkp, > @@ -599,6 +645,28 @@ static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp, > sdkp->zones_optimal_open = 0; > sdkp->zones_optimal_nonseq = 0; > sdkp->zones_max_open = get_unaligned_be32(&buf[16]); > + zone_alignment_method = buf[23] & 0xf; > + if (zone_alignment_method == > + ZBC_CONSTANT_ZONE_STARTING_LBA_OFFSETS) { > + zone_starting_lba_gran = > + get_unaligned_be64(&buf[24]); > + if (zone_starting_lba_gran == 0) { > + sd_printk(KERN_ERR, sdkp, > + "Inconsistent: zone starting LBA granularity is zero\n"); > + } > + if (!is_power_of_2(zone_starting_lba_gran) || > + logical_to_sectors(sdkp->device, > + zone_starting_lba_gran) > > + UINT_MAX) { > + sd_printk(KERN_ERR, sdkp, > + "Invalid zone starting LBA granularity %llu\n", > + zone_starting_lba_gran); > + return -EINVAL; > + } > + sdkp->zone_starting_lba_gran = zone_starting_lba_gran; > + WARN_ON_ONCE(sdkp->zone_starting_lba_gran != > + zone_starting_lba_gran); > + } > } > > /* > @@ -664,7 +732,7 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf, > return -EFBIG; > } > > - *zblocks = zone_blocks; > + *zblocks = sdkp->zone_starting_lba_gran ? : zone_blocks; > > if (!is_power_of_2(*zblocks)) { > sd_printk(KERN_ERR, sdkp, > diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h > index f017843a8124..521f9feac778 100644 > --- a/include/scsi/scsi_proto.h > +++ b/include/scsi/scsi_proto.h > @@ -307,7 +307,9 @@ enum zbc_zone_type { > ZBC_ZONE_TYPE_CONV = 0x1, > ZBC_ZONE_TYPE_SEQWRITE_REQ = 0x2, > ZBC_ZONE_TYPE_SEQWRITE_PREF = 0x3, > - /* 0x4 to 0xf are reserved */ > + ZBC_ZONE_TYPE_SEQ_OR_BEFORE_REQ = 0x4, > + ZBC_ZONE_TYPE_GAP = 0x5, > + /* 0x6 to 0xf are reserved */ > }; > > /* Zone conditions of REPORT ZONES zone descriptors */ > @@ -323,6 +325,10 @@ enum zbc_zone_cond { > ZBC_ZONE_COND_OFFLINE = 0xf, > }; > > +enum zbc_zone_alignment_method { > + ZBC_CONSTANT_ZONE_STARTING_LBA_OFFSETS = 8, > +}; > + > /* Version descriptor values for INQUIRY */ > enum scsi_version_descriptor { > SCSI_VERSION_DESCRIPTOR_FCP4 = 0x0a40, -- Damien Le Moal Western Digital Research