Power-of-2 (PO2) emulation support is added to the null_blk device so
that its performance impact can be measured. Callbacks are added in the
hotpaths that require PO2 emulation-specific behaviour to reduce the
impact on the existing path. The power-of-2 emulation support is wired
up for both the request and the bio queue modes, and it is enabled
automatically when the given zone size is not a power of 2.

Signed-off-by: Pankaj Raghav <p.raghav@xxxxxxxxxxx>
---
 drivers/block/null_blk/main.c     |   8 +-
 drivers/block/null_blk/null_blk.h |  12 ++
 drivers/block/null_blk/zoned.c    | 203 ++++++++++++++++++++++++++----
 3 files changed, 196 insertions(+), 27 deletions(-)

diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 625a06bfa5ad..c926b59f2b17 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -210,7 +210,7 @@ MODULE_PARM_DESC(zoned, "Make device as a host-managed zoned block device. Defau
 
 static unsigned long g_zone_size = 256;
 module_param_named(zone_size, g_zone_size, ulong, S_IRUGO);
-MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256");
+MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Default: 256");
 
 static unsigned long g_zone_capacity;
 module_param_named(zone_capacity, g_zone_capacity, ulong, 0444);
@@ -1772,11 +1772,13 @@ static const struct block_device_operations null_bio_ops = {
         .owner          = THIS_MODULE,
         .submit_bio     = null_submit_bio,
         .report_zones   = null_report_zones,
+        .npo2_zone_setup = null_po2_zone_emu_setup,
 };
 
 static const struct block_device_operations null_rq_ops = {
         .owner          = THIS_MODULE,
         .report_zones   = null_report_zones,
+        .npo2_zone_setup = null_po2_zone_emu_setup,
 };
 
 static int setup_commands(struct nullb_queue *nq)
@@ -1929,8 +1931,8 @@ static int null_validate_conf(struct nullb_device *dev)
                 dev->mbps = 0;
 
         if (dev->zoned &&
-            (!dev->zone_size || !is_power_of_2(dev->zone_size))) {
-                pr_err("zone_size must be power-of-two\n");
+            (!dev->zone_size)) {
+                pr_err("zone_size must not be zero\n");
                 return -EINVAL;
         }
 
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index 78eb56b0ca55..34c1b7b2546b 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -74,6 +74,16 @@ struct nullb_device {
         unsigned int imp_close_zone_no;
         struct nullb_zone *zones;
         sector_t zone_size_sects;
+        sector_t zone_size_po2_sects;
+        sector_t zone_size_diff_sects;
+        /* The callbacks below are used as hooks to perform po2 emulation
+         * for a zoned device.
+         */
+        unsigned int (*zone_no)(struct nullb_device *dev,
+                                sector_t sect);
+        sector_t (*zone_update_sector)(struct nullb_device *dev, sector_t sect);
+        sector_t (*zone_update_sector_append)(struct nullb_device *dev,
+                                              sector_t sect);
         bool need_zone_res_mgmt;
         spinlock_t zone_res_lock;
 
@@ -137,6 +147,7 @@ int null_register_zoned_dev(struct nullb *nullb);
 void null_free_zoned_dev(struct nullb_device *dev);
 int null_report_zones(struct gendisk *disk, sector_t sector,
                       unsigned int nr_zones, report_zones_cb cb, void *data);
+void null_po2_zone_emu_setup(struct gendisk *disk);
 blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op,
                                     sector_t sector, sector_t nr_sectors);
 
@@ -166,5 +177,6 @@ static inline size_t null_zone_valid_read_len(struct nullb *nullb,
         return len;
 }
 #define null_report_zones NULL
+#define null_po2_zone_emu_setup NULL
 #endif /* CONFIG_BLK_DEV_ZONED */
 #endif /* __NULL_BLK_H */
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index dae54dd1aeac..3bb63c170149 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -16,6 +16,44 @@ static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect)
         return sect >> ilog2(dev->zone_size_sects);
 }
 
+static inline unsigned int null_npo2_zone_no(struct nullb_device *dev,
+                                             sector_t sect)
+{
+        return sect / dev->zone_size_sects;
+}
+
+static inline bool null_is_po2_zone_emu(struct nullb_device *dev)
+{
+        return !!dev->zone_size_diff_sects;
+}
+
+static inline sector_t null_zone_update_sector_noop(struct nullb_device *dev,
+                                                    sector_t sect)
+{
+        return sect;
+}
+
+static inline sector_t null_zone_update_sector_po2_emu(struct nullb_device *dev,
+                                                       sector_t sect)
+{
+        sector_t zsze_po2 = dev->zone_size_po2_sects;
+        sector_t zsze_diff = dev->zone_size_diff_sects;
+        u32 zone_idx = sect >> ilog2(zsze_po2);
+
+        sect = sect - (zone_idx * zsze_diff);
+        return sect;
+}
+
+static inline sector_t
+null_zone_update_sector_append_po2_emu(struct nullb_device *dev, sector_t sect)
+{
+        /* Need to readjust the sector if po2 emulation is used. */
+        u32 zone_no = dev->zone_no(dev, sect);
+
+        sect += dev->zone_size_diff_sects * zone_no;
+        return sect;
+}
+
 static inline void null_lock_zone_res(struct nullb_device *dev)
 {
         if (dev->need_zone_res_mgmt)
@@ -62,15 +100,14 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
         sector_t sector = 0;
         unsigned int i;
 
-        if (!is_power_of_2(dev->zone_size)) {
-                pr_err("zone_size must be power-of-two\n");
-                return -EINVAL;
-        }
         if (dev->zone_size > dev->size) {
                 pr_err("Zone size larger than device capacity\n");
                 return -EINVAL;
         }
 
+        if (!is_power_of_2(dev->zone_size))
+                pr_info("zone_size is not power-of-two. power-of-two emulation is enabled\n");
+
         if (!dev->zone_capacity)
                 dev->zone_capacity = dev->zone_size;
 
@@ -83,8 +120,14 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
         zone_capacity_sects = mb_to_sects(dev->zone_capacity);
         dev_capacity_sects = mb_to_sects(dev->size);
         dev->zone_size_sects = mb_to_sects(dev->zone_size);
-        dev->nr_zones = round_up(dev_capacity_sects, dev->zone_size_sects)
-                >> ilog2(dev->zone_size_sects);
+
+        dev->nr_zones = roundup(dev_capacity_sects, dev->zone_size_sects) /
+                dev->zone_size_sects;
+
+        dev->zone_no = null_zone_no;
+        dev->zone_update_sector = null_zone_update_sector_noop;
+        dev->zone_update_sector_append = null_zone_update_sector_noop;
+
         dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct nullb_zone),
                                     GFP_KERNEL | __GFP_ZERO);
 
@@ -166,7 +209,13 @@ int null_register_zoned_dev(struct nullb *nullb)
                 if (ret)
                         return ret;
         } else {
-                blk_queue_chunk_sectors(q, dev->zone_size_sects);
+                nullb->disk->fops->npo2_zone_setup(nullb->disk);
+
+                if (null_is_po2_zone_emu(dev))
+                        blk_queue_chunk_sectors(q, dev->zone_size_po2_sects);
+                else
+                        blk_queue_chunk_sectors(q, dev->zone_size_sects);
+
                 q->nr_zones = blkdev_nr_zones(nullb->disk);
         }
 
@@ -183,17 +232,49 @@ void null_free_zoned_dev(struct nullb_device *dev)
         dev->zones = NULL;
 }
 
+static void null_update_zone_info(struct nullb *nullb, struct blk_zone *blkz,
+                                  struct nullb_zone *zone)
+{
+        unsigned int zone_idx;
+        u64 zone_gap;
+        struct nullb_device *dev = nullb->dev;
+
+        if (null_is_po2_zone_emu(dev)) {
+                zone_idx = zone->start / zone->len;
+                zone_gap = zone_idx * dev->zone_size_diff_sects;
+
+                blkz->start = zone->start + zone_gap;
+                blkz->len = dev->zone_size_po2_sects;
+                blkz->wp = zone->wp + zone_gap;
+        } else {
+                blkz->start = zone->start;
+                blkz->len = zone->len;
+                blkz->wp = zone->wp;
+        }
+
+        blkz->type = zone->type;
+        blkz->cond = zone->cond;
+        blkz->capacity = zone->capacity;
+}
+
 int null_report_zones(struct gendisk *disk, sector_t sector,
                       unsigned int nr_zones, report_zones_cb cb, void *data)
 {
         struct nullb *nullb = disk->private_data;
         struct nullb_device *dev = nullb->dev;
         unsigned int first_zone, i;
+        sector_t zone_size;
         struct nullb_zone *zone;
         struct blk_zone blkz;
         int error;
 
-        first_zone = null_zone_no(dev, sector);
+        if (null_is_po2_zone_emu(dev))
+                zone_size = dev->zone_size_po2_sects;
+        else
+                zone_size = dev->zone_size_sects;
+
+        first_zone = sector / zone_size;
+
         if (first_zone >= dev->nr_zones)
                 return 0;
 
@@ -210,12 +291,7 @@ int null_report_zones(struct gendisk *disk, sector_t sector,
                  * array.
                  */
                 null_lock_zone(dev, zone);
-                blkz.start = zone->start;
-                blkz.len = zone->len;
-                blkz.wp = zone->wp;
-                blkz.type = zone->type;
-                blkz.cond = zone->cond;
-                blkz.capacity = zone->capacity;
+                null_update_zone_info(nullb, &blkz, zone);
                 null_unlock_zone(dev, zone);
 
                 error = cb(&blkz, i, data);
@@ -226,6 +302,35 @@ int null_report_zones(struct gendisk *disk, sector_t sector,
         return nr_zones;
 }
 
+void null_po2_zone_emu_setup(struct gendisk *disk)
+{
+        struct nullb *nullb = disk->private_data;
+        struct nullb_device *dev = nullb->dev;
+        sector_t capacity;
+
+        if (is_power_of_2(dev->zone_size_sects))
+                return;
+
+        if (!get_capacity(disk))
+                return;
+
+        blk_mq_freeze_queue(disk->queue);
+
+        blk_queue_po2_zone_emu(disk->queue, 1);
+        dev->zone_size_po2_sects =
+                1 << get_count_order_long(dev->zone_size_sects);
+        dev->zone_size_diff_sects =
+                dev->zone_size_po2_sects - dev->zone_size_sects;
+        dev->zone_no = null_npo2_zone_no;
+        dev->zone_update_sector = null_zone_update_sector_po2_emu;
+        dev->zone_update_sector_append = null_zone_update_sector_append_po2_emu;
+
+        capacity = dev->nr_zones * dev->zone_size_po2_sects;
+        set_capacity(disk, capacity);
+
+        blk_mq_unfreeze_queue(disk->queue);
+}
+
 /*
  * This is called in the case of memory backing from null_process_cmd()
  * with the target zone already locked.
  */
@@ -234,7 +339,7 @@ size_t null_zone_valid_read_len(struct nullb *nullb,
                                 sector_t sector, unsigned int len)
 {
         struct nullb_device *dev = nullb->dev;
-        struct nullb_zone *zone = &dev->zones[null_zone_no(dev, sector)];
+        struct nullb_zone *zone = &dev->zones[dev->zone_no(dev, sector)];
         unsigned int nr_sectors = len >> SECTOR_SHIFT;
 
         /* Read must be below the write pointer position */
@@ -363,11 +468,24 @@ static blk_status_t null_check_zone_resources(struct nullb_device *dev,
         }
 }
 
+static void null_update_sector_append(struct nullb_cmd *cmd, sector_t sector)
+{
+        struct nullb_device *dev = cmd->nq->dev;
+
+        sector = dev->zone_update_sector_append(dev, sector);
+
+        if (cmd->bio) {
+                cmd->bio->bi_iter.bi_sector = sector;
+        } else {
+                cmd->rq->__sector = sector;
+        }
+}
+
 static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
                                     unsigned int nr_sectors, bool append)
 {
         struct nullb_device *dev = cmd->nq->dev;
-        unsigned int zno = null_zone_no(dev, sector);
+        unsigned int zno = dev->zone_no(dev, sector);
         struct nullb_zone *zone = &dev->zones[zno];
         blk_status_t ret;
 
@@ -395,10 +513,7 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
                  */
                 if (append) {
                         sector = zone->wp;
-                        if (cmd->bio)
-                                cmd->bio->bi_iter.bi_sector = sector;
-                        else
-                                cmd->rq->__sector = sector;
+                        null_update_sector_append(cmd, sector);
                 } else if (sector != zone->wp) {
                         ret = BLK_STS_IOERR;
                         goto unlock;
                 }
@@ -619,7 +734,7 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
                 return BLK_STS_OK;
         }
 
-        zone_no = null_zone_no(dev, sector);
+        zone_no = dev->zone_no(dev, sector);
         zone = &dev->zones[zone_no];
         null_lock_zone(dev, zone);
 
@@ -650,13 +765,54 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
         return ret;
 }
 
+static blk_status_t null_handle_po2_zone_emu_violation(struct nullb_cmd *cmd,
+                                                       enum req_opf op)
+{
+        if (op == REQ_OP_READ) {
+                if (cmd->bio)
+                        zero_fill_bio(cmd->bio);
+                else
+                        zero_fill_bio(cmd->rq->bio);
+
+                return BLK_STS_OK;
+        } else {
+                return BLK_STS_IOERR;
+        }
+}
+
+static bool null_verify_sector_violation(struct nullb_device *dev,
+                                         sector_t sector)
+{
+        if (unlikely(null_is_po2_zone_emu(dev))) {
+                /* The zone idx should be calculated based on the emulated
+                 * layout.
+                 */
+                u32 zone_idx = sector >> ilog2(dev->zone_size_po2_sects);
+                sector_t zsze = dev->zone_size_sects;
+                sector_t sect = null_zone_update_sector_po2_emu(dev, sector);
+
+                if (sect - (zone_idx * zsze) > zsze)
+                        return true;
+        }
+        return false;
+}
+
 blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op,
                                     sector_t sector, sector_t nr_sectors)
 {
-        struct nullb_device *dev;
+        struct nullb_device *dev = cmd->nq->dev;
         struct nullb_zone *zone;
         blk_status_t sts;
 
+        /* Handle the case where the sector falls in the emulated area */
+        if (unlikely(null_verify_sector_violation(dev, sector)))
+                return null_handle_po2_zone_emu_violation(cmd, op);
+
+        /* The sector value is updated if po2 emulation is enabled, otherwise
+         * it is left unchanged.
+         */
+        sector = dev->zone_update_sector(dev, sector);
+
         switch (op) {
         case REQ_OP_WRITE:
                 return null_zone_write(cmd, sector, nr_sectors, false);
@@ -669,8 +825,7 @@ blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op,
         case REQ_OP_ZONE_FINISH:
                 return null_zone_mgmt(cmd, op, sector);
         default:
-                dev = cmd->nq->dev;
-                zone = &dev->zones[null_zone_no(dev, sector)];
+                zone = &dev->zones[dev->zone_no(dev, sector)];
                 null_lock_zone(dev, zone);
                 sts = null_process_cmd(cmd, op, sector, nr_sectors);
 
-- 
2.25.1
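For reference, the sector remapping performed by null_zone_update_sector_po2_emu()
and null_zone_update_sector_append_po2_emu() above can be illustrated with a small
standalone userspace program. This is only a sketch for reviewers, not part of the
patch, and the 96M/128M zone sizes are made-up example values:

/* Standalone illustration, not kernel code: zone sizes are in 512-byte
 * sectors and the 96M/128M values below are arbitrary examples.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t zsze = 96 * 2048;       /* real (non-po2) zone size: 96M */
        uint64_t zsze_po2 = 128 * 2048;  /* emulated size: next power of 2, 128M */
        uint64_t diff = zsze_po2 - zsze; /* per-zone gap added by the emulation */

        /* A sector as seen by the block layer, i.e. in the emulated layout. */
        uint64_t emu_sect = 300000;
        /* The driver uses a shift here since zsze_po2 is a power of 2. */
        uint64_t zone_idx = emu_sect / zsze_po2;
        uint64_t real_sect = emu_sect - zone_idx * diff;

        printf("emulated %llu -> zone %llu -> real %llu\n",
               (unsigned long long)emu_sect, (unsigned long long)zone_idx,
               (unsigned long long)real_sect);

        /* Zone append completions go the other way: real -> emulated. */
        printf("real %llu -> emulated %llu\n",
               (unsigned long long)real_sect,
               (unsigned long long)(real_sect + zone_idx * diff));
        return 0;
}

Assuming the rest of the series is applied, the emulation can then be exercised by
loading null_blk with a zone size that is not a power of 2, for example
"modprobe null_blk zoned=1 zone_size=96", and inspecting the result with
"blkzone report /dev/nullb0", which should show the rounded-up power-of-2 zone size.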