From: Himanshu Madhani <himanshu.madhani@xxxxxxxxxx> Add the following limits: - atomic_write_boundary_bytes - atomic_write_max_bytes - atomic_write_unit_max_bytes - atomic_write_unit_min_bytes All atomic writes limits are initialised to 0 to indicate no atomic write support. Stacked devices are just not supported either for now. Signed-off-by: Himanshu Madhani <himanshu.madhani@xxxxxxxxxx> #jpg: Heavy rewrite Signed-off-by: John Garry <john.g.garry@xxxxxxxxxx> --- This will conflict with https://lore.kernel.org/linux-nvme/20240122173645.1686078-1-hch@xxxxxx/T/#mf77609a2064fe9387706ce564d8246c5243eeb99, but I will rebase when that is merged and I assume blk_atomic_writes_update_limits() will be merged into a larger "update" function. Documentation/ABI/stable/sysfs-block | 52 ++++++++++++++++++ block/blk-settings.c | 79 ++++++++++++++++++++++++++++ block/blk-sysfs.c | 33 ++++++++++++ include/linux/blkdev.h | 40 ++++++++++++++ 4 files changed, 204 insertions(+) diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block index 1fe9a553c37b..ac3c6b46f1a3 100644 --- a/Documentation/ABI/stable/sysfs-block +++ b/Documentation/ABI/stable/sysfs-block @@ -21,6 +21,58 @@ Description: device is offset from the internal allocation unit's natural alignment. +What: /sys/block/<disk>/atomic_write_max_bytes +Date: January 2024 +Contact: Himanshu Madhani <himanshu.madhani@xxxxxxxxxx> +Description: + [RO] This parameter specifies the maximum atomic write + size reported by the device. This parameter is relevant + for merging of writes, where a merged atomic write + operation must not exceed this number of bytes. + This parameter may be greater than the value in + atomic_write_unit_max_bytes as + atomic_write_unit_max_bytes will be rounded down to a + power-of-two and atomic_write_unit_max_bytes may also be + limited by some other queue limits, such as max_segments. 
+ This parameter - along with atomic_write_unit_min_bytes + and atomic_write_unit_max_bytes - will not be larger than + max_hw_sectors_kb, but may be larger than max_sectors_kb. + + +What: /sys/block/<disk>/atomic_write_unit_min_bytes +Date: January 2024 +Contact: Himanshu Madhani <himanshu.madhani@xxxxxxxxxx> +Description: + [RO] This parameter specifies the smallest block which can + be written atomically with an atomic write operation. All + atomic write operations must begin at an + atomic_write_unit_min boundary and must be multiples of + atomic_write_unit_min. This value must be a power-of-two. + + +What: /sys/block/<disk>/atomic_write_unit_max_bytes +Date: January 2024 +Contact: Himanshu Madhani <himanshu.madhani@xxxxxxxxxx> +Description: + [RO] This parameter defines the largest block which can be + written atomically with an atomic write operation. This + value must be a multiple of atomic_write_unit_min and must + be a power-of-two. This value will not be larger than + atomic_write_max_bytes. + + +What: /sys/block/<disk>/atomic_write_boundary_bytes +Date: January 2024 +Contact: Himanshu Madhani <himanshu.madhani@xxxxxxxxxx> +Description: + [RO] A device may need to internally split I/Os which + straddle a given logical block address boundary. In that + case a single atomic write operation will be processed as + one or more sub-operations which each complete atomically. + This parameter specifies the size in bytes of the atomic + boundary if one is reported by the device. This value must + be a power-of-two. 
+ What: /sys/block/<disk>/diskseq Date: February 2021 diff --git a/block/blk-settings.c b/block/blk-settings.c index 06ea91e51b8b..11c0361c2313 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -59,6 +59,13 @@ void blk_set_default_limits(struct queue_limits *lim) lim->zoned = false; lim->zone_write_granularity = 0; lim->dma_alignment = 511; + lim->atomic_write_hw_max_sectors = 0; + lim->atomic_write_max_sectors = 0; + lim->atomic_write_hw_boundary_sectors = 0; + lim->atomic_write_hw_unit_min_sectors = 0; + lim->atomic_write_unit_min_sectors = 0; + lim->atomic_write_hw_unit_max_sectors = 0; + lim->atomic_write_unit_max_sectors = 0; } /** @@ -101,6 +108,20 @@ void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce bounce) } EXPORT_SYMBOL(blk_queue_bounce_limit); +static void blk_atomic_writes_update_limits(struct request_queue *q) +{ + struct queue_limits *limits = &q->limits; + unsigned int max_hw_sectors = + rounddown_pow_of_two(limits->max_hw_sectors); + + limits->atomic_write_max_sectors = + min(limits->atomic_write_hw_max_sectors, max_hw_sectors); + limits->atomic_write_unit_min_sectors = + min(limits->atomic_write_hw_unit_min_sectors, max_hw_sectors); + limits->atomic_write_unit_max_sectors = + min(limits->atomic_write_hw_unit_max_sectors, max_hw_sectors); +} + /** * blk_queue_max_hw_sectors - set max sectors for a request for this queue * @q: the request queue for the device @@ -145,6 +166,8 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto limits->logical_block_size >> SECTOR_SHIFT); limits->max_sectors = max_sectors; + blk_atomic_writes_update_limits(q); + if (!q->disk) return; q->disk->bdi->io_pages = max_sectors >> (PAGE_SHIFT - 9); @@ -182,6 +205,62 @@ void blk_queue_max_discard_sectors(struct request_queue *q, } EXPORT_SYMBOL(blk_queue_max_discard_sectors); +/** + * blk_queue_atomic_write_max_bytes - set max bytes supported by + * the device for atomic write operations. 
+ * @q: the request queue for the device + * @bytes: maximum bytes supported + */ +void blk_queue_atomic_write_max_bytes(struct request_queue *q, + unsigned int bytes) +{ + q->limits.atomic_write_hw_max_sectors = bytes >> SECTOR_SHIFT; + blk_atomic_writes_update_limits(q); +} +EXPORT_SYMBOL(blk_queue_atomic_write_max_bytes); + +/** + * blk_queue_atomic_write_boundary_bytes - Device's logical block address space + * which an atomic write should not cross. + * @q: the request queue for the device + * @bytes: must be a power-of-two. + */ +void blk_queue_atomic_write_boundary_bytes(struct request_queue *q, + unsigned int bytes) +{ + q->limits.atomic_write_hw_boundary_sectors = bytes >> SECTOR_SHIFT; +} +EXPORT_SYMBOL(blk_queue_atomic_write_boundary_bytes); + +/** + * blk_queue_atomic_write_unit_min_sectors - smallest unit that can be written + * atomically to the device. + * @q: the request queue for the device + * @sectors: must be a power-of-two. + */ +void blk_queue_atomic_write_unit_min_sectors(struct request_queue *q, + unsigned int sectors) +{ + + q->limits.atomic_write_hw_unit_min_sectors = sectors; + blk_atomic_writes_update_limits(q); +} +EXPORT_SYMBOL(blk_queue_atomic_write_unit_min_sectors); + +/** + * blk_queue_atomic_write_unit_max_sectors - largest unit that can be written + * atomically to the device. + * @q: the request queue for the device + * @sectors: must be a power-of-two. 
+ */ +void blk_queue_atomic_write_unit_max_sectors(struct request_queue *q, + unsigned int sectors) +{ + q->limits.atomic_write_hw_unit_max_sectors = sectors; + blk_atomic_writes_update_limits(q); +} +EXPORT_SYMBOL(blk_queue_atomic_write_unit_max_sectors); + /** * blk_queue_max_secure_erase_sectors - set max sectors for a secure erase * @q: the request queue for the device diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 6b2429cad81a..3978f14f9769 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -118,6 +118,30 @@ static ssize_t queue_max_discard_segments_show(struct request_queue *q, return queue_var_show(queue_max_discard_segments(q), page); } +static ssize_t queue_atomic_write_max_bytes_show(struct request_queue *q, + char *page) +{ + return queue_var_show(queue_atomic_write_max_bytes(q), page); +} + +static ssize_t queue_atomic_write_boundary_show(struct request_queue *q, + char *page) +{ + return queue_var_show(queue_atomic_write_boundary_bytes(q), page); +} + +static ssize_t queue_atomic_write_unit_min_show(struct request_queue *q, + char *page) +{ + return queue_var_show(queue_atomic_write_unit_min_bytes(q), page); +} + +static ssize_t queue_atomic_write_unit_max_show(struct request_queue *q, + char *page) +{ + return queue_var_show(queue_atomic_write_unit_max_bytes(q), page); +} + static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char *page) { return queue_var_show(q->limits.max_integrity_segments, page); @@ -502,6 +526,11 @@ QUEUE_RO_ENTRY(queue_discard_max_hw, "discard_max_hw_bytes"); QUEUE_RW_ENTRY(queue_discard_max, "discard_max_bytes"); QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data"); +QUEUE_RO_ENTRY(queue_atomic_write_max_bytes, "atomic_write_max_bytes"); +QUEUE_RO_ENTRY(queue_atomic_write_boundary, "atomic_write_boundary_bytes"); +QUEUE_RO_ENTRY(queue_atomic_write_unit_max, "atomic_write_unit_max_bytes"); +QUEUE_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes"); + 
QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes"); QUEUE_RO_ENTRY(queue_write_zeroes_max, "write_zeroes_max_bytes"); QUEUE_RO_ENTRY(queue_zone_append_max, "zone_append_max_bytes"); @@ -629,6 +658,10 @@ static struct attribute *queue_attrs[] = { &queue_discard_max_entry.attr, &queue_discard_max_hw_entry.attr, &queue_discard_zeroes_data_entry.attr, + &queue_atomic_write_max_bytes_entry.attr, + &queue_atomic_write_boundary_entry.attr, + &queue_atomic_write_unit_min_entry.attr, + &queue_atomic_write_unit_max_entry.attr, &queue_write_same_max_entry.attr, &queue_write_zeroes_max_entry.attr, &queue_zone_append_max_entry.attr, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 99e4f5e72213..d5490b988918 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -299,6 +299,14 @@ struct queue_limits { unsigned int discard_alignment; unsigned int zone_write_granularity; + unsigned int atomic_write_hw_max_sectors; + unsigned int atomic_write_max_sectors; + unsigned int atomic_write_hw_boundary_sectors; + unsigned int atomic_write_hw_unit_min_sectors; + unsigned int atomic_write_unit_min_sectors; + unsigned int atomic_write_hw_unit_max_sectors; + unsigned int atomic_write_unit_max_sectors; + unsigned short max_segments; unsigned short max_integrity_segments; unsigned short max_discard_segments; @@ -885,6 +893,14 @@ void blk_queue_zone_write_granularity(struct request_queue *q, unsigned int size); extern void blk_queue_alignment_offset(struct request_queue *q, unsigned int alignment); +void blk_queue_atomic_write_max_bytes(struct request_queue *q, + unsigned int bytes); +void blk_queue_atomic_write_unit_max_sectors(struct request_queue *q, + unsigned int sectors); +void blk_queue_atomic_write_unit_min_sectors(struct request_queue *q, + unsigned int sectors); +void blk_queue_atomic_write_boundary_bytes(struct request_queue *q, + unsigned int bytes); void disk_update_readahead(struct gendisk *disk); extern void blk_limits_io_min(struct 
queue_limits *limits, unsigned int min); extern void blk_queue_io_min(struct request_queue *q, unsigned int min); @@ -1291,6 +1307,30 @@ static inline int queue_dma_alignment(const struct request_queue *q) return q ? q->limits.dma_alignment : 511; } +static inline unsigned int +queue_atomic_write_unit_max_bytes(const struct request_queue *q) +{ + return q->limits.atomic_write_unit_max_sectors << SECTOR_SHIFT; +} + +static inline unsigned int +queue_atomic_write_unit_min_bytes(const struct request_queue *q) +{ + return q->limits.atomic_write_unit_min_sectors << SECTOR_SHIFT; +} + +static inline unsigned int +queue_atomic_write_boundary_bytes(const struct request_queue *q) +{ + return q->limits.atomic_write_hw_boundary_sectors << SECTOR_SHIFT; +} + +static inline unsigned int +queue_atomic_write_max_bytes(const struct request_queue *q) +{ + return q->limits.atomic_write_max_sectors << SECTOR_SHIFT; +} + static inline unsigned int bdev_dma_alignment(struct block_device *bdev) { return queue_dma_alignment(bdev_get_queue(bdev)); -- 2.31.1