Add support for limiting CPU latency while block IO is running. When a block IO is started, it will add a user configurable CPU latency limit in place (if any.) The limit is removed with a configurable timeout mechanism. Signed-off-by: Tero Kristo <tero.kristo@xxxxxxxxxxxxxxx> --- block/bdev.c | 51 +++++++++++++++++++++++++++++++++++++++ block/bio.c | 2 ++ block/blk.h | 2 ++ include/linux/blk_types.h | 12 +++++++++ 4 files changed, 67 insertions(+) diff --git a/block/bdev.c b/block/bdev.c index 353677ac49b3..8f20681a4ea6 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -405,10 +405,18 @@ void __init bdev_cache_init(void) blockdev_superblock = blockdev_mnt->mnt_sb; /* For writeback */ } +static void bdev_pm_qos_work(struct work_struct *work) +{ + struct bdev_cpu_latency_qos *qos = + container_of(work, struct bdev_cpu_latency_qos, work.work); + dev_pm_qos_remove_request(&qos->req); +} + struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) { struct block_device *bdev; struct inode *inode; + int cpu; inode = new_inode(blockdev_superblock); if (!inode) @@ -433,6 +441,16 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) return NULL; } bdev->bd_disk = disk; + bdev->bd_pm_qos = alloc_percpu(struct bdev_cpu_latency_qos); + if (!bdev->bd_pm_qos) { + free_percpu(bdev->bd_stats); + iput(inode); + return NULL; + } + for_each_possible_cpu(cpu) + INIT_DELAYED_WORK(per_cpu_ptr(&bdev->bd_pm_qos->work, cpu), + bdev_pm_qos_work); + bdev->cpu_lat_limit = -1; return bdev; } @@ -462,6 +480,19 @@ void bdev_unhash(struct block_device *bdev) void bdev_drop(struct block_device *bdev) { + int cpu; + struct bdev_cpu_latency_qos *qos; + + for_each_possible_cpu(cpu) { + qos = per_cpu_ptr(bdev->bd_pm_qos, cpu); + if (dev_pm_qos_request_active(&qos->req)) { + cancel_delayed_work(&qos->work); + dev_pm_qos_remove_request(&qos->req); + } + } + + free_percpu(bdev->bd_pm_qos); + iput(BD_INODE(bdev)); } @@ -1281,6 +1312,26 @@ void bdev_statx_dioalign(struct inode *inode, struct kstat *stat) blkdev_put_no_open(bdev); } +void bdev_update_cpu_latency_pm_qos(struct block_device *bdev) +{ + int cpu; + struct bdev_cpu_latency_qos *qos; + + if (!bdev || bdev->cpu_lat_limit < 0) + return; + + cpu = raw_smp_processor_id(); + qos = per_cpu_ptr(bdev->bd_pm_qos, cpu); + + if (!dev_pm_qos_request_active(&qos->req)) + dev_pm_qos_add_request(get_cpu_device(cpu), &qos->req, + DEV_PM_QOS_RESUME_LATENCY, + bdev->cpu_lat_limit); + + mod_delayed_work(system_wq, &qos->work, + msecs_to_jiffies(bdev->cpu_lat_timeout)); +} + bool disk_live(struct gendisk *disk) { return !inode_unhashed(BD_INODE(disk->part0)); diff --git a/block/bio.c b/block/bio.c index e9e809a63c59..6c46d75345d7 100644 --- a/block/bio.c +++ b/block/bio.c @@ -282,6 +282,8 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, bio->bi_max_vecs = max_vecs; bio->bi_io_vec = table; bio->bi_pool = NULL; + + bdev_update_cpu_latency_pm_qos(bio->bi_bdev); } EXPORT_SYMBOL(bio_init); diff --git a/block/blk.h b/block/blk.h index 189bc25beb50..dda2a188984b 100644 --- a/block/blk.h +++ b/block/blk.h @@ -516,6 +516,8 @@ void drop_partition(struct block_device *part); void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors); +void bdev_update_cpu_latency_pm_qos(struct block_device *bdev); + struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, struct lock_class_key *lkclass); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 781c4500491b..0ed29603eaa9 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -11,6 +11,7 @@ #include <linux/device.h> #include <linux/ktime.h> #include <linux/rw_hint.h> +#include <linux/pm_qos.h> struct bio_set; struct bio; @@ -38,6 +39,11 @@ struct bio_crypt_ctx; #define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) #define SECTOR_MASK (PAGE_SECTORS - 1) +struct bdev_cpu_latency_qos { + struct dev_pm_qos_request req; + struct delayed_work work; +}; + struct block_device { sector_t bd_start_sect; sector_t bd_nr_sectors; @@ -71,6 +77,12 @@ struct block_device { struct partition_meta_info *bd_meta_info; int bd_writers; + + /* For preventing deep idle during block I/O */ + struct bdev_cpu_latency_qos __percpu *bd_pm_qos; + int cpu_lat_timeout; + int cpu_lat_limit; + /* * keep this out-of-line as it's both big and not needed in the fast * path -- 2.43.1