From: Hongyu Jin <hongyu.jin@xxxxxxxxxx> when read FEC and hash from disk, I/O priority are inconsistent with data block and blocked by other I/O with low I/O priority. Add dm_bufio_prefetch_by_ioprio() and dm_bufio_read_by_ioprio(), can pecific I/O priority for some I/O. Make I/O for FEC and hash has same I/O priority with data I/O. Co-developed-by: Yibin Ding <yibin.ding@xxxxxxxxxx> Signed-off-by: Yibin Ding <yibin.ding@xxxxxxxxxx> Signed-off-by: Hongyu Jin <hongyu.jin@xxxxxxxxxx> --- Changes in v2: - Add ioprio field in struct dm_io_region - Initial struct dm_io_region::ioprio to IOPRIO_DEFAULT - Add two interface --- drivers/md/dm-bufio.c | 50 ++++++++++++++++++++++----------- drivers/md/dm-integrity.c | 5 ++++ drivers/md/dm-io.c | 1 + drivers/md/dm-log.c | 1 + drivers/md/dm-raid1.c | 2 ++ drivers/md/dm-snap-persistent.c | 2 ++ drivers/md/dm-verity-fec.c | 3 +- drivers/md/dm-verity-target.c | 10 +++++-- drivers/md/dm-writecache.c | 4 +++ include/linux/dm-bufio.h | 6 ++++ include/linux/dm-io.h | 2 ++ 11 files changed, 66 insertions(+), 20 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 62eb27639c9b..f1f89b89ff6d 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1256,7 +1256,7 @@ static void dmio_complete(unsigned long error, void *context) } static void use_dmio(struct dm_buffer *b, enum req_op op, sector_t sector, - unsigned int n_sectors, unsigned int offset) + unsigned int n_sectors, unsigned int offset, unsigned short ioprio) { int r; struct dm_io_request io_req = { @@ -1269,6 +1269,7 @@ static void use_dmio(struct dm_buffer *b, enum req_op op, sector_t sector, .bdev = b->c->bdev, .sector = sector, .count = n_sectors, + .ioprio = ioprio, }; if (b->data_mode != DATA_MODE_VMALLOC) { @@ -1295,7 +1296,7 @@ static void bio_complete(struct bio *bio) } static void use_bio(struct dm_buffer *b, enum req_op op, sector_t sector, - unsigned int n_sectors, unsigned int offset) + unsigned int n_sectors, unsigned int offset, unsigned short ioprio) { struct bio *bio; char *ptr; @@ -1303,13 +1304,14 @@ static void use_bio(struct dm_buffer *b, enum req_op op, sector_t sector, bio = bio_kmalloc(1, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN); if (!bio) { - use_dmio(b, op, sector, n_sectors, offset); + use_dmio(b, op, sector, n_sectors, offset, ioprio); return; } bio_init(bio, b->c->bdev, bio->bi_inline_vecs, 1, op); bio->bi_iter.bi_sector = sector; bio->bi_end_io = bio_complete; bio->bi_private = b; + bio->bi_ioprio = ioprio; ptr = (char *)b->data + offset; len = n_sectors << SECTOR_SHIFT; @@ -1332,7 +1334,7 @@ static inline sector_t block_to_sector(struct dm_bufio_client *c, sector_t block return sector; } -static void submit_io(struct dm_buffer *b, enum req_op op, +static void submit_io(struct dm_buffer *b, enum req_op op, unsigned short ioprio, void (*end_io)(struct dm_buffer *, blk_status_t)) { unsigned int n_sectors; @@ -1362,9 +1364,9 @@ static void submit_io(struct dm_buffer *b, enum req_op op, } if (b->data_mode != DATA_MODE_VMALLOC) - use_bio(b, op, sector, n_sectors, offset); + use_bio(b, op, sector, n_sectors, offset, ioprio); else - use_dmio(b, op, sector, n_sectors, offset); + use_dmio(b, op, sector, n_sectors, offset, ioprio); } /* @@ -1420,7 +1422,7 @@ static void __write_dirty_buffer(struct dm_buffer *b, b->write_end = b->dirty_end; if (!write_list) - submit_io(b, REQ_OP_WRITE, write_endio); + submit_io(b, REQ_OP_WRITE, IOPRIO_DEFAULT, write_endio); else list_add_tail(&b->write_list, write_list); } @@ -1434,7 +1436,7 @@ static void __flush_write_list(struct list_head *write_list) struct dm_buffer *b = list_entry(write_list->next, struct dm_buffer, write_list); list_del(&b->write_list); - submit_io(b, REQ_OP_WRITE, write_endio); + submit_io(b, REQ_OP_WRITE, IOPRIO_DEFAULT, write_endio); cond_resched(); } blk_finish_plug(&plug); @@ -1816,7 +1818,7 @@ static void read_endio(struct dm_buffer *b, blk_status_t status) * and uses dm_bufio_mark_buffer_dirty to write new data back). */ static void *new_read(struct dm_bufio_client *c, sector_t block, - enum new_flag nf, struct dm_buffer **bp) + enum new_flag nf, struct dm_buffer **bp, unsigned short ioprio) { int need_submit = 0; struct dm_buffer *b; @@ -1869,7 +1871,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block, return NULL; if (need_submit) - submit_io(b, REQ_OP_READ, read_endio); + submit_io(b, REQ_OP_READ, ioprio, read_endio); wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE); @@ -1889,19 +1891,26 @@ static void *new_read(struct dm_bufio_client *c, sector_t block, void *dm_bufio_get(struct dm_bufio_client *c, sector_t block, struct dm_buffer **bp) { - return new_read(c, block, NF_GET, bp); + return new_read(c, block, NF_GET, bp, IOPRIO_DEFAULT); } EXPORT_SYMBOL_GPL(dm_bufio_get); void *dm_bufio_read(struct dm_bufio_client *c, sector_t block, struct dm_buffer **bp) +{ + return dm_bufio_read_by_ioprio(c, block, bp, IOPRIO_DEFAULT); +} +EXPORT_SYMBOL_GPL(dm_bufio_read); + +void *dm_bufio_read_by_ioprio(struct dm_bufio_client *c, sector_t block, + struct dm_buffer **bp, unsigned short ioprio) { if (WARN_ON_ONCE(dm_bufio_in_request())) return ERR_PTR(-EINVAL); - return new_read(c, block, NF_READ, bp); + return new_read(c, block, NF_READ, bp, ioprio); } -EXPORT_SYMBOL_GPL(dm_bufio_read); +EXPORT_SYMBOL_GPL(dm_bufio_read_by_ioprio); void *dm_bufio_new(struct dm_bufio_client *c, sector_t block, struct dm_buffer **bp) @@ -1909,12 +1918,19 @@ void *dm_bufio_new(struct dm_bufio_client *c, sector_t block, if (WARN_ON_ONCE(dm_bufio_in_request())) return ERR_PTR(-EINVAL); - return new_read(c, block, NF_FRESH, bp); + return new_read(c, block, NF_FRESH, bp, IOPRIO_DEFAULT); } EXPORT_SYMBOL_GPL(dm_bufio_new); void dm_bufio_prefetch(struct dm_bufio_client *c, sector_t block, unsigned int n_blocks) +{ + return dm_bufio_prefetch_by_ioprio(c, block, n_blocks, IOPRIO_DEFAULT); +} +EXPORT_SYMBOL_GPL(dm_bufio_prefetch); + +void dm_bufio_prefetch_by_ioprio(struct dm_bufio_client *c, + sector_t block, unsigned int n_blocks, unsigned short ioprio) { struct blk_plug plug; @@ -1950,7 +1966,7 @@ void dm_bufio_prefetch(struct dm_bufio_client *c, dm_bufio_unlock(c); if (need_submit) - submit_io(b, REQ_OP_READ, read_endio); + submit_io(b, REQ_OP_READ, ioprio, read_endio); dm_bufio_release(b); cond_resched(); @@ -1965,7 +1981,7 @@ void dm_bufio_prefetch(struct dm_bufio_client *c, flush_plug: blk_finish_plug(&plug); } -EXPORT_SYMBOL_GPL(dm_bufio_prefetch); +EXPORT_SYMBOL_GPL(dm_bufio_prefetch_by_ioprio); void dm_bufio_release(struct dm_buffer *b) { @@ -2125,6 +2141,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c) .bdev = c->bdev, .sector = 0, .count = 0, + .ioprio = IOPRIO_DEFAULT, }; if (WARN_ON_ONCE(dm_bufio_in_request())) @@ -2149,6 +2166,7 @@ int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t c .bdev = c->bdev, .sector = block_to_sector(c, block), .count = block_to_sector(c, count), + .ioprio = IOPRIO_DEFAULT, }; if (WARN_ON_ONCE(dm_bufio_in_request())) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index e85c688fd91e..4bbfaf8f5230 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -543,6 +543,7 @@ static int sync_rw_sb(struct dm_integrity_c *ic, blk_opf_t opf) io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev; io_loc.sector = ic->start; io_loc.count = SB_SECTORS; + io_loc.ioprio = IOPRIO_DEFAULT; if (op == REQ_OP_WRITE) { sb_set_version(ic); @@ -1070,6 +1071,7 @@ static void rw_journal_sectors(struct dm_integrity_c *ic, blk_opf_t opf, io_loc.bdev = ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev; io_loc.sector = ic->start + SB_SECTORS + sector; io_loc.count = n_sectors; + io_loc.ioprio = IOPRIO_DEFAULT; r = dm_io(&io_req, 1, &io_loc, NULL); if (unlikely(r)) { @@ -1187,6 +1189,7 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned int section, u io_loc.bdev = ic->dev->bdev; io_loc.sector = target; io_loc.count = n_sectors; + io_loc.IOPRIO_DEFAULT = IOPRIO_DEFAULT; r = dm_io(&io_req, 1, &io_loc, NULL); if (unlikely(r)) { @@ -1515,6 +1518,7 @@ static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_dat fr.io_reg.bdev = ic->dev->bdev, fr.io_reg.sector = 0, fr.io_reg.count = 0, + fr.io_reg.ioprio = IOPRIO_DEFAULT, fr.ic = ic; init_completion(&fr.comp); r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL); @@ -2738,6 +2742,7 @@ static void integrity_recalc(struct work_struct *w) io_loc.bdev = ic->dev->bdev; io_loc.sector = get_data_sector(ic, area, offset); io_loc.count = n_sectors; + io_loc.ioprio = IOPRIO_DEFAULT; r = dm_io(&io_req, 1, &io_loc, NULL); if (unlikely(r)) { diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index f053ce245814..b40f0a432981 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -354,6 +354,7 @@ static void do_region(const blk_opf_t opf, unsigned int region, &io->client->bios); bio->bi_iter.bi_sector = where->sector + (where->count - remaining); bio->bi_end_io = endio; + bio->bi_ioprio = where->ioprio; store_io_and_region_in_bio(bio, io, region); if (op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) { diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index f9f84236dfcd..e0dacdcd94f1 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -309,6 +309,7 @@ static int flush_header(struct log_c *lc) .bdev = lc->header_location.bdev, .sector = 0, .count = 0, + .ioprio = IOPRIO_DEFAULT, }; lc->io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index ddcb2bc4a617..2de9b1377de3 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -275,6 +275,7 @@ static int mirror_flush(struct dm_target *ti) io[i].bdev = m->dev->bdev; io[i].sector = 0; io[i].count = 0; + io[i].ioprio = IOPRIO_DEFAULT; } error_bits = -1; @@ -475,6 +476,7 @@ static void map_region(struct dm_io_region *io, struct mirror *m, io->bdev = m->dev->bdev; io->sector = map_sector(m, bio); io->count = bio_sectors(bio); + io->ioprio = bio_prio(bio); } static void hold_bio(struct mirror_set *ms, struct bio *bio) diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 15649921f2a9..d8f911727058 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -236,6 +236,8 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, blk_opf_t opf, .bdev = dm_snap_cow(ps->store->snap)->bdev, .sector = ps->store->chunk_size * chunk, .count = ps->store->chunk_size, + .ioprio = IOPRIO_DEFAULT, + }; struct dm_io_request io_req = { .bi_opf = opf, diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 3ef9f018da60..160a4de56b28 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -209,6 +209,7 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io, u8 *bbuf, *rs_block; u8 want_digest[HASH_MAX_DIGESTSIZE]; unsigned int n, k; + struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); if (neras) *neras = 0; @@ -247,7 +248,7 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io, bufio = v->bufio; } - bbuf = dm_bufio_read(bufio, block, &buf); + bbuf = dm_bufio_read_by_ioprio(bufio, block, &buf, bio->bi_ioprio); if (IS_ERR(bbuf)) { DMWARN_LIMIT("%s: FEC %llu: read failed (%llu): %ld", v->data_dev->name, diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 26adcfea0302..5945ac1dfdff 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -51,6 +51,7 @@ static DEFINE_STATIC_KEY_FALSE(use_tasklet_enabled); struct dm_verity_prefetch_work { struct work_struct work; struct dm_verity *v; + struct dm_verity_io *io; sector_t block; unsigned int n_blocks; }; @@ -293,6 +294,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, int r; sector_t hash_block; unsigned int offset; + struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); verity_hash_at_level(v, block, level, &hash_block, &offset); @@ -307,7 +309,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, return -EAGAIN; } } else - data = dm_bufio_read(v->bufio, hash_block, &buf); + data = dm_bufio_read_by_ioprio(v->bufio, hash_block, &buf, bio->bi_ioprio); if (IS_ERR(data)) return PTR_ERR(data); @@ -692,6 +694,7 @@ static void verity_prefetch_io(struct work_struct *work) container_of(work, struct dm_verity_prefetch_work, work); struct dm_verity *v = pw->v; int i; + struct bio *bio = dm_bio_from_per_bio_data(pw->io, v->ti->per_io_data_size); for (i = v->levels - 2; i >= 0; i--) { sector_t hash_block_start; @@ -716,8 +719,8 @@ static void verity_prefetch_io(struct work_struct *work) hash_block_end = v->hash_blocks - 1; } no_prefetch_cluster: - dm_bufio_prefetch(v->bufio, hash_block_start, - hash_block_end - hash_block_start + 1); + dm_bufio_prefetch_by_ioprio(v->bufio, hash_block_start, + hash_block_end - hash_block_start + 1, bio->bi_ioprio); } kfree(pw); @@ -751,6 +754,7 @@ static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io) pw->v = v; pw->block = block; pw->n_blocks = n_blocks; + pw->io = io; queue_work(v->verify_wq, &pw->work); } diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 074cb785eafc..135d1268246f 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -515,6 +515,7 @@ static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) region.bdev = wc->ssd_dev->bdev; region.sector = (sector_t)i * (BITMAP_GRANULARITY >> SECTOR_SHIFT); region.count = (sector_t)(j - i) * (BITMAP_GRANULARITY >> SECTOR_SHIFT); + region.ioprio = IOPRIO_DEFAULT; if (unlikely(region.sector >= wc->metadata_sectors)) break; @@ -555,6 +556,7 @@ static void ssd_commit_superblock(struct dm_writecache *wc) region.bdev = wc->ssd_dev->bdev; region.sector = 0; region.count = max(4096U, wc->block_size) >> SECTOR_SHIFT; + region.ioprio = IOPRIO_DEFAULT; if (unlikely(region.sector + region.count > wc->metadata_sectors)) region.count = wc->metadata_sectors - region.sector; @@ -590,6 +592,7 @@ static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) region.bdev = dev->bdev; region.sector = 0; region.count = 0; + region.ioprio = IOPRIO_DEFAULT; req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; req.mem.type = DM_IO_KMEM; req.mem.ptr.addr = NULL; @@ -984,6 +987,7 @@ static int writecache_read_metadata(struct dm_writecache *wc, sector_t n_sectors region.bdev = wc->ssd_dev->bdev; region.sector = wc->start_sector; region.count = n_sectors; + region.ioprio = IOPRIO_DEFAULT; req.bi_opf = REQ_OP_READ | REQ_SYNC; req.mem.type = DM_IO_VMA; req.mem.ptr.vma = (char *)wc->memory_map; diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h index 75e7d8cbb532..39e939bf0419 100644 --- a/include/linux/dm-bufio.h +++ b/include/linux/dm-bufio.h @@ -11,6 +11,7 @@ #define _LINUX_DM_BUFIO_H #include <linux/blkdev.h> +#include <linux/ioprio.h> #include <linux/types.h> /*----------------------------------------------------------------*/ @@ -64,6 +65,9 @@ void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start); void *dm_bufio_read(struct dm_bufio_client *c, sector_t block, struct dm_buffer **bp); +void *dm_bufio_read_by_ioprio(struct dm_bufio_client *c, sector_t block, + struct dm_buffer **bp, unsigned short ioprio); + /* * Like dm_bufio_read, but return buffer from cache, don't read * it. If the buffer is not in the cache, return NULL. @@ -86,6 +90,8 @@ void *dm_bufio_new(struct dm_bufio_client *c, sector_t block, void dm_bufio_prefetch(struct dm_bufio_client *c, sector_t block, unsigned int n_blocks); +void dm_bufio_prefetch_by_ioprio(struct dm_bufio_client *c, + sector_t block, unsigned int n_blocks, unsigned short ioprio); /* * Release a reference obtained with dm_bufio_{read,get,new}. The data * pointer and dm_buffer pointer is no longer valid after this call. diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index 7595142f3fc5..227ee6d77c70 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -20,6 +20,8 @@ struct dm_io_region { struct block_device *bdev; sector_t sector; sector_t count; /* If this is zero the region is ignored. */ + /* Set it to IOPRIO_DEFAULT if you don't know what value to set */ + unsigned short ioprio; }; struct page_list { -- 2.34.1