From: Mikulas Patocka <mpatocka@xxxxxxxxxx> Until now bio-based DM core has always cloned an incoming bio, remapped the clone bio to a low layer, dealt with the clone's completion and then finally completed the original bio. This cloning can be avoided for READ and WRITE bios if the target opts in by setting ti->no_clone. Avoiding cloning for READ and WRITE bios improves performance of targets that do very little work in response to each bio (e.g. dm-linear and dm-striped). The improvement is accomplished by changing DM core to allocate a 'dm_noclone' structure (that is quite small) instead of cloning the bio. The bio's bi_end_io and bi_private are saved in the 'dm_noclone' before they are overwritten and the bio passed to the lower block device. When the bio is finished, the function noclone_endio restores the values bi_end_io and bi_private and passes the bio to the original bi_end_io function. If the allocation of the 'struct dm_noclone' fails then bio-based DM falls back to the traditional bio cloning IO path that is backed by mempool reservations. 
Performance improvement for dm-linear: x86-64, 2x six-core /dev/ram0 2449MiB/s /dev/mapper/lin 5.0-rc without optimization 1970MiB/s /dev/mapper/lin 5.0-rc with optimization 2238MiB/s arm64, quad core: /dev/ram0 457MiB/s /dev/mapper/lin 5.0-rc without optimization 325MiB/s /dev/mapper/lin 5.0-rc with optimization 364MiB/s Signed-off-by: Mikulas Patocka <mpatocka@xxxxxxxxxx> Signed-off-by: Mike Snitzer <snitzer@xxxxxxxxxx> --- drivers/md/dm-core.h | 1 + drivers/md/dm-linear.c | 1 + drivers/md/dm-stripe.c | 1 + drivers/md/dm-table.c | 11 +++++++ drivers/md/dm-zero.c | 1 + drivers/md/dm.c | 71 ++++++++++++++++++++++++++++++++++++++++++- drivers/md/dm.h | 1 + include/linux/device-mapper.h | 5 +++ 8 files changed, 91 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 95c6d86ab5e8..b4832bba9d64 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -87,6 +87,7 @@ struct mapped_device { */ struct bio_set io_bs; struct bio_set bs; + struct kmem_cache *noclone_cache; /* * Processing queue (flush) diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index ad980a38fb1e..6e1df9fdfcc8 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -62,6 +62,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_secure_erase_bios = 1; ti->num_write_same_bios = 1; ti->num_write_zeroes_bios = 1; + ti->no_clone = true; ti->private = lc; return 0; diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 8547d7594338..32181b7ca34a 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -172,6 +172,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_secure_erase_bios = stripes; ti->num_write_same_bios = stripes; ti->num_write_zeroes_bios = stripes; + ti->no_clone = true; sc->chunk_size = chunk_size; if (chunk_size & (chunk_size - 1)) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 4b1be754cc41..6a3e23faeb7d 100644 
--- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -47,6 +47,7 @@ struct dm_table { bool integrity_supported:1; bool singleton:1; + bool no_clone:1; unsigned integrity_added:1; /* @@ -191,6 +192,8 @@ int dm_table_create(struct dm_table **result, fmode_t mode, if (!t) return -ENOMEM; + t->no_clone = true; + INIT_LIST_HEAD(&t->devices); INIT_LIST_HEAD(&t->target_callbacks); @@ -789,6 +792,9 @@ int dm_table_add_target(struct dm_table *t, const char *type, if (r) goto bad; + if (!tgt->no_clone) + t->no_clone = false; + t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; if (!tgt->num_discard_bios && tgt->discards_supported) @@ -1376,6 +1382,11 @@ static int count_device(struct dm_target *ti, struct dm_dev *dev, return 0; } +bool dm_table_supports_noclone(struct dm_table *table) +{ + return table->no_clone; +} + /* * Check whether a table has no data devices attached using each * target's iterate_devices method. diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index b65ca8dcfbdc..436a5ee89698 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -26,6 +26,7 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv) * Silently drop discards, avoiding -EOPNOTSUPP. */ ti->num_discard_bios = 1; + ti->no_clone = true; return 0; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1b87d20041e7..cbda11b34635 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -158,8 +158,16 @@ struct table_device { struct dm_dev dm_dev; }; +struct dm_noclone { + struct mapped_device *md; + bio_end_io_t *orig_bi_end_io; + void *orig_bi_private; + unsigned long start_time; +}; + static struct kmem_cache *_rq_tio_cache; static struct kmem_cache *_rq_cache; +static struct kmem_cache *_noclone_cache; /* * Bio-based DM's mempools' reserved IOs set by the user. 
@@ -233,9 +241,13 @@ static int __init local_init(void) if (!_rq_cache) goto out_free_rq_tio_cache; + _noclone_cache = KMEM_CACHE(dm_noclone, 0); + if (!_noclone_cache) + goto out_free_rq_cache; + r = dm_uevent_init(); if (r) - goto out_free_rq_cache; + goto out_free_noclone_cache; deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1); if (!deferred_remove_workqueue) { @@ -257,6 +269,8 @@ static int __init local_init(void) destroy_workqueue(deferred_remove_workqueue); out_uevent_exit: dm_uevent_exit(); +out_free_noclone_cache: + kmem_cache_destroy(_noclone_cache); out_free_rq_cache: kmem_cache_destroy(_rq_cache); out_free_rq_tio_cache: @@ -270,6 +284,7 @@ static void local_exit(void) flush_scheduled_work(); destroy_workqueue(deferred_remove_workqueue); + kmem_cache_destroy(_noclone_cache); kmem_cache_destroy(_rq_cache); kmem_cache_destroy(_rq_tio_cache); unregister_blkdev(_major, _name); @@ -1009,6 +1024,20 @@ static void clone_endio(struct bio *bio) dec_pending(io, error); } +static void noclone_endio(struct bio *bio) +{ + struct dm_noclone *noclone = bio->bi_private; + struct mapped_device *md = noclone->md; + + end_io_acct(md, bio, noclone->start_time); + + bio->bi_end_io = noclone->orig_bi_end_io; + bio->bi_private = noclone->orig_bi_private; + kmem_cache_free(_noclone_cache, noclone); + + bio_endio(bio); +} + /* * Return maximum size of I/O possible at the supplied sector up to the current * target boundary. 
@@ -1774,8 +1803,48 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio) return ret; } + if (dm_table_supports_noclone(map) && + (bio_op(bio) == REQ_OP_READ || bio_op(bio) == REQ_OP_WRITE) && + likely(!(bio->bi_opf & REQ_PREFLUSH)) && + !bio_flagged(bio, BIO_CHAIN) && + likely(!bio_integrity(bio)) && + likely(!dm_stats_used(&md->stats))) { + int r; + struct dm_noclone *noclone; + struct dm_target *ti = dm_table_find_target(map, bio->bi_iter.bi_sector); + if (unlikely(!dm_target_is_valid(ti))) + goto no_fast_path; + if (unlikely(bio_sectors(bio) > max_io_len(bio->bi_iter.bi_sector, ti))) + goto no_fast_path; + noclone = kmem_cache_alloc(_noclone_cache, GFP_NOWAIT); + if (unlikely(!noclone)) + goto no_fast_path; + noclone->md = md; + noclone->start_time = jiffies; + noclone->orig_bi_end_io = bio->bi_end_io; + noclone->orig_bi_private = bio->bi_private; + bio->bi_end_io = noclone_endio; + bio->bi_private = noclone; + start_io_acct(md, bio); + r = ti->type->map(ti, bio); + ret = BLK_QC_T_NONE; + if (likely(r == DM_MAPIO_REMAPPED)) { + ret = generic_make_request(bio); + } else if (likely(r == DM_MAPIO_SUBMITTED)) { + } else if (r == DM_MAPIO_KILL) { + bio->bi_status = BLK_STS_IOERR; + noclone_endio(bio); + } else { + DMWARN("unimplemented target map return value: %d", r); + BUG(); + } + goto put_table_ret; + } + +no_fast_path: ret = dm_process_bio(md, map, bio); +put_table_ret: dm_put_live_table(md, srcu_idx); return ret; } diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 2d539b82ec08..c3c78123dfb3 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -53,6 +53,7 @@ void dm_table_event_callback(struct dm_table *t, void (*fn)(void *), void *context); struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index); struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector); +bool dm_table_supports_noclone(struct dm_table *t); bool dm_table_has_no_data_devices(struct dm_table *table); int 
dm_calculate_queue_limits(struct dm_table *table, struct queue_limits *limits); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 0f5b3d7c6cb3..4ab2b0f53ae8 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -315,6 +315,11 @@ struct dm_target { * whether or not its underlying devices have support. */ bool discards_supported:1; + + /* + * The target can process bios without cloning them. + */ + bool no_clone:1; }; /* Each target can link one of these into the table */ -- 2.15.0 -- dm-devel mailing list dm-devel@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/dm-devel