Use percpu inflight counters instead of the atomic counters in
part0.in_flight to avoid cache line bouncing and improve performance.
md_in_flight() now sums the per-CPU counters over all possible CPUs.

Signed-off-by: Mikulas Patocka <mpatocka@xxxxxxxxxx>

---
 drivers/md/dm-core.h |    5 +++++
 drivers/md/dm.c      |   50 ++++++++++++++++++++++++++++++++++----------------
 2 files changed, 39 insertions(+), 16 deletions(-)

Index: linux-dm/drivers/md/dm-core.h
===================================================================
--- linux-dm.orig/drivers/md/dm-core.h	2018-11-15 22:06:37.000000000 +0100
+++ linux-dm/drivers/md/dm-core.h	2018-11-15 22:06:37.000000000 +0100
@@ -24,6 +24,10 @@ struct dm_kobject_holder {
 	struct completion completion;
 };
 
+struct dm_percpu {
+	unsigned inflight[2];
+};
+
 /*
  * DM core internal structure that used directly by dm.c and dm-rq.c
  * DM targets must _not_ deference a mapped_device to directly access its members!
@@ -63,6 +67,7 @@ struct mapped_device {
 	/*
 	 * A list of ios that arrived while we were suspended.
 	 */
+	struct dm_percpu __percpu *counters;
 	struct work_struct work;
 	wait_queue_head_t wait;
 	spinlock_t deferred_lock;
Index: linux-dm/drivers/md/dm.c
===================================================================
--- linux-dm.orig/drivers/md/dm.c	2018-11-15 22:06:37.000000000 +0100
+++ linux-dm/drivers/md/dm.c	2018-11-15 22:09:31.000000000 +0100
@@ -648,19 +648,32 @@ static void free_tio(struct dm_target_io
 
 int md_in_flight(struct mapped_device *md)
 {
-	return atomic_read(&dm_disk(md)->part0.in_flight[READ]) +
-	       atomic_read(&dm_disk(md)->part0.in_flight[WRITE]);
+	int cpu;
+	unsigned sum = 0;
+	for_each_possible_cpu(cpu) {
+		struct dm_percpu *p = per_cpu_ptr(md->counters, cpu);
+		sum += p->inflight[READ] + p->inflight[WRITE];
+	}
+	return (int)sum;
 }
 
 static void start_io_acct(struct dm_io *io)
 {
 	struct mapped_device *md = io->md;
 	struct bio *bio = io->orig_bio;
+	struct hd_struct *part;
+	int sgrp, cpu;
 
 	io->start_time = jiffies;
 
-	generic_start_io_acct(md->queue, bio_op(bio), bio_sectors(bio),
-			      &dm_disk(md)->part0);
+	part = &dm_disk(md)->part0;
+	sgrp = op_stat_group(bio_op(bio));
+	cpu = part_stat_lock();
+	__part_stat_add(cpu, part, ios[sgrp], 1);
+	__part_stat_add(cpu, part, sectors[sgrp], bio_sectors(bio));
+	part_stat_unlock();
+
+	this_cpu_inc(md->counters->inflight[bio_data_dir(bio)]);
 
 	if (unlikely(dm_stats_used(&md->stats)))
 		dm_stats_account_io(&md->stats, bio_data_dir(bio),
@@ -673,25 +686,24 @@ static void end_io_acct(struct dm_io *io
 	struct mapped_device *md = io->md;
 	struct bio *bio = io->orig_bio;
 	unsigned long duration = jiffies - io->start_time;
+	struct hd_struct *part;
+	int sgrp, cpu;
 
 	if (unlikely(dm_stats_used(&md->stats)))
 		dm_stats_account_io(&md->stats, bio_data_dir(bio),
 				    bio->bi_iter.bi_sector, bio_sectors(bio),
 				    true, duration, &io->stats_aux);
 
-	/*
-	 * make sure that atomic_dec in generic_end_io_acct is not reordered
-	 * with previous writes
-	 */
-	smp_mb__before_atomic();
-	generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0,
-			    io->start_time);
-	/*
-	 * generic_end_io_acct does atomic_dec, this barrier makes sure that
-	 * atomic_dec is not reordered with waitqueue_active
-	 */
-	smp_mb__after_atomic();
+	part = &dm_disk(md)->part0;
+	sgrp = op_stat_group(bio_op(bio));
+	cpu = part_stat_lock();
+	__part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration));
+	part_stat_unlock();
+
+	smp_wmb();
+	this_cpu_dec(md->counters->inflight[bio_data_dir(bio)]);
+	smp_mb();
 
 	/* nudge anyone waiting on suspend queue */
 	if (unlikely(waitqueue_active(&md->wait))) {
 		if (!md_in_flight(md))
@@ -1822,6 +1834,8 @@ static void cleanup_mapped_device(struct
 	if (md->queue)
 		blk_cleanup_queue(md->queue);
 
+	free_percpu(md->counters);
+
 	cleanup_srcu_struct(&md->io_barrier);
 
 	if (md->bdev) {
@@ -1892,6 +1906,10 @@ static struct mapped_device *alloc_dev(i
 	if (!md->disk)
 		goto bad;
 
+	md->counters = alloc_percpu(struct dm_percpu);
+	if (!md->counters)
+		goto bad;
+
 	init_waitqueue_head(&md->wait);
 	INIT_WORK(&md->work, dm_wq_work);
 	init_waitqueue_head(&md->eventq);
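
As an aside for review, here is a small userspace sketch (not kernel code and
not part of this patch) of the counting scheme, so the trade-off is easier to
see in isolation. It assumes sched_getcpu() for slot selection, relaxed C11
atomics for the per-slot updates (the kernel side gets away with plain
this_cpu_inc()/this_cpu_dec()), a 64-byte cache line, and the illustrative
names start_io/end_io/total_in_flight:

/*
 * percpu_inflight_sketch.c - userspace analogue (NOT kernel code) of the
 * per-CPU inflight scheme above: each CPU slot owns a cache-line-padded
 * counter pair, submission/completion touch a single slot, and the reader
 * sums every slot the way md_in_flight() walks for_each_possible_cpu().
 *
 * Build: cc -O2 -pthread percpu_inflight_sketch.c
 */
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define DIR_READ	0
#define DIR_WRITE	1

struct cpu_slot {
	_Alignas(64) atomic_uint inflight[2];	/* padded: no false sharing */
};

static struct cpu_slot *counters;
static long nr_cpus;

static void start_io(int dir)
{
	/* touch only the submitting CPU's slot, like this_cpu_inc() */
	atomic_fetch_add_explicit(&counters[sched_getcpu()].inflight[dir], 1,
				  memory_order_relaxed);
}

static void end_io(int dir)
{
	/*
	 * Completion may run on a different CPU, so one slot can wrap below
	 * zero; the unsigned sum over all slots is still exact.
	 */
	atomic_fetch_sub_explicit(&counters[sched_getcpu()].inflight[dir], 1,
				  memory_order_relaxed);
}

/* sum every slot, like md_in_flight()'s for_each_possible_cpu() loop */
static unsigned int total_in_flight(void)
{
	unsigned int sum = 0;

	for (long cpu = 0; cpu < nr_cpus; cpu++)
		sum += atomic_load_explicit(&counters[cpu].inflight[DIR_READ],
					    memory_order_relaxed) +
		       atomic_load_explicit(&counters[cpu].inflight[DIR_WRITE],
					    memory_order_relaxed);
	return sum;
}

static void *worker(void *arg)
{
	(void)arg;
	for (int i = 0; i < 1000000; i++) {
		start_io(DIR_WRITE);
		end_io(DIR_WRITE);
	}
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
	counters = aligned_alloc(64, nr_cpus * sizeof(*counters));
	if (!counters)
		return 1;
	for (long cpu = 0; cpu < nr_cpus; cpu++) {
		atomic_init(&counters[cpu].inflight[DIR_READ], 0);
		atomic_init(&counters[cpu].inflight[DIR_WRITE], 0);
	}

	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);

	/* every submitted "I/O" has completed, so this must print 0 */
	printf("in flight: %u\n", total_in_flight());
	free(counters);
	return 0;
}

Because an I/O may complete on a different CPU than it was submitted on, an
individual slot can wrap below zero; the unsigned sum over all slots still
comes out right, which is the property the patch relies on. The sum is only
used for the coarse "has everything drained" check on the suspend path (the
md->wait nudge kept in end_io_acct()), which is why the unsynchronized
per-CPU reads in md_in_flight() are acceptable there.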