Previously, io_ticks could be under-counted. Consider these I/Os along
the time axis (in jiffies):

  t          012345678
  io1        |----|
  io2            |---|

Under the old approach, io_ticks would count up to 6, like so:

  t          012345678
  io1        |----|
  io2            |---|
  stamp      0   45  8
  io_ticks   1   23  6

With this change, io_ticks instead counts to 8, eliminating the
under-counting:

  t          012345678
  io1        |----|
  io2            |---|
  stamp      0    5  8
  io_ticks   0    5  8
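For concreteness, below is a minimal single-threaded userspace sketch
(not part of the patch) that replays the timeline above through both
schemes. It simplifies the kernel code: the update_old()/update_new()
helper names are illustrative, plain signed longs stand in for jiffies,
the READ_ONCE()/cmpxchg() concurrency handling is omitted, and stamp
starts at -1 to represent an update that happened before t=0. It prints
6 for the old scheme and 8 for the new one, matching the tables above.

  #include <stdio.h>

  static long stamp, io_ticks;

  /* Old scheme: called at submission (end=0) and completion (end=1). */
  static void update_old(long now, int end)
  {
          if (stamp != now) {
                  io_ticks += end ? now - stamp : 1;
                  stamp = now;
          }
  }

  /* New scheme: called at completion only, given the I/O's start jiffy. */
  static void update_new(long now, long start)
  {
          if (stamp != now) {
                  /* Count only the span not already covered by a prior I/O. */
                  io_ticks += now - (start < stamp ? stamp : start);
                  stamp = now;
          }
  }

  int main(void)
  {
          stamp = -1; io_ticks = 0;
          update_old(0, 0);                 /* io1 issued at t=0 */
          update_old(4, 0);                 /* io2 issued at t=4 */
          update_old(5, 1);                 /* io1 done at t=5   */
          update_old(8, 1);                 /* io2 done at t=8   */
          printf("old: %ld\n", io_ticks);   /* 6 */

          stamp = -1; io_ticks = 0;
          update_new(5, 0);                 /* io1: ran 0..5 */
          update_new(8, 4);                 /* io2: ran 4..8 */
          printf("new: %ld\n", io_ticks);   /* 8 */
          return 0;
  }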
It was also possible for io_ticks to be over-counted. Consider a
workload that issues I/Os deterministically at intervals of 8ms
(125Hz). If each I/O takes 1ms, then the true utilization is 12.5%.
The previous implementation increments io_ticks once for each jiffy in
which an I/O ends. Since the workload reliably issues at least one I/O
in every jiffy, the reported utilization will be 100%. This commit
changes the approach such that only I/Os which cross a boundary
between jiffies are counted. With this change, the given workload
would count an I/O tick on every eighth jiffy, resulting in a
(correct) calculated utilization of 12.5%.

Signed-off-by: Josh Snyder <joshs@xxxxxxxxxxx>
Fixes: 5b18b5a73760 ("block: delete part_round_stats and switch to less precise counting")
---
 block/blk-core.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index d1b79dfe9540..a0bbd9e099b9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1396,14 +1396,22 @@ unsigned int blk_rq_err_bytes(const struct request *rq)
 }
 EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
 
-static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
+static void update_io_ticks(struct hd_struct *part, unsigned long now, unsigned long start)
 {
 	unsigned long stamp;
+	unsigned long elapsed;
 again:
 	stamp = READ_ONCE(part->stamp);
 	if (unlikely(stamp != now)) {
-		if (likely(cmpxchg(&part->stamp, stamp, now) == stamp))
-			__part_stat_add(part, io_ticks, end ? now - stamp : 1);
+		if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) {
+			// stamp records the jiffy of the most recent completion.
+			// If this I/O started before stamp, it overlapped the
+			// previous I/O, so count only the non-overlapping time.
+			// Otherwise there was no overlap, and we count this
+			// I/O's whole duration, disregarding stamp.
+			elapsed = now - (start < stamp ? stamp : start);
+			__part_stat_add(part, io_ticks, elapsed);
+		}
 	}
 	if (part->partno) {
 		part = &part_to_disk(part)->part0;
@@ -1439,7 +1447,7 @@ void blk_account_io_done(struct request *req, u64 now)
 
 		part_stat_lock();
 		part = req->part;
-		update_io_ticks(part, jiffies, true);
+		update_io_ticks(part, jiffies, nsecs_to_jiffies(req->start_time_ns));
 		part_stat_inc(part, ios[sgrp]);
 		part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
 		part_stat_unlock();
@@ -1456,7 +1464,6 @@ void blk_account_io_start(struct request *rq)
 		rq->part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
 
 	part_stat_lock();
-	update_io_ticks(rq->part, jiffies, false);
 	part_stat_unlock();
 }
 
@@ -1468,7 +1475,6 @@ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
 	unsigned long now = READ_ONCE(jiffies);
 
 	part_stat_lock();
-	update_io_ticks(part, now, false);
 	part_stat_inc(part, ios[sgrp]);
 	part_stat_add(part, sectors[sgrp], sectors);
 	part_stat_local_inc(part, in_flight[op_is_write(op)]);
@@ -1487,7 +1493,7 @@ void disk_end_io_acct(struct gendisk *disk, unsigned int op,
 	unsigned long duration = now - start_time;
 
 	part_stat_lock();
-	update_io_ticks(part, now, true);
+	update_io_ticks(part, now, start_time);
 	part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
 	part_stat_local_dec(part, in_flight[op_is_write(op)]);
 	part_stat_unlock();
-- 
2.25.1