Instead of a convoluted chain just check for REQ_OP_READ directly, and keep all the memory stall code together in a single unlikely branch. Signed-off-by: Christoph Hellwig <hch@xxxxxx> --- block/blk-core.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 68351ee94ad2e..81a291085c6ca 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1148,10 +1148,6 @@ EXPORT_SYMBOL_GPL(direct_make_request); */ blk_qc_t submit_bio(struct bio *bio) { - bool workingset_read = false; - unsigned long pflags; - blk_qc_t ret; - if (blkcg_punt_bio_submit(bio)) return BLK_QC_T_NONE; @@ -1170,8 +1166,6 @@ blk_qc_t submit_bio(struct bio *bio) if (op_is_write(bio_op(bio))) { count_vm_events(PGPGOUT, count); } else { - if (bio_flagged(bio, BIO_WORKINGSET)) - workingset_read = true; task_io_account_read(bio->bi_iter.bi_size); count_vm_events(PGPGIN, count); } @@ -1187,20 +1181,24 @@ blk_qc_t submit_bio(struct bio *bio) } /* - * If we're reading data that is part of the userspace - * workingset, count submission time as memory stall. When the - * device is congested, or the submitting cgroup IO-throttled, - * submission can be a significant part of overall IO time. + * If we're reading data that is part of the userspace workingset, count + * submission time as memory stall. When the device is congested, or + * the submitting cgroup IO-throttled, submission can be a significant + * part of overall IO time. */ - if (workingset_read) - psi_memstall_enter(&pflags); - - ret = generic_make_request(bio); + if (unlikely(bio_op(bio) == REQ_OP_READ && + bio_flagged(bio, BIO_WORKINGSET))) { + unsigned long pflags; + blk_qc_t ret; - if (workingset_read) + psi_memstall_enter(&pflags); + ret = generic_make_request(bio); psi_memstall_leave(&pflags); - return ret; + return ret; + } + + return generic_make_request(bio); } EXPORT_SYMBOL(submit_bio); -- 2.26.1