This removes the dependency on interrupts to wake up the task. Set the
task state to TASK_RUNNING if need_resched() returns true while polling
for IO completion. Earlier, the polling task would sleep, relying on an
interrupt to wake it up. This made some IO take very long when interrupt
coalescing is enabled in NVMe.

Reference: http://lists.infradead.org/pipermail/linux-nvme/2018-February/015435.html

Signed-off-by: Nitesh Shetty <nj.shetty@xxxxxxxxxxx>
---
 fs/block_dev.c | 16 ++++++++++++----
 fs/direct-io.c |  8 ++++++--
 fs/iomap.c     | 10 +++++++---
 3 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4a181fc..a87d8b7 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -236,9 +236,13 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		if (!READ_ONCE(bio.bi_private))
 			break;
-		if (!(iocb->ki_flags & IOCB_HIPRI) ||
-		    !blk_poll(bdev_get_queue(bdev), qc))
+		if (!(iocb->ki_flags & IOCB_HIPRI))
 			io_schedule();
+		else if (!blk_poll(bdev_get_queue(bdev), qc)) {
+			if (need_resched())
+				set_current_state(TASK_RUNNING);
+			io_schedule();
+		}
 	}
 	__set_current_state(TASK_RUNNING);
 
@@ -401,9 +405,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 		if (!READ_ONCE(dio->waiter))
 			break;
 
-		if (!(iocb->ki_flags & IOCB_HIPRI) ||
-		    !blk_poll(bdev_get_queue(bdev), qc))
+		if (!(iocb->ki_flags & IOCB_HIPRI))
 			io_schedule();
+		else if (!blk_poll(bdev_get_queue(bdev), qc)) {
+			if (need_resched())
+				set_current_state(TASK_RUNNING);
+			io_schedule();
+		}
 	}
 	__set_current_state(TASK_RUNNING);
 
diff --git a/fs/direct-io.c b/fs/direct-io.c
index a0ca9e4..c815ac9 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -518,9 +518,13 @@ static struct bio *dio_await_one(struct dio *dio)
 		__set_current_state(TASK_UNINTERRUPTIBLE);
 		dio->waiter = current;
 		spin_unlock_irqrestore(&dio->bio_lock, flags);
-		if (!(dio->iocb->ki_flags & IOCB_HIPRI) ||
-		    !blk_poll(dio->bio_disk->queue, dio->bio_cookie))
+		if (!(dio->iocb->ki_flags & IOCB_HIPRI))
 			io_schedule();
+		else if (!blk_poll(dio->bio_disk->queue, dio->bio_cookie)) {
+			if (need_resched())
+				__set_current_state(TASK_RUNNING);
+			io_schedule();
+		}
 		/* wake up sets us TASK_RUNNING */
 		spin_lock_irqsave(&dio->bio_lock, flags);
 		dio->waiter = NULL;
diff --git a/fs/iomap.c b/fs/iomap.c
index afd1635..b51569d 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1072,10 +1072,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 				break;
 
 			if (!(iocb->ki_flags & IOCB_HIPRI) ||
-			    !dio->submit.last_queue ||
-			    !blk_poll(dio->submit.last_queue,
-					dio->submit.cookie))
+			    !dio->submit.last_queue)
 				io_schedule();
+			else if (!blk_poll(dio->submit.last_queue,
+					dio->submit.cookie)) {
+				if (need_resched())
+					set_current_state(TASK_RUNNING);
+				io_schedule();
+			}
 		}
 		__set_current_state(TASK_RUNNING);
 	}
--
2.7.4
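
For context, the resulting wait loop in __blkdev_direct_IO_simple() with this
change applied looks roughly as below (condensed from the first hunk; the
submit_bio() call and the surrounding lines are paraphrased from the unpatched
function, so they may not match the tree exactly). The other three call sites
follow the same pattern.

	qc = submit_bio(&bio);
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!READ_ONCE(bio.bi_private))
			break;			/* completion has run */
		if (!(iocb->ki_flags & IOCB_HIPRI))
			io_schedule();		/* non-polled IO: sleep until the irq wakes us */
		else if (!blk_poll(bdev_get_queue(bdev), qc)) {
			/*
			 * Poll found no completion. If the scheduler wants
			 * the CPU, stay TASK_RUNNING so io_schedule() only
			 * yields instead of sleeping until an interrupt.
			 */
			if (need_resched())
				set_current_state(TASK_RUNNING);
			io_schedule();
		}
	}
	__set_current_state(TASK_RUNNING);

The point is that when blk_poll() finds nothing and the scheduler has work
pending, the task stays runnable across io_schedule(), so it merely yields the
CPU rather than sleeping until a (possibly coalesced) completion interrupt
arrives.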