Ensure that writes to the dio/bio waiter field are ordered correctly. With the smp_rmb() before the READ_ONCE() check, we should be able to use a more relaxed ordering for the task state setting. We don't need a heavier barrier on the wakeup side after writing the waiter field, since we either going to be in the task we care about, or go through wake_up_process() which implies a strong enough barrier. For the core poll helper, the task state setting don't need to imply any atomics, as it's the current task itself that is being modified and we're not going to sleep. Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- block/blk-mq.c | 4 ++-- fs/block_dev.c | 9 +++++++-- fs/iomap.c | 4 +++- mm/page_io.c | 4 +++- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 32b246ed44c0..7fc4abb4cc36 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3331,12 +3331,12 @@ static bool __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq) ret = q->mq_ops->poll(hctx, rq->tag); if (ret > 0) { hctx->poll_success++; - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); return true; } if (signal_pending_state(state, current)) - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); if (current->state == TASK_RUNNING) return true; diff --git a/fs/block_dev.c b/fs/block_dev.c index c039abfb2052..5b754f84c814 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -237,9 +237,12 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, qc = submit_bio(&bio); for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); + __set_current_state(TASK_UNINTERRUPTIBLE); + + smp_rmb(); if (!READ_ONCE(bio.bi_private)) break; + if (!(iocb->ki_flags & IOCB_HIPRI) || !blk_poll(bdev_get_queue(bdev), qc)) io_schedule(); @@ -403,7 +406,9 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) return -EIOCBQUEUED; for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); + __set_current_state(TASK_UNINTERRUPTIBLE); + + smp_rmb(); if (!READ_ONCE(dio->waiter)) break; diff --git a/fs/iomap.c b/fs/iomap.c index f61d13dfdf09..3373ea4984d9 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -1888,7 +1888,9 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, return -EIOCBQUEUED; for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); + __set_current_state(TASK_UNINTERRUPTIBLE); + + smp_rmb(); if (!READ_ONCE(dio->submit.waiter)) break; diff --git a/mm/page_io.c b/mm/page_io.c index d4d1c89bcddd..008f6d00c47c 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -405,7 +405,9 @@ int swap_readpage(struct page *page, bool synchronous) bio_get(bio); qc = submit_bio(bio); while (synchronous) { - set_current_state(TASK_UNINTERRUPTIBLE); + __set_current_state(TASK_UNINTERRUPTIBLE); + + smp_rmb(); if (!READ_ONCE(bio->bi_private)) break; -- 2.17.1