Test pass with this patch, Thanks Ming and Christoph ! Tested-by: Changhui Zhong <czhong@xxxxxxxxxx> On Wed, Apr 20, 2022 at 10:31 PM Ming Lei <ming.lei@xxxxxxxxxx> wrote: > > So far bio is marked as REQ_POLLED if RWF_HIPRI/IOCB_HIPRI is passed > from userspace sync io interface, then block layer tries to poll until > the bio is completed. But the current implementation calls > blk_io_schedule() if bio_poll() returns 0, and this way causes io hang or > timeout easily. > > But looks no one reports this kind of issue, which should have been > triggered in normal io poll sanity test or blktests block/007 as > observed by Changhui, that means it is very likely that no one uses it > or no one cares it. > > Also after io_uring is invented, io poll for sync dio becomes legacy > interface. > > So ignore RWF_HIPRI hint for sync dio. > > CC: linux-mm@xxxxxxxxx > Cc: linux-xfs@xxxxxxxxxxxxxxx > Reported-by: Changhui Zhong <czhong@xxxxxxxxxx> > Suggested-by: Christoph Hellwig <hch@xxxxxx> > Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> > --- > V2: > - avoid to break io_uring async polling as pointed by Chritoph > > block/fops.c | 22 +--------------------- > fs/iomap/direct-io.c | 7 +++---- > mm/page_io.c | 4 +--- > 3 files changed, 5 insertions(+), 28 deletions(-) > > diff --git a/block/fops.c b/block/fops.c > index e3643362c244..b9b83030e0df 100644 > --- a/block/fops.c > +++ b/block/fops.c > @@ -44,14 +44,6 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb) > > #define DIO_INLINE_BIO_VECS 4 > > -static void blkdev_bio_end_io_simple(struct bio *bio) > -{ > - struct task_struct *waiter = bio->bi_private; > - > - WRITE_ONCE(bio->bi_private, NULL); > - blk_wake_io_task(waiter); > -} > - > static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, > struct iov_iter *iter, unsigned int nr_pages) > { > @@ -83,8 +75,6 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, > bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb)); > } > bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT; > - bio.bi_private = current; > - bio.bi_end_io = blkdev_bio_end_io_simple; > bio.bi_ioprio = iocb->ki_ioprio; > > ret = bio_iov_iter_get_pages(&bio, iter); > @@ -97,18 +87,8 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, > > if (iocb->ki_flags & IOCB_NOWAIT) > bio.bi_opf |= REQ_NOWAIT; > - if (iocb->ki_flags & IOCB_HIPRI) > - bio_set_polled(&bio, iocb); > > - submit_bio(&bio); > - for (;;) { > - set_current_state(TASK_UNINTERRUPTIBLE); > - if (!READ_ONCE(bio.bi_private)) > - break; > - if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, NULL, 0)) > - blk_io_schedule(); > - } > - __set_current_state(TASK_RUNNING); > + submit_bio_wait(&bio); > > bio_release_pages(&bio, should_dirty); > if (unlikely(bio.bi_status)) > diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c > index 62da020d02a1..80f9b047aa1b 100644 > --- a/fs/iomap/direct-io.c > +++ b/fs/iomap/direct-io.c > @@ -56,7 +56,8 @@ static void iomap_dio_submit_bio(const struct iomap_iter *iter, > { > atomic_inc(&dio->ref); > > - if (dio->iocb->ki_flags & IOCB_HIPRI) { > + /* Sync dio can't be polled reliably */ > + if ((dio->iocb->ki_flags & IOCB_HIPRI) && !is_sync_kiocb(dio->iocb)) { > bio_set_polled(bio, dio->iocb); > dio->submit.poll_bio = bio; > } > @@ -653,9 +654,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, > if (!READ_ONCE(dio->submit.waiter)) > break; > > - if (!dio->submit.poll_bio || > - !bio_poll(dio->submit.poll_bio, NULL, 0)) > - blk_io_schedule(); > + blk_io_schedule(); > } > __set_current_state(TASK_RUNNING); > } > diff --git a/mm/page_io.c b/mm/page_io.c > index 89fbf3cae30f..3fbdab6a940e 100644 > --- a/mm/page_io.c > +++ b/mm/page_io.c > @@ -360,7 +360,6 @@ int swap_readpage(struct page *page, bool synchronous) > * attempt to access it in the page fault retry time check. > */ > if (synchronous) { > - bio->bi_opf |= REQ_POLLED; > get_task_struct(current); > bio->bi_private = current; > } > @@ -372,8 +371,7 @@ int swap_readpage(struct page *page, bool synchronous) > if (!READ_ONCE(bio->bi_private)) > break; > > - if (!bio_poll(bio, NULL, 0)) > - blk_io_schedule(); > + blk_io_schedule(); > } > __set_current_state(TASK_RUNNING); > bio_put(bio); > -- > 2.31.1