On 4/2/21 8:32 AM, Pavel Begunkov wrote: > On 15/12/2020 09:43, JeffleXu wrote: >> Thanks for your explanation, again. > > Got stale, let's bring it up again. How about something like this - check upfront if we're going to be using multiple bios, and -EAGAIN for NOWAIT being set if that is the case. That avoids the partial problem, and still retains (what I consider) proper NOWAIT behavior for O_DIRECT with IOCB_NOWAIT set. It's also worth nothing that this condition exists already for polled IO. If the bio is marked as polled, then we implicitly set NOWAIT as well, as there's no way to support polled IO with sleeping request allocations. Hence it's worth considering this a fix for that case, too. diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index e2c4991833b8..6f932fe99440 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -66,6 +66,8 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap, if (dio->iocb->ki_flags & IOCB_HIPRI) bio_set_polled(bio, dio->iocb); + if (dio->iocb->ki_flags & IOCB_NOWAIT) + bio->bi_opf |= REQ_NOWAIT; dio->submit.last_queue = bdev_get_queue(iomap->bdev); if (dio->dops && dio->dops->submit_io) @@ -236,6 +238,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev)); unsigned int fs_block_size = i_blocksize(inode), pad; unsigned int align = iov_iter_alignment(dio->submit.iter); + bool nowait = dio->iocb->ki_flags & (IOCB_HIPRI | IOCB_NOWAIT); unsigned int bio_opf; struct bio *bio; bool need_zeroout = false; @@ -296,7 +299,17 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, */ bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua); - nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_PAGES); + nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, INT_MAX); + + /* Can't handle IOCB_NOWAIT for split bios */ + if (nr_pages > BIO_MAX_PAGES) { + if (nowait) { + ret = -EAGAIN; + goto out; + } + nr_pages = BIO_MAX_PAGES; + } + do { size_t n; if (dio->error) { @@ -326,6 +339,19 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, goto zero_tail; } + /* + * If there are leftover pages, bail if nowait is set to avoid + * multiple bios and potentially having one of them -EAGAIN + * with the other succeeding. + */ + nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, + BIO_MAX_PAGES); + if (nr_pages && nowait) { + ret = -EAGAIN; + bio_put(bio); + goto out; + } + n = bio->bi_iter.bi_size; if (dio->flags & IOMAP_DIO_WRITE) { task_io_account_write(n); @@ -337,8 +363,6 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, dio->size += n; copied += n; - nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, - BIO_MAX_PAGES); iomap_dio_submit_bio(dio, iomap, bio, pos); pos += n; } while (nr_pages); -- Jens Axboe