From: Jérôme Glisse <jglisse@xxxxxxxxxx>

We want to keep track of how we got a reference on a page when doing DIO,
i.e. whether the page was referenced through GUP (get_user_page*) or not.
To that end this patch reworks the way a page reference is taken and handed
over between the DIO code and the BIO. Instead of taking a reference for a
page that has been successfully added to a BIO, we simply steal the
reference we already hold from looking the page up (either through GUP or
for ZERO_PAGE). The patch therefore tracks whether the reference has been
stolen by the BIO or not. This avoids a bunch of get_page()/put_page()
pairs and thus limits the number of atomic operations.

Signed-off-by: Jérôme Glisse <jglisse@xxxxxxxxxx>
Cc: linux-fsdevel@xxxxxxxxxxxxxxx
Cc: linux-block@xxxxxxxxxxxxxxx
Cc: linux-mm@xxxxxxxxx
Cc: John Hubbard <jhubbard@xxxxxxxxxx>
Cc: Jan Kara <jack@xxxxxxx>
Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
Cc: Alexander Viro <viro@xxxxxxxxxxxxxxxxxx>
Cc: Johannes Thumshirn <jthumshirn@xxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Jens Axboe <axboe@xxxxxxxxx>
Cc: Ming Lei <ming.lei@xxxxxxxxxx>
Cc: Dave Chinner <david@xxxxxxxxxxxxx>
Cc: Jason Gunthorpe <jgg@xxxxxxxx>
Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
Cc: Ernesto A. Fernández <ernesto.mnd.fernandez@xxxxxxxxx>
Cc: Jeff Moyer <jmoyer@xxxxxxxxxx>
---
 fs/direct-io.c | 82 ++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 60 insertions(+), 22 deletions(-)
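Not part of the patch, just an illustration for reviewers: a minimal sketch
of the release rule the commit message describes, assuming the
put_user_page() helper from John Hubbard's GUP-tracking work. The lookup
reference is handed over ("stolen") to the BIO when the page is queued; a
reference that was not handed over is dropped according to how it was
obtained. example_release() is a made-up name, not a helper added by this
patch.

#include <linux/mm.h>	/* put_page(), put_user_page() */

/* Illustration only: release a DIO page reference under the new rule. */
static void example_release(struct page *page, bool stolen, bool from_gup)
{
	/* The BIO stole (and now owns) the reference, nothing to drop here. */
	if (stolen)
		return;

	if (from_gup)
		put_user_page(page);	/* reference came from get_user_pages*() */
	else
		put_page(page);		/* ordinary page reference */
}
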
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b8b5d8e31aeb..ef9fc7703a78 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -100,6 +100,7 @@ struct dio_submit {
 	unsigned cur_page_len;		/* Nr of bytes at cur_page_offset */
 	sector_t cur_page_block;	/* Where it starts */
 	loff_t cur_page_fs_offset;	/* Offset in file */
+	bool cur_page_from_gup;		/* Current page is coming from GUP */
 
 	struct iov_iter *iter;
 	/*
@@ -148,6 +149,8 @@ struct dio {
 		struct page *pages[DIO_PAGES];	/* page buffer */
 		struct work_struct complete_work;/* deferred AIO completion */
 	};
+
+	bool gup;			/* pages are coming from GUP */
 } ____cacheline_aligned_in_smp;
 
 static struct kmem_cache *dio_cache __read_mostly;
@@ -167,6 +170,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
 {
 	ssize_t ret;
 
+	dio->gup = iov_iter_get_pages_use_gup(sdio->iter);
 	ret = iov_iter_get_pages(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES,
 				&sdio->from);
 
@@ -181,6 +185,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
 			dio->page_errors = ret;
 		get_page(page);
 		dio->pages[0] = page;
+		dio->gup = false;
 		sdio->head = 0;
 		sdio->tail = 1;
 		sdio->from = 0;
@@ -490,8 +495,12 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
  */
 static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
 {
-	while (sdio->head < sdio->tail)
-		put_page(dio->pages[sdio->head++]);
+	while (sdio->head < sdio->tail) {
+		if (dio->gup)
+			put_user_page(dio->pages[sdio->head++]);
+		else
+			put_page(dio->pages[sdio->head++]);
+	}
 }
 
 /*
@@ -760,15 +769,19 @@ static inline int dio_bio_add_page(struct dio_submit *sdio)
 {
 	int ret;
 
-	ret = bio_add_page(sdio->bio, sdio->cur_page,
-			sdio->cur_page_len, sdio->cur_page_offset, false);
+	/*
+	 * The bio is stealing the page reference, which is fine: a page can
+	 * only be added once, i.e. when dio_send_cur_page() is called, and
+	 * each successful call to dio_send_cur_page() clears cur_page.
+	 */
+	ret = bio_add_page(sdio->bio, sdio->cur_page, sdio->cur_page_len,
+			   sdio->cur_page_offset, sdio->cur_page_from_gup);
 	if (ret == sdio->cur_page_len) {
 		/*
 		 * Decrement count only, if we are done with this page
 		 */
 		if ((sdio->cur_page_len + sdio->cur_page_offset) == PAGE_SIZE)
 			sdio->pages_in_io--;
-		get_page(sdio->cur_page);
 		sdio->final_block_in_bio = sdio->cur_page_block +
 			(sdio->cur_page_len >> sdio->blkbits);
 		ret = 0;
@@ -828,9 +841,14 @@ static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
 		ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh);
 		if (ret == 0) {
 			ret = dio_bio_add_page(sdio);
+			if (!ret)
+				/* Clear the current page. */
+				sdio->cur_page = NULL;
 			BUG_ON(ret != 0);
 		}
-	}
+	} else
+		/* Clear the current page. */
+		sdio->cur_page = NULL;
 out:
 	return ret;
 }
@@ -855,7 +873,7 @@ static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
 static inline int
 submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
 		    unsigned offset, unsigned len, sector_t blocknr,
-		    struct buffer_head *map_bh)
+		    struct buffer_head *map_bh, bool gup)
 {
 	int ret = 0;
 
@@ -882,14 +900,13 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
 	 */
 	if (sdio->cur_page) {
 		ret = dio_send_cur_page(dio, sdio, map_bh);
-		put_page(sdio->cur_page);
-		sdio->cur_page = NULL;
 		if (ret)
			return ret;
 	}
 
-	get_page(page);		/* It is in dio */
+	/* Steal the page reference and the GUP flag */
 	sdio->cur_page = page;
+	sdio->cur_page_from_gup = gup;
 	sdio->cur_page_offset = offset;
 	sdio->cur_page_len = len;
 	sdio->cur_page_block = blocknr;
@@ -903,8 +920,6 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
 		ret = dio_send_cur_page(dio, sdio, map_bh);
 		if (sdio->bio)
 			dio_bio_submit(dio, sdio);
-		put_page(sdio->cur_page);
-		sdio->cur_page = NULL;
 	}
 	return ret;
 }
@@ -946,13 +961,29 @@ static inline void dio_zero_block(struct dio *dio, struct dio_submit *sdio,
 	this_chunk_bytes = this_chunk_blocks << sdio->blkbits;
 
 	page = ZERO_PAGE(0);
+	get_page(page);
 	if (submit_page_section(dio, sdio, page, 0, this_chunk_bytes,
-				sdio->next_block_for_io, map_bh))
+				sdio->next_block_for_io, map_bh, false)) {
+		put_page(page);
 		return;
+	}
 
 	sdio->next_block_for_io += this_chunk_blocks;
 }
 
+static inline void dio_put_page(const struct dio *dio, bool stolen,
+				struct page *page)
+{
+	/* If the page reference was stolen then there is nothing to do. */
+	if (stolen)
+		return;
+
+	if (dio->gup)
+		put_user_page(page);
+	else
+		put_page(page);
+}
+
 /*
  * Walk the user pages, and the file, mapping blocks to disk and generating
  * a sequence of (page,offset,len,block) mappings.  These mappings are injected
@@ -977,6 +1008,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 	int ret = 0;
 
 	while (sdio->block_in_file < sdio->final_block_in_request) {
+		bool stolen = false;
 		struct page *page;
 		size_t from, to;
 
@@ -1003,7 +1035,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 
 			ret = get_more_blocks(dio, sdio, map_bh);
 			if (ret) {
-				put_page(page);
+				dio_put_page(dio, stolen, page);
 				goto out;
 			}
 			if (!buffer_mapped(map_bh))
@@ -1048,7 +1080,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 
 				/* AKPM: eargh, -ENOTBLK is a hack */
 				if (dio->op == REQ_OP_WRITE) {
-					put_page(page);
+					dio_put_page(dio, stolen, page);
 					return -ENOTBLK;
 				}
 
@@ -1061,7 +1093,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 				if (sdio->block_in_file >=
 						i_size_aligned >> blkbits) {
 					/* We hit eof */
-					put_page(page);
+					dio_put_page(dio, stolen, page);
 					goto out;
 				}
 				zero_user(page, from, 1 << blkbits);
@@ -1099,11 +1131,13 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 						  from,
 						  this_chunk_bytes,
 						  sdio->next_block_for_io,
-						  map_bh);
+						  map_bh, dio->gup);
 			if (ret) {
-				put_page(page);
+				dio_put_page(dio, stolen, page);
 				goto out;
-			}
+			} else
+				/* The page reference has been stolen ... */
+				stolen = true;
 			sdio->next_block_for_io += this_chunk_blocks;
 
 			sdio->block_in_file += this_chunk_blocks;
@@ -1117,7 +1151,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 		}
 
 		/* Drop the ref which was taken in get_user_pages() */
-		put_page(page);
+		dio_put_page(dio, stolen, page);
 	}
 out:
 	return ret;
@@ -1356,8 +1390,12 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 		ret2 = dio_send_cur_page(dio, &sdio, &map_bh);
 		if (retval == 0)
 			retval = ret2;
-		put_page(sdio.cur_page);
-		sdio.cur_page = NULL;
+		else {
+			if (sdio.cur_page_from_gup)
+				put_user_page(sdio.cur_page);
+			else
+				put_page(sdio.cur_page);
+		}
 	}
 	if (sdio.bio)
 		dio_bio_submit(dio, &sdio);
-- 
2.20.1