Later we would like to clear the PageWriteback bit only after extent conversion from unwritten to written extents is performed. However, it is not possible to start a transaction after PageWriteback is set because that violates lock ordering (and can easily deadlock). So we have to reserve a transaction before locking pages and sending them for IO, and later we use the reserved transaction for extent conversion from ext4_end_io(). Signed-off-by: Jan Kara <jack@xxxxxxx> --- fs/ext4/ext4.h | 12 +++++++++--- fs/ext4/ext4_jbd2.h | 3 ++- fs/ext4/extents.c | 39 ++++++++++++++++++++++++++++----------- fs/ext4/inode.c | 32 ++++++++++++++++++++++++++++++-- fs/ext4/page-io.c | 11 ++++++++--- 5 files changed, 77 insertions(+), 20 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3c3827a..65adf0d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -182,10 +182,13 @@ struct ext4_map_blocks { #define EXT4_IO_END_DIRECT 0x0004 /* - * For converting uninitialized extents on a work queue. + * For converting uninitialized extents on a work queue. 'handle' is used for + * buffered writeback. 
*/ typedef struct ext4_io_end { struct list_head list; /* per-file finished IO list */ + handle_t *handle; /* handle reserved for extent + * conversion */ struct inode *inode; /* file being written to */ unsigned int flag; /* unwritten or not */ loff_t offset; /* offset in the file */ @@ -1314,6 +1317,9 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode, struct ext4_io_end *io_end) { if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { + /* Writeback has to have conversion transaction reserved */ + WARN_ON(!io_end->handle && + !(io_end->flag & EXT4_IO_END_DIRECT)); io_end->flag |= EXT4_IO_END_UNWRITTEN; atomic_inc(&EXT4_I(inode)->i_unwritten); } @@ -2550,8 +2556,8 @@ extern void ext4_ext_init(struct super_block *); extern void ext4_ext_release(struct super_block *); extern long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len); -extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, - ssize_t len); +extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, + loff_t offset, ssize_t len); extern int ext4_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags); extern int ext4_ext_calc_metadata_amount(struct inode *inode, diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index bb17931..88e95d7 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -132,7 +132,8 @@ static inline int ext4_jbd2_credits_xattr(struct inode *inode) #define EXT4_HT_MIGRATE 8 #define EXT4_HT_MOVE_EXTENTS 9 #define EXT4_HT_XATTR 10 -#define EXT4_HT_MAX 11 +#define EXT4_HT_EXT_CONVERT 11 +#define EXT4_HT_MAX 12 /** * struct ext4_journal_cb_entry - Base structure for callback information. diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 8064b71..ae22735 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4484,10 +4484,9 @@ retry: * function, to convert the fallocated extents after IO is completed. * Returns 0 on success. 
*/ -int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, - ssize_t len) +int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, + loff_t offset, ssize_t len) { - handle_t *handle; unsigned int max_blocks; int ret = 0; int ret2 = 0; @@ -4502,16 +4501,31 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - map.m_lblk); /* - * credits to insert 1 extent into extent tree + * This is somewhat ugly but the idea is clear: When transaction is + * reserved, everything goes into it. Otherwise we rather start several + * smaller transactions for conversion of each extent separately. */ - credits = ext4_chunk_trans_blocks(inode, max_blocks); + if (handle) { + handle = ext4_journal_start_reserved(handle); + if (IS_ERR(handle)) + return PTR_ERR(handle); + credits = 0; + } else { + /* + * credits to insert 1 extent into extent tree + */ + credits = ext4_chunk_trans_blocks(inode, max_blocks); + } while (ret >= 0 && ret < max_blocks) { map.m_lblk += ret; map.m_len = (max_blocks -= ret); - handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - break; + if (credits) { + handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, + credits); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + break; + } } ret = ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_IO_CONVERT_EXT); @@ -4522,10 +4536,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, inode->i_ino, map.m_lblk, map.m_len, ret); ext4_mark_inode_dirty(handle, inode); - ret2 = ext4_journal_stop(handle); - if (ret <= 0 || ret2 ) + if (credits) + ret2 = ext4_journal_stop(handle); + if (ret <= 0 || ret2) break; } + if (!credits) + ret2 = ext4_journal_stop(handle); return ret > 0 ? 
ret2 : ret; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0602a09..f8e78ce 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1327,6 +1327,8 @@ static void ext4_da_page_release_reservation(struct page *page, struct mpage_da_data { struct inode *inode; struct writeback_control *wbc; + handle_t *reserved_handle; /* Handle reserved for conversion */ + pgoff_t first_page; /* The first page to write */ pgoff_t next_page; /* Current page to examine */ pgoff_t last_page; /* Last page to examine */ @@ -1973,8 +1975,13 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) err = ext4_map_blocks(handle, inode, map, get_blocks_flags); if (err < 0) return err; - if (map->m_flags & EXT4_MAP_UNINIT) + if (map->m_flags & EXT4_MAP_UNINIT) { + if (!mpd->io_submit.io_end->handle) { + mpd->io_submit.io_end->handle = mpd->reserved_handle; + mpd->reserved_handle = NULL; + } ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end); + } BUG_ON(map->m_len == 0); if (map->m_flags & EXT4_MAP_NEW) { @@ -2274,6 +2281,7 @@ static int ext4_da_writepages(struct address_space *mapping, mpd.inode = inode; mpd.wbc = wbc; + mpd.reserved_handle = NULL; ext4_io_submit_init(&mpd.io_submit, wbc); retry: if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) @@ -2288,6 +2296,23 @@ retry: break; } + /* Reserve handle if it may be needed for extent conversion */ + if (ext4_should_dioread_nolock(inode) && !mpd.reserved_handle) { + /* + * We may need to convert up to one extent per block in + * the page and we may dirty the inode. 
+ */ + mpd.reserved_handle = ext4_journal_reserve(inode, + EXT4_HT_EXT_CONVERT, + 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits)); + if (IS_ERR(mpd.reserved_handle)) { + ret = PTR_ERR(mpd.reserved_handle); + mpd.reserved_handle = NULL; + ext4_put_io_end(mpd.io_submit.io_end); + break; + } + } + /* * We have two constraints: We find one extent to map and we * must always write out whole page (makes a difference when @@ -2364,6 +2389,9 @@ retry: */ mapping->writeback_index = mpd.first_page; + if (mpd.reserved_handle) + ext4_journal_free_reserved(mpd.reserved_handle); + out_writepages: trace_ext4_da_writepages_result(inode, wbc, ret, nr_to_write - wbc->nr_to_write); @@ -2977,7 +3005,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, * for non AIO case, since the IO is already * completed, we could do the conversion right here */ - err = ext4_convert_unwritten_extents(inode, + err = ext4_convert_unwritten_extents(NULL, inode, offset, ret); if (err < 0) ret = err; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index cc59cd9..e8ee4da 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -55,6 +55,7 @@ static void ext4_release_io_end(ext4_io_end_t *io_end) { BUG_ON(!list_empty(&io_end->list)); BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); + WARN_ON(io_end->handle); if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count)) wake_up_all(ext4_ioend_wq(io_end->inode)); @@ -81,13 +82,15 @@ static int ext4_end_io(ext4_io_end_t *io) struct inode *inode = io->inode; loff_t offset = io->offset; ssize_t size = io->size; + handle_t *handle = io->handle; int ret = 0; ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," "list->prev 0x%p\n", io, inode->i_ino, io->list.next, io->list.prev); - ret = ext4_convert_unwritten_extents(inode, offset, size); + io->handle = NULL; /* Following call will use up the handle */ + ret = ext4_convert_unwritten_extents(handle, inode, offset, size); if (ret < 0) { ext4_msg(inode->i_sb, KERN_EMERG, "failed to 
convert unwritten extents to written " @@ -217,8 +220,10 @@ int ext4_put_io_end(ext4_io_end_t *io_end) if (atomic_dec_and_test(&io_end->count)) { if (io_end->flag & EXT4_IO_END_UNWRITTEN) { - err = ext4_convert_unwritten_extents(io_end->inode, - io_end->offset, io_end->size); + err = ext4_convert_unwritten_extents(io_end->handle, + io_end->inode, io_end->offset, + io_end->size); + io_end->handle = NULL; ext4_clear_io_unwritten_flag(io_end); } ext4_release_io_end(io_end); -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html