This implements the journal's j_submit_inode_data_buffers() callback to write-protect the inode's pages with write_cache_pages(), and use a writepage callback to redirty pages with buffers that are not part of the committing transaction or the next transaction. And set a no-op function as j_finish_inode_data_buffers() callback (nothing needed other than the write-protect above.) Currently, the inode is added to the transaction's inode list in the __ext4_journalled_writepage() function. --- fs/ext4/inode.c | 4 +++ fs/ext4/super.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 10dd470876b3..978ccde8454f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1911,6 +1911,10 @@ static int __ext4_journalled_writepage(struct page *page, err = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, write_end_fn); } + if (ret == 0) + ret = err; + // XXX: is this correct for inline data inodes? + err = ext4_jbd2_inode_add_write(handle, inode, 0, len); if (ret == 0) ret = err; EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 330957ed1f05..38aaac6572ea 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -472,6 +472,66 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) spin_unlock(&sbi->s_md_lock); } +/* + * This writepage callback for write_cache_pages() + * takes care of a few cases after page cleaning. + * + * write_cache_pages() already checks for dirty pages + * and calls clear_page_dirty_for_io(), which we want, + * to write protect the pages. + * + * However, we have to redirty a page in two cases: + * 1) some buffer is not part of the committing transaction + * 2) some buffer already has b_next_transaction set + */ + +static int ext4_journalled_writepage_callback(struct page *page, + struct writeback_control *wbc, + void *data) +{ + transaction_t *transaction = (transaction_t *) data; + struct buffer_head *bh, *head; + struct journal_head *jh; + + // XXX: any chance of !bh here? + bh = head = page_buffers(page); + do { + jh = bh2jh(bh); + if (!jh || jh->b_transaction != transaction || + jh->b_next_transaction) { + redirty_page_for_writepage(wbc, page); + goto out; + } + } while ((bh = bh->b_this_page) != head); + +out: + return AOP_WRITEPAGE_ACTIVATE; +} + +static int ext4_journalled_submit_inode_data_buffers(struct jbd2_inode *jinode) +{ + struct address_space *mapping = jinode->i_vfs_inode->i_mapping; + transaction_t *transaction = jinode->i_transaction; + loff_t dirty_start = jinode->i_dirty_start; + loff_t dirty_end = jinode->i_dirty_end; + + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = mapping->nrpages * 2, + .range_start = dirty_start, + .range_end = dirty_end, + }; + + return write_cache_pages(mapping, &wbc, + ext4_journalled_writepage_callback, + transaction); +} + +static int ext4_journalled_finish_inode_data_buffers(struct jbd2_inode *jinode) +{ + return 0; +} + static bool system_going_down(void) { return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF @@ -4599,6 +4659,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_msg(sb, KERN_ERR, "can't mount with " "journal_async_commit in data=ordered mode"); goto failed_mount_wq; + } else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { + sbi->s_journal->j_submit_inode_data_buffers = + ext4_journalled_submit_inode_data_buffers; + sbi->s_journal->j_finish_inode_data_buffers = + ext4_journalled_finish_inode_data_buffers; } set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); -- 2.17.1