Introduce journal callbacks to allow different behaviors for an inode in journal_submit|finish_inode_data_buffers(). The existing users of the current behavior (ext4, ocfs2) are adapted to use the previously exported functions that implement the current behavior. Users are callers of jbd2_journal_inode_ranged_write|wait(), which add the inode to the transaction's inode list with the JI_WRITE|WAIT_DATA flags; in-tree, these are only ext4 and ocfs2. Signed-off-by: Mauricio Faria de Oliveira <mfo@xxxxxxxxxxxxx> Suggested-by: Jan Kara <jack@xxxxxxx> --- fs/ext4/super.c | 14 ++++++++++++++ fs/jbd2/commit.c | 30 ++++++++++++++++++------------ fs/ocfs2/super.c | 15 +++++++++++++++ include/linux/jbd2.h | 25 ++++++++++++++++++++++++- 4 files changed, 71 insertions(+), 13 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ea425b49b345..7303839d7ad9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -472,6 +472,16 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) spin_unlock(&sbi->s_md_lock); } +static int ext4_journal_submit_inode_data_buffers(struct jbd2_inode *jinode) +{ + return jbd2_journal_submit_inode_data_buffers(jinode); +} + +static int ext4_journal_finish_inode_data_buffers(struct jbd2_inode *jinode) +{ + return jbd2_journal_finish_inode_data_buffers(jinode); +} + static bool system_going_down(void) { return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF @@ -4646,6 +4656,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; + sbi->s_journal->j_submit_inode_data_buffers = + ext4_journal_submit_inode_data_buffers; + sbi->s_journal->j_finish_inode_data_buffers = + ext4_journal_finish_inode_data_buffers; no_journal: if (!test_opt(sb, NO_MBCACHE)) { diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index c17cda96926e..23d3fcc11b97 100644 --- a/fs/jbd2/commit.c +++ 
b/fs/jbd2/commit.c @@ -200,6 +200,12 @@ int jbd2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode) .range_end = dirty_end, }; + /* + * submit the inode data buffers. We use writepage + * instead of writepages. Because writepages can do + * block allocation with delalloc. We need to write + * only allocated blocks here. + */ ret = generic_writepages(mapping, &wbc); return ret; } @@ -224,16 +230,13 @@ static int journal_submit_data_buffers(journal_t *journal, continue; jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); - /* - * submit the inode data buffers. We use writepage - * instead of writepages. Because writepages can do - * block allocation with delalloc. We need to write - * only allocated blocks here. - */ + /* submit the inode data buffers. */ trace_jbd2_submit_inode_data(jinode->i_vfs_inode); - err = jbd2_journal_submit_inode_data_buffers(jinode); - if (!ret) - ret = err; + if (journal->j_submit_inode_data_buffers) { + err = journal->j_submit_inode_data_buffers(jinode); + if (!ret) + ret = err; + } spin_lock(&journal->j_list_lock); J_ASSERT(jinode->i_transaction == commit_transaction); jinode->i_flags &= ~JI_COMMIT_RUNNING; @@ -273,9 +276,12 @@ static int journal_finish_inode_data_buffers(journal_t *journal, continue; jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); - err = jbd2_journal_finish_inode_data_buffers(jinode); - if (!ret) - ret = err; + /* wait for the inode data buffers writeout. 
*/ + if (journal->j_finish_inode_data_buffers) { + err = journal->j_finish_inode_data_buffers(jinode); + if (!ret) + ret = err; + } spin_lock(&journal->j_list_lock); jinode->i_flags &= ~JI_COMMIT_RUNNING; smp_mb(); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 1d91dd1e8711..f4e62aafc89c 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -2010,6 +2010,16 @@ static int ocfs2_journal_addressable(struct ocfs2_super *osb) return status; } +static int ocfs2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode) +{ + return jbd2_journal_submit_inode_data_buffers(jinode); +} + +static int ocfs2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode) +{ + return jbd2_journal_finish_inode_data_buffers(jinode); +} + static int ocfs2_initialize_super(struct super_block *sb, struct buffer_head *bh, int sector_size, @@ -2211,6 +2221,11 @@ static int ocfs2_initialize_super(struct super_block *sb, } osb->journal = journal; journal->j_osb = osb; + journal->j_journal->j_submit_inode_data_buffers = + ocfs2_journal_submit_inode_data_buffers; + journal->j_journal->j_finish_inode_data_buffers = + ocfs2_journal_finish_inode_data_buffers; + atomic_set(&journal->j_num_trans, 0); init_rwsem(&journal->j_trans_barrier); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 2865a5475888..4aaa408c0ca7 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -629,7 +629,9 @@ struct transaction_s struct journal_head *t_shadow_list; /* - * List of inodes whose data we've modified in data=ordered mode. + * List of inodes associated with the transaction; e.g., ext4 uses + * this to track inodes in data=ordered and data=journal mode that + * need special handling on transaction commit; also used by ocfs2. 
* [j_list_lock] */ struct list_head t_inode_list; @@ -1111,6 +1113,27 @@ struct journal_s void (*j_commit_callback)(journal_t *, transaction_t *); + /** + * @j_submit_inode_data_buffers: + * + * This function is called for all inodes associated with the + * committing transaction marked with JI_WRITE_DATA flag + * before we start to write out the transaction to the journal. + */ + int (*j_submit_inode_data_buffers) + (struct jbd2_inode *); + + /** + * @j_finish_inode_data_buffers: + * + * This function is called for all inodes associated with the + * committing transaction marked with JI_WAIT_DATA flag + * after we have written the transaction to the journal + * but before we write out the commit block. + */ + int (*j_finish_inode_data_buffers) + (struct jbd2_inode *); + /* * Journal statistics */ -- 2.17.1