Add an ext3 bug-for-bug compatible analogue for data=ordered mode. In this mode, we force all delayed allocation blocks involved with the to-be-committed transaction to be allocated, and then flushed out to disk before the transaction is committed. Signed-off-by: "Theodore Ts'o" <tytso@xxxxxxx> --- fs/ext4/ext4.h | 6 +++- fs/ext4/ext4_jbd2.h | 3 +- fs/ext4/inode.c | 12 +++++++++++ fs/ext4/super.c | 51 ++++++++++++++++++++++++++++++++++++------------- fs/jbd2/commit.c | 3 ++ include/linux/jbd2.h | 2 + 6 files changed, 60 insertions(+), 17 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ebd1a50..b15b03e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -541,8 +541,9 @@ do { \ #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ #define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */ #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ +#define EXT4_MOUNT_ORDERED_DATA 0x00000 /* Flush data before commit */ #define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ -#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ +#define EXT4_MOUNT_ALLOC_COMMIT_DATA 0x00800 /* Alloc data on commit */ #define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */ #define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */ #define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ @@ -820,10 +821,11 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) #define EXT4_DEFM_XATTR_USER 0x0004 #define EXT4_DEFM_ACL 0x0008 #define EXT4_DEFM_UID16 0x0010 -#define EXT4_DEFM_JMODE 0x0060 +#define EXT4_DEFM_JMODE 0x00E0 #define EXT4_DEFM_JMODE_DATA 0x0020 #define EXT4_DEFM_JMODE_ORDERED 0x0040 #define EXT4_DEFM_JMODE_WBACK 0x0060 +#define EXT4_DEFM_JMODE_ALLOC_COMMIT 0x00C0 /* * Default journal batch times diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index be2f426..0453671 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -274,7 +274,8 @@ static inline 
int ext4_should_order_data(struct inode *inode) return 0; if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) return 0; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) + if ((test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) || + (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ALLOC_COMMIT_DATA)) return 1; return 0; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b58e7e2..ba0112b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2754,6 +2754,17 @@ static int ext4_da_write_end(struct file *file, "dev %s ino %lu pos %llu len %u copied %u", inode->i_sb->s_id, inode->i_ino, (unsigned long long) pos, len, copied); + + if (test_opt(inode->i_sb, DATA_FLAGS) == + EXT4_MOUNT_ALLOC_COMMIT_DATA) { + ret = ext4_jbd2_file_inode(handle, inode); + if (ret) + goto errout; + ret = ext4_mark_inode_dirty(handle, inode); + if (ret) + goto errout; + } + start = pos & (PAGE_CACHE_SIZE - 1); end = start + copied - 1; @@ -2791,6 +2802,7 @@ static int ext4_da_write_end(struct file *file, copied = ret2; if (ret2 < 0) ret = ret2; +errout: ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3f32fb2..93e1bf9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -67,7 +67,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); static int ext4_unfreeze(struct super_block *sb); static void ext4_write_super(struct super_block *sb); static int ext4_freeze(struct super_block *sb); - +static void alloc_on_commit_callback(journal_t *journal); ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, struct ext4_group_desc *bg) @@ -849,6 +849,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",data=ordered"); else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) seq_puts(seq, ",data=writeback"); + else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ALLOC_COMMIT_DATA) + seq_puts(seq, ",data=alloc_on_commit"); if (sbi->s_inode_readahead_blks != 
EXT4_DEF_INODE_READAHEAD_BLKS) seq_printf(seq, ",inode_readahead_blks=%u", @@ -1012,7 +1014,7 @@ enum { Opt_journal_update, Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, - Opt_data_err_abort, Opt_data_err_ignore, + Opt_data_alloc_on_commit, Opt_data_err_abort, Opt_data_err_ignore, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, @@ -1056,6 +1058,7 @@ static const match_table_t tokens = { {Opt_data_journal, "data=journal"}, {Opt_data_ordered, "data=ordered"}, {Opt_data_writeback, "data=writeback"}, + {Opt_data_alloc_on_commit, "data=alloc_on_commit"}, {Opt_data_err_abort, "data_err=abort"}, {Opt_data_err_ignore, "data_err=ignore"}, {Opt_offusrjquota, "usrjquota="}, @@ -1273,6 +1276,9 @@ static int parse_options(char *options, struct super_block *sb, case Opt_data_ordered: data_opt = EXT4_MOUNT_ORDERED_DATA; goto datacheck; + case Opt_data_alloc_on_commit: + data_opt = EXT4_MOUNT_ALLOC_COMMIT_DATA; + goto datacheck; case Opt_data_writeback: data_opt = EXT4_MOUNT_WRITEBACK_DATA; datacheck: @@ -1852,6 +1858,26 @@ static void ext4_orphan_cleanup(struct super_block *sb, #endif sb->s_flags = s_flags; /* Restore MS_RDONLY status */ } + +/* + * This callback is called before each commit when we are using + * alloc-on-commit mode. + */ +static void alloc_on_commit_callback(journal_t *journal) +{ + struct jbd2_inode *jinode, *next_i; + transaction_t *transaction = journal->j_running_transaction; + + spin_lock(&journal->j_list_lock); + list_for_each_entry_safe(jinode, next_i, + &transaction->t_inode_list, i_list) { + spin_unlock(&journal->j_list_lock); + ext4_alloc_da_blocks(jinode->i_vfs_inode); + spin_lock(&journal->j_list_lock); + } + spin_unlock(&journal->j_list_lock); +} + /* * Maximal extent format file size. 
* Resulting logical blkno at s_maxbytes must fit in our on-disk @@ -2283,6 +2309,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA; else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA; + else if ((def_mount_opts & EXT4_DEFM_JMODE) == + EXT4_DEFM_JMODE_ALLOC_COMMIT) + sbi->s_mount_opt |= EXT4_MOUNT_ALLOC_COMMIT_DATA; if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) set_opt(sbi->s_mount_opt, ERRORS_PANIC); @@ -2654,18 +2683,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* We have now updated the journal if required, so we can * validate the data journaling mode. */ switch (test_opt(sb, DATA_FLAGS)) { - case 0: - /* No mode set, assume a default based on the journal - * capabilities: ORDERED_DATA if the journal can - * cope, else JOURNAL_DATA - */ - if (jbd2_journal_check_available_features - (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) - set_opt(sbi->s_mount_opt, ORDERED_DATA); - else - set_opt(sbi->s_mount_opt, JOURNAL_DATA); - break; - + case EXT4_MOUNT_ALLOC_COMMIT_DATA: + sbi->s_journal->j_pre_commit_callback = + alloc_on_commit_callback; case EXT4_MOUNT_ORDERED_DATA: case EXT4_MOUNT_WRITEBACK_DATA: if (!jbd2_journal_check_available_features @@ -2784,6 +2804,9 @@ no_journal: descr = " journalled data mode"; else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) descr = " ordered data mode"; + else if (test_opt(sb, DATA_FLAGS) == + EXT4_MOUNT_ALLOC_COMMIT_DATA) + descr = " alloc on commit data mode"; else descr = " writeback data mode"; } else diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 62804e5..e8a96e7 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -379,6 +379,9 @@ void jbd2_journal_commit_transaction(journal_t *journal) spin_unlock(&journal->j_list_lock); #endif + if (journal->j_pre_commit_callback) + journal->j_pre_commit_callback(journal); + /* 
Do we need to erase the effects of a prior jbd2_journal_flush? */ if (journal->j_flags & JBD2_FLUSHED) { jbd_debug(3, "super block updated\n"); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 4d248b3..43b1689 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -975,6 +975,8 @@ struct journal_s u32 j_min_batch_time; u32 j_max_batch_time; + /* This function is called before a transaction is closed */ + void (*j_pre_commit_callback)(journal_t *); /* This function is called when a transaction is closed */ void (*j_commit_callback)(journal_t *, transaction_t *); -- 1.5.6.3 -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html