Re: [PATCH 1/2] ext4: fast commit may not fallback for ineligible commit

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Nice, this is a pretty good catch! One thing I'd note is that I'm
working on a patch that would improve the performance of fast commit by
locking only one inode at a time instead of blocking the entire file
system from performing journal updates. (In other words, with my patch
we'll drop jbd2_journal_lock_updates() before performing the fast
commit.) That patch would add a new "h_priv" field in handle_t where
we would store a pointer to the inode which is being modified. Once
that patch is in, we can modify ext4_fc_mark_ineligible() to take only
the handle, since sb can then be derived from it as
handle->h_priv->i_sb. I can take care of making that change in my
patch.

Reviewed-by: Harshad Shirwadkar <harshadshirwadkar@xxxxxxxxx>

On Fri, Jan 7, 2022 at 4:12 AM Xin Yin <yinxin.x@xxxxxxxxxxxxx> wrote:
>
> For the following scenario:
> 1. jbd starts committing transaction n
> 2. task A gets a new handle for transaction n+1
> 3. task A does some ineligible actions and marks FC_INELIGIBLE
> 4. jbd completes transaction n and clears FC_INELIGIBLE
> 5. task A calls fsync
>
> In this case fast commit will not fall back to a full commit, and
> transaction n+1 is also not handled by jbd.
>
> Make ext4_fc_mark_ineligible() also record the transaction tid of
> the latest ineligible case. When ext4_fc_cleanup() is called, check
> the current transaction tid; if it is smaller than the latest
> ineligible tid, do not clear EXT4_MF_FC_INELIGIBLE.
>
> Signed-off-by: Xin Yin <yinxin.x@xxxxxxxxxxxxx>
> ---
>  fs/ext4/ext4.h        |  3 ++-
>  fs/ext4/extents.c     |  4 ++--
>  fs/ext4/fast_commit.c | 19 +++++++++++++------
>  fs/ext4/inode.c       |  4 ++--
>  fs/ext4/ioctl.c       |  4 ++--
>  fs/ext4/namei.c       |  4 ++--
>  fs/ext4/super.c       |  1 +
>  fs/ext4/xattr.c       |  6 +++---
>  fs/jbd2/commit.c      |  2 +-
>  fs/jbd2/journal.c     |  2 +-
>  include/linux/jbd2.h  |  2 +-
>  11 files changed, 30 insertions(+), 21 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 7b0686758691..a060bb56e654 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1747,6 +1747,7 @@ struct ext4_sb_info {
>         spinlock_t s_fc_lock;
>         struct buffer_head *s_fc_bh;
>         struct ext4_fc_stats s_fc_stats;
> +       tid_t s_fc_ineligible_tid;
>  #ifdef CONFIG_EXT4_DEBUG
>         int s_fc_debug_max_replay;
>  #endif
> @@ -2924,7 +2925,7 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
>                             struct dentry *dentry);
>  void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
>  void ext4_fc_track_inode(handle_t *handle, struct inode *inode);
> -void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
> +void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle);
>  void ext4_fc_start_update(struct inode *inode);
>  void ext4_fc_stop_update(struct inode *inode);
>  void ext4_fc_del(struct inode *inode);
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 9b6c76629c93..3be90aa5a5ba 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -5339,7 +5339,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
>                 ret = PTR_ERR(handle);
>                 goto out_mmap;
>         }
> -       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
> +       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
>
>         down_write(&EXT4_I(inode)->i_data_sem);
>         ext4_discard_preallocations(inode, 0);
> @@ -5479,7 +5479,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
>                 ret = PTR_ERR(handle);
>                 goto out_mmap;
>         }
> -       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
> +       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
>
>         /* Expand file to avoid data loss if there is error while shifting */
>         inode->i_size += len;
> diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
> index f0cd20f5fe5e..3673d4798af3 100644
> --- a/fs/ext4/fast_commit.c
> +++ b/fs/ext4/fast_commit.c
> @@ -303,7 +303,7 @@ void ext4_fc_del(struct inode *inode)
>   * Mark file system as fast commit ineligible. This means that next commit
>   * operation would result in a full jbd2 commit.
>   */
> -void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
> +void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)
>  {
>         struct ext4_sb_info *sbi = EXT4_SB(sb);
>
> @@ -312,6 +312,10 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
>                 return;
>
>         ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
> +       spin_lock(&sbi->s_fc_lock);
> +       if (handle && !IS_ERR(handle))
> +               sbi->s_fc_ineligible_tid = handle->h_transaction->t_tid;
> +       spin_unlock(&sbi->s_fc_lock);
>         WARN_ON(reason >= EXT4_FC_REASON_MAX);
>         sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
>  }
> @@ -387,7 +391,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
>         mutex_unlock(&ei->i_fc_lock);
>         node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
>         if (!node) {
> -               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
> +               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
>                 mutex_lock(&ei->i_fc_lock);
>                 return -ENOMEM;
>         }
> @@ -400,7 +404,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
>                 if (!node->fcd_name.name) {
>                         kmem_cache_free(ext4_fc_dentry_cachep, node);
>                         ext4_fc_mark_ineligible(inode->i_sb,
> -                               EXT4_FC_REASON_NOMEM);
> +                               EXT4_FC_REASON_NOMEM, NULL);
>                         mutex_lock(&ei->i_fc_lock);
>                         return -ENOMEM;
>                 }
> @@ -502,7 +506,7 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
>
>         if (ext4_should_journal_data(inode)) {
>                 ext4_fc_mark_ineligible(inode->i_sb,
> -                                       EXT4_FC_REASON_INODE_JOURNAL_DATA);
> +                                       EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
>                 return;
>         }
>
> @@ -1180,7 +1184,7 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
>   * Fast commit cleanup routine. This is called after every fast commit and
>   * full commit. full is true if we are called after a full commit.
>   */
> -static void ext4_fc_cleanup(journal_t *journal, int full)
> +static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
>  {
>         struct super_block *sb = journal->j_private;
>         struct ext4_sb_info *sbi = EXT4_SB(sb);
> @@ -1228,7 +1232,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
>                                 &sbi->s_fc_q[FC_Q_MAIN]);
>
>         ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
> -       ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
> +       if (tid >= sbi->s_fc_ineligible_tid) {
> +               sbi->s_fc_ineligible_tid = 0;
> +               ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
> +       }
>
>         if (full)
>                 sbi->s_fc_bytes = 0;
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 08a90e25b78b..2ffa9bbb1324 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -337,7 +337,7 @@ void ext4_evict_inode(struct inode *inode)
>         return;
>  no_delete:
>         if (!list_empty(&EXT4_I(inode)->i_fc_list))
> -               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
> +               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, handle);
>         ext4_clear_inode(inode);        /* We must guarantee clearing of inode... */
>  }
>
> @@ -5995,7 +5995,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
>                 return PTR_ERR(handle);
>
>         ext4_fc_mark_ineligible(inode->i_sb,
> -               EXT4_FC_REASON_JOURNAL_FLAG_CHANGE);
> +               EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, handle);
>         err = ext4_mark_inode_dirty(handle, inode);
>         ext4_handle_sync(handle);
>         ext4_journal_stop(handle);
> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> index 1366afb59fba..0557402ac4fd 100644
> --- a/fs/ext4/ioctl.c
> +++ b/fs/ext4/ioctl.c
> @@ -169,7 +169,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
>                 err = -EINVAL;
>                 goto err_out;
>         }
> -       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT);
> +       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT, handle);
>
>         /* Protect extent tree against block allocations via delalloc */
>         ext4_double_down_write_data_sem(inode, inode_bl);
> @@ -1073,7 +1073,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>
>                 err = ext4_resize_fs(sb, n_blocks_count);
>                 if (EXT4_SB(sb)->s_journal) {
> -                       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE);
> +                       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE, NULL);
>                         jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
>                         err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
>                         jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
> diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
> index 52c9bd154122..47b9f87dbc6f 100644
> --- a/fs/ext4/namei.c
> +++ b/fs/ext4/namei.c
> @@ -3889,7 +3889,7 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
>                  * dirents in directories.
>                  */
>                 ext4_fc_mark_ineligible(old.inode->i_sb,
> -                       EXT4_FC_REASON_RENAME_DIR);
> +                       EXT4_FC_REASON_RENAME_DIR, handle);
>         } else {
>                 if (new.inode)
>                         ext4_fc_track_unlink(handle, new.dentry);
> @@ -4049,7 +4049,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
>         if (unlikely(retval))
>                 goto end_rename;
>         ext4_fc_mark_ineligible(new.inode->i_sb,
> -                               EXT4_FC_REASON_CROSS_RENAME);
> +                               EXT4_FC_REASON_CROSS_RENAME, handle);
>         if (old.dir_bh) {
>                 retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
>                 if (retval)
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index d81423021e3b..6049547d3c0f 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -4627,6 +4627,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
>         sbi->s_fc_bytes = 0;
>         ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
>         ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
> +       sbi->s_fc_ineligible_tid = 0;
>         spin_lock_init(&sbi->s_fc_lock);
>         memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
>         sbi->s_fc_replay_state.fc_regions = NULL;
> diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
> index 1e0fc1ed845b..c1748730c1df 100644
> --- a/fs/ext4/xattr.c
> +++ b/fs/ext4/xattr.c
> @@ -2408,7 +2408,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
>                 if (IS_SYNC(inode))
>                         ext4_handle_sync(handle);
>         }
> -       ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
> +       ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
>
>  cleanup:
>         brelse(is.iloc.bh);
> @@ -2486,7 +2486,7 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
>                 if (error == 0)
>                         error = error2;
>         }
> -       ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
> +       ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
>
>         return error;
>  }
> @@ -2920,7 +2920,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
>                                          error);
>                         goto cleanup;
>                 }
> -               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
> +               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
>         }
>         error = 0;
>  cleanup:
> diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
> index 3cc4ab2ba7f4..d188fa913a07 100644
> --- a/fs/jbd2/commit.c
> +++ b/fs/jbd2/commit.c
> @@ -1170,7 +1170,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
>         if (journal->j_commit_callback)
>                 journal->j_commit_callback(journal, commit_transaction);
>         if (journal->j_fc_cleanup_callback)
> -               journal->j_fc_cleanup_callback(journal, 1);
> +               journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid);
>
>         trace_jbd2_end_commit(journal, commit_transaction);
>         jbd_debug(1, "JBD2: commit %d complete, head %d\n",
> diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
> index 0b86a4365b66..a8e64ad11ae3 100644
> --- a/fs/jbd2/journal.c
> +++ b/fs/jbd2/journal.c
> @@ -771,7 +771,7 @@ static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
>  {
>         jbd2_journal_unlock_updates(journal);
>         if (journal->j_fc_cleanup_callback)
> -               journal->j_fc_cleanup_callback(journal, 0);
> +               journal->j_fc_cleanup_callback(journal, 0, tid);
>         write_lock(&journal->j_state_lock);
>         journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
>         if (fallback)
> diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
> index fd933c45281a..d63b8106796e 100644
> --- a/include/linux/jbd2.h
> +++ b/include/linux/jbd2.h
> @@ -1295,7 +1295,7 @@ struct journal_s
>          * Clean-up after fast commit or full commit. JBD2 calls this function
>          * after every commit operation.
>          */
> -       void (*j_fc_cleanup_callback)(struct journal_s *journal, int);
> +       void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid);
>
>         /**
>          * @j_fc_replay_callback:
> --
> 2.20.1
>



[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux