Provide helpers to set and clear sb->s_readonly_remount including appropriate memory barriers. Also use this opportunity to document what the barriers pair with and why they are needed. Suggested-by: Dave Chinner <david@xxxxxxxxxxxxx> Signed-off-by: Jan Kara <jack@xxxxxxx> --- fs/internal.h | 41 +++++++++++++++++++++++++++++++++++++++++ fs/namespace.c | 25 ++++++++++++++++--------- fs/super.c | 17 ++++++----------- include/linux/fs.h | 2 +- 4 files changed, 64 insertions(+), 21 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index bd3b2810a36b..b916b84809f3 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -120,6 +120,47 @@ void put_super(struct super_block *sb); extern bool mount_capable(struct fs_context *); int sb_init_dio_done_wq(struct super_block *sb); +/* + * Prepare superblock for changing its read-only state (i.e., either remount + * read-write superblock read-only or vice versa). After this function returns + * mnt_is_readonly() will return true for any mount of the superblock if its + * caller is able to observe any changes done by the remount. This holds until + * sb_end_ro_state_change() is called. + */ +static inline void sb_start_ro_state_change(struct super_block *sb) +{ + WRITE_ONCE(sb->s_readonly_remount, 1); + /* + * For RO->RW transition, the barrier pairs with the barrier in + * mnt_is_readonly() making sure if mnt_is_readonly() sees SB_RDONLY + * cleared, it will see s_readonly_remount set. + * For RW->RO transition, the barrier pairs with the barrier in + * __mnt_want_write() before the mnt_is_readonly() check. The barrier + * makes sure if __mnt_want_write() sees MNT_WRITE_HOLD already + * cleared, it will see s_readonly_remount set. + */ + smp_wmb(); +} + +/* + * Ends section changing read-only state of the superblock. After this function + * returns if mnt_is_readonly() returns false, the caller will be able to + * observe all the changes remount did to the superblock. 
+ */ +static inline void sb_end_ro_state_change(struct super_block *sb) +{ + /* + * This barrier provides release semantics that pairs with + * the smp_rmb() acquire semantics in mnt_is_readonly(). + * This barrier pair ensures that when mnt_is_readonly() sees + * 0 for sb->s_readonly_remount, it will also see all the + * preceding flag changes that were made during the RO state + * change. + */ + smp_wmb(); + WRITE_ONCE(sb->s_readonly_remount, 0); +} + /* * open.c */ diff --git a/fs/namespace.c b/fs/namespace.c index 54847db5b819..5ba1eca6f720 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -309,9 +309,16 @@ static unsigned int mnt_get_writers(struct mount *mnt) static int mnt_is_readonly(struct vfsmount *mnt) { - if (mnt->mnt_sb->s_readonly_remount) + if (READ_ONCE(mnt->mnt_sb->s_readonly_remount)) return 1; - /* Order wrt setting s_flags/s_readonly_remount in do_remount() */ + /* + * The barrier pairs with the barrier in sb_start_ro_state_change() + * making sure if we don't see s_readonly_remount set yet, we also will + * not see any superblock / mount flag changes done by remount. + * It also pairs with the barrier in sb_end_ro_state_change() + * assuring that if we see s_readonly_remount already cleared, we will + * see the values of superblock / mount flags updated by remount. + */ smp_rmb(); return __mnt_is_readonly(mnt); } @@ -364,9 +371,11 @@ int __mnt_want_write(struct vfsmount *m) } } /* - * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will - * be set to match its requirements. So we must not load that until - * MNT_WRITE_HOLD is cleared. + * The barrier pairs with the barrier in sb_start_ro_state_change() making + * sure that if we see MNT_WRITE_HOLD cleared, we will also see + * s_readonly_remount set (or even SB_RDONLY / MNT_READONLY flags) in + * mnt_is_readonly() and bail in case we are racing with remount + * read-only. 
*/ smp_rmb(); if (mnt_is_readonly(m)) { @@ -588,10 +597,8 @@ int sb_prepare_remount_readonly(struct super_block *sb) if (!err && atomic_long_read(&sb->s_remove_count)) err = -EBUSY; - if (!err) { - sb->s_readonly_remount = 1; - smp_wmb(); - } + if (!err) + sb_start_ro_state_change(sb); list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; diff --git a/fs/super.c b/fs/super.c index 6cd64961aa07..8a39902b859f 100644 --- a/fs/super.c +++ b/fs/super.c @@ -944,8 +944,7 @@ int reconfigure_super(struct fs_context *fc) */ if (remount_ro) { if (force) { - sb->s_readonly_remount = 1; - smp_wmb(); + sb_start_ro_state_change(sb); } else { retval = sb_prepare_remount_readonly(sb); if (retval) @@ -953,12 +952,10 @@ int reconfigure_super(struct fs_context *fc) } } else if (remount_rw) { /* - * We set s_readonly_remount here to protect filesystem's - * reconfigure code from writes from userspace until - * reconfigure finishes. + * Protect filesystem's reconfigure code from writes from + * userspace until reconfigure finishes. 
*/ - sb->s_readonly_remount = 1; - smp_wmb(); + sb_start_ro_state_change(sb); } if (fc->ops->reconfigure) { @@ -974,9 +971,7 @@ int reconfigure_super(struct fs_context *fc) WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) | (fc->sb_flags & fc->sb_flags_mask))); - /* Needs to be ordered wrt mnt_is_readonly() */ - smp_wmb(); - sb->s_readonly_remount = 0; + sb_end_ro_state_change(sb); /* * Some filesystems modify their metadata via some other path than the @@ -991,7 +986,7 @@ int reconfigure_super(struct fs_context *fc) return 0; cancel_readonly: - sb->s_readonly_remount = 0; + sb_end_ro_state_change(sb); return retval; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 133f0640fb24..ede51d60d124 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1242,7 +1242,7 @@ struct super_block { */ atomic_long_t s_fsnotify_connectors; - /* Being remounted read-only */ + /* Read-only state of the superblock is being changed */ int s_readonly_remount; /* per-sb errseq_t for reporting writeback errors via syncfs */ -- 2.35.3