Re: [PATCH] fs: only sync() superblocks reachable from the current namespace

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Jan 26, 2018 at 02:58:39PM -0800, Omar Sandoval wrote:
> From: Omar Sandoval <osandov@xxxxxx>
> 
> Currently, the sync() syscall is system-wide, so any process in a
> container can cause significant I/O stalls across the system by calling
> sync(). This is true even for filesystems that are not accessible in
> the process's mount namespace. This patch scopes sync() to only write out
> filesystems reachable in the current mount namespace, except for the
> initial mount namespace, which still syncs everything to avoid
> surprises. This fixes the broken isolation we were seeing here.
> 
> Signed-off-by: Omar Sandoval <osandov@xxxxxx>
> ---
>  fs/sync.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++--------------
>  1 file changed, 51 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/sync.c b/fs/sync.c
> index 6e0a2cbaf6de..bde1e3196298 100644
> --- a/fs/sync.c
> +++ b/fs/sync.c
> @@ -17,6 +17,7 @@
>  #include <linux/quotaops.h>
>  #include <linux/backing-dev.h>
>  #include "internal.h"
> +#include "mount.h"
>  
>  #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
>  			SYNC_FILE_RANGE_WAIT_AFTER)
> @@ -68,16 +69,46 @@ int sync_filesystem(struct super_block *sb)
>  }
>  EXPORT_SYMBOL(sync_filesystem);
>  
> -static void sync_inodes_one_sb(struct super_block *sb, void *arg)
> +struct sb_sync {
> +	/*
> +	 * Only sync superblocks reachable from this namespace. If NULL, sync
> +	 * everything.
> +	 */
> +	struct mnt_namespace *mnt_ns;
> +
> +	/* ->sync_fs() wait argument. */
> +	int wait;
> +};
> +
> +static int sb_reachable(struct super_block *sb, struct mnt_namespace *mnt_ns)
> +{
> +	struct mount *mnt;
> +
> +	if (!mnt_ns)
> +		return 1;
> +
> +	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
> +		if (mnt->mnt_ns == mnt_ns)
> +			return 1;
> +	}

Sigh, of course, I forgot to grab the proper locks here: the loop above walks sb->s_mounts without holding any lock. Will send a v2.

> +	return 0;
> +}
> +
> +static void sync_inodes_one_sb(struct super_block *sb, void *p)
>  {
> -	if (!sb_rdonly(sb))
> +	struct sb_sync *arg = p;
> +
> +	if (!sb_rdonly(sb) && sb_reachable(sb, arg->mnt_ns))
>  		sync_inodes_sb(sb);
>  }
>  
> -static void sync_fs_one_sb(struct super_block *sb, void *arg)
> +static void sync_fs_one_sb(struct super_block *sb, void *p)
>  {
> -	if (!sb_rdonly(sb) && sb->s_op->sync_fs)
> -		sb->s_op->sync_fs(sb, *(int *)arg);
> +	struct sb_sync *arg = p;
> +
> +	if (!sb_rdonly(sb) && sb_reachable(sb, arg->mnt_ns) &&
> +	    sb->s_op->sync_fs)
> +		sb->s_op->sync_fs(sb, arg->wait);
>  }
>  
>  static void fdatawrite_one_bdev(struct block_device *bdev, void *arg)
> @@ -107,12 +138,18 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
>   */
>  SYSCALL_DEFINE0(sync)
>  {
> -	int nowait = 0, wait = 1;
> +	struct sb_sync arg = {
> +		.mnt_ns = current->nsproxy->mnt_ns,
> +	};
> +
> +	if (arg.mnt_ns == init_task.nsproxy->mnt_ns)
> +		arg.mnt_ns = NULL;
>  
>  	wakeup_flusher_threads(WB_REASON_SYNC);
> -	iterate_supers(sync_inodes_one_sb, NULL);
> -	iterate_supers(sync_fs_one_sb, &nowait);
> -	iterate_supers(sync_fs_one_sb, &wait);
> +	iterate_supers(sync_inodes_one_sb, &arg);
> +	iterate_supers(sync_fs_one_sb, &arg);
> +	arg.wait = 1;
> +	iterate_supers(sync_fs_one_sb, &arg);
>  	iterate_bdevs(fdatawrite_one_bdev, NULL);
>  	iterate_bdevs(fdatawait_one_bdev, NULL);
>  	if (unlikely(laptop_mode))
> @@ -122,17 +159,17 @@ SYSCALL_DEFINE0(sync)
>  
>  static void do_sync_work(struct work_struct *work)
>  {
> -	int nowait = 0;
> +	struct sb_sync arg = {};
>  
>  	/*
>  	 * Sync twice to reduce the possibility we skipped some inodes / pages
>  	 * because they were temporarily locked
>  	 */
> -	iterate_supers(sync_inodes_one_sb, &nowait);
> -	iterate_supers(sync_fs_one_sb, &nowait);
> +	iterate_supers(sync_inodes_one_sb, &arg);
> +	iterate_supers(sync_fs_one_sb, &arg);
>  	iterate_bdevs(fdatawrite_one_bdev, NULL);
> -	iterate_supers(sync_inodes_one_sb, &nowait);
> -	iterate_supers(sync_fs_one_sb, &nowait);
> +	iterate_supers(sync_inodes_one_sb, &arg);
> +	iterate_supers(sync_fs_one_sb, &arg);
>  	iterate_bdevs(fdatawrite_one_bdev, NULL);
>  	printk("Emergency Sync complete\n");
>  	kfree(work);
> -- 
> 2.16.1
> 



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux