Re: [PATCH] mount: fix mounting of detached mounts onto targets that reside on shared mounts

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Mar 04, 2021 at 06:41:55PM +0100, Christian Brauner wrote:
> Creating a series of detached mounts, attaching them to the filesystem,
> and unmounting them can be used to trigger an integer overflow in
> ns->mounts causing the kernel to block any new mounts in count_mounts()
> and returning ENOSPC because it falsely assumes that the maximum number
> of mounts in the mount namespace has been reached, i.e. it thinks it
> can't fit the new mounts into the mount namespace anymore.
> 
> Depending on the number of mounts in your system, this can be reproduced
> on any kernel that supports open_tree() and move_mount() with the
> following instructions:
> 
> 1. Compile the following program "repro.c" via "make repro"
>   > cat repro.c
> 
>   #define _GNU_SOURCE
>   #include <errno.h>
>   #include <fcntl.h>
>   #include <getopt.h>
>   #include <limits.h>
>   #include <stdbool.h>
>   #include <stdio.h>
>   #include <stdlib.h>
>   #include <linux/mount.h>
>   #include <sys/syscall.h>
>   #include <sys/types.h>
>   #include <unistd.h>
> 
>   /* open_tree() */
>   #ifndef OPEN_TREE_CLONE
>   #define OPEN_TREE_CLONE 1
>   #endif
> 
>   #ifndef OPEN_TREE_CLOEXEC
>   #define OPEN_TREE_CLOEXEC O_CLOEXEC
>   #endif
> 
>   #ifndef __NR_open_tree
>   	#if defined __alpha__
>   		#define __NR_open_tree 538
>   	#elif defined _MIPS_SIM
>   		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
>   			#define __NR_open_tree 4428
>   		#endif
>   		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
>   			#define __NR_open_tree 6428
>   		#endif
>   		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
>   			#define __NR_open_tree 5428
>   		#endif
>   	#elif defined __ia64__
>   		#define __NR_open_tree (428 + 1024)
>   	#else
>   		#define __NR_open_tree 428
>   	#endif
>   #endif
> 
>   /* move_mount() */
>   #ifndef MOVE_MOUNT_F_EMPTY_PATH
>   #define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
>   #endif
> 
>   #ifndef __NR_move_mount
>   	#if defined __alpha__
>   		#define __NR_move_mount 539
>   	#elif defined _MIPS_SIM
>   		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
>   			#define __NR_move_mount 4429
>   		#endif
>   		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
>   			#define __NR_move_mount 6429
>   		#endif
>   		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
>   			#define __NR_move_mount 5429
>   		#endif
>   	#elif defined __ia64__
>   		#define __NR_move_mount (428 + 1024)
>   	#else
>   		#define __NR_move_mount 429
>   	#endif
>   #endif
> 
>   static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
>   {
>   	return syscall(__NR_open_tree, dfd, filename, flags);
>   }
> 
>   static inline int sys_move_mount(int from_dfd, const char *from_pathname, int to_dfd,
>   				 const char *to_pathname, unsigned int flags)
>   {
>   	return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags);
>   }
> 
>   static void usage(void)
>   {
>   	const char *text = "mount-new [--recursive] <source> <target>\n";
>   	fprintf(stderr, "%s", text);
>   	_exit(EXIT_SUCCESS);
>   }
> 
>   #define exit_usage(format, ...)                         \
>   	({                                              \
>   		fprintf(stderr, format, ##__VA_ARGS__); \
>   		usage();                                \
>   	})
> 
>   #define exit_log(format, ...)                           \
>   	({                                              \
>   		fprintf(stderr, format, ##__VA_ARGS__); \
>   		exit(EXIT_FAILURE);                     \
>   	})
> 
>   static const struct option longopts[] = {
>   	{"recursive",	no_argument,		0,	'a'},
>   	{"help",	no_argument,		0,	'b'},
>   	{ NULL,		no_argument,		0,	 0 },
>   };
> 
>   int main(int argc, char *argv[])
>   {
>   	int index = 0;
>   	bool recursive = false;
>   	int fd_tree, new_argc, ret;
>   	char *source, *target;
>   	char *const *new_argv;
> 
>   	while ((ret = getopt_long_only(argc, argv, "", longopts, &index)) != -1) {
>   		switch (ret) {
>   		case 'a':
>   			recursive = true;
>   			break;
>   		case 'b':
>   			/* fallthrough */
>   		default:
>   			usage();
>   		}
>   	}
> 
>   	new_argv = &argv[optind];
>   	new_argc = argc - optind;
>   	if (new_argc < 2)
>   		exit_usage("Missing source or target mountpoint\n\n");
>   	source = new_argv[0];
>   	target = new_argv[1];
> 
>   	fd_tree = sys_open_tree(-EBADF, source,
>   				OPEN_TREE_CLONE |
>   				OPEN_TREE_CLOEXEC |
>   				AT_EMPTY_PATH |
>   				(recursive ? AT_RECURSIVE : 0));
>   	if (fd_tree < 0) {
>   		exit_log("%m - Failed to open %s\n", source);
>   		exit(EXIT_FAILURE);
>   	}
> 
>   	ret = sys_move_mount(fd_tree, "", -EBADF, target, MOVE_MOUNT_F_EMPTY_PATH);
>   	if (ret < 0)
>   		exit_log("%m - Failed to attach mount to %s\n", target);
>   	close(fd_tree);
> 
>   	exit(EXIT_SUCCESS);
>   }
> 
> 2. Run a loop like:
> 
> while true; do sudo ./repro; done

The full reproducer is:

while true; do sudo ./repro /mnt /mnt; sudo umount -l /mnt; done

> 
> and wait for the kernel to refuse any new mounts by returning ENOSPC.
> Depending on the number of mounts you have on the system this might take
> shorter or longer.
> 
> The root cause of this is that detached mounts aren't handled correctly
> when the destination mount point is MS_SHARED, causing a borked mount
> tree and ultimately a miscalculation of the number of mounts in the
> mount namespace.
> 
> Detached mounts created via
> open_tree(fd, path, OPEN_TREE_CLONE)
> are essentially like an unattached new mount, or an unattached
> bind-mount. They can then later on be attached to the filesystem via
> move_mount() which calls into attach_recursive_mount(). Part of
> attaching it to the filesystem is making sure that mounts get correctly
> propagated in case the destination mountpoint is MS_SHARED, i.e. is a
> shared mountpoint. This is done by calling into propagate_mnt() which
> walks the list of peers calling propagate_one() on each mount in this
> list making sure it receives the propagation event.
> For new mounts and bind-mounts propagate_one() knows to skip them by
> realizing that there's no mount namespace attached to them.
> However, detached mounts have an anonymous mount namespace attached to
> them and so they aren't skipped causing the mount table to get wonky and
> ultimately preventing any new mounts via the ENOSPC issue.
> 
> So teach propagation to handle detached mounts by making it aware of
> them. I've been tracking this issue down for the last couple of days by
> unmounting everything in my current mount table leaving only /proc and
> /sys mounted and running the reproducer above verifying the counting of
> the mounts. With this fix the counts are correct and the ENOSPC issue
> can't be reproduced.
> 
> Fixes: 2db154b3ea8e ("vfs: syscall: Add move_mount(2) to move mounts around")
> Cc: Christoph Hellwig <hch@xxxxxx>
> Cc: David Howells <dhowells@xxxxxxxxxx>
> Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
> Cc: linux-fsdevel@xxxxxxxxxxxxxxx
> Cc: <stable@xxxxxxxxxxxxxxx>
> Signed-off-by: Christian Brauner <christian.brauner@xxxxxxxxxx>
> ---
>  fs/pnode.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/fs/pnode.h b/fs/pnode.h
> index 26f74e092bd9..988f1aa9b02a 100644
> --- a/fs/pnode.h
> +++ b/fs/pnode.h
> @@ -12,7 +12,7 @@
>  
>  #define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED)
>  #define IS_MNT_SLAVE(m) ((m)->mnt_master)
> -#define IS_MNT_NEW(m)  (!(m)->mnt_ns)
> +#define IS_MNT_NEW(m)  (!(m)->mnt_ns || is_anon_ns((m)->mnt_ns))
>  #define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED)
>  #define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE)
>  #define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED)
> 
> base-commit: f69d02e37a85645aa90d18cacfff36dba370f797
> -- 
> 2.27.0
> 



[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux