Re: [PATCH v6 3/5] mm: introduce mmap3 for safely defining new mmap flags

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed 23-08-17 16:48:51, Dan Williams wrote:
> The mmap(2) syscall suffers from the ABI anti-pattern of not validating
> unknown flags. However, proposals like MAP_SYNC and MAP_DIRECT need a
> mechanism to define new behavior that is known to fail on older kernels
> without the support. Define a new mmap3 syscall that checks for
> unsupported flags at syscall entry and add a 'mmap_supported_mask' to
> 'struct file_operations' so generic code can validate the ->mmap()
> handler knows about the specified flags. This also arranges for the
> flags to be passed to the handler so it can do further local validation
> if the requested behavior can be fulfilled.
> 
> Cc: Jan Kara <jack@xxxxxxx>
> Cc: Arnd Bergmann <arnd@xxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Suggested-by: Andy Lutomirski <luto@xxxxxxxxxx>
> Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>

OK, are we sold on this approach to introduction of new mmap flags? I'm
asking because working API for mmap flag is basically the only thing that's
missing from my MAP_SYNC patches so I'd like to rebase my patches onto
something that is working...

								Honza


> ---
>  arch/x86/entry/syscalls/syscall_32.tbl |    1 +
>  arch/x86/entry/syscalls/syscall_64.tbl |    1 +
>  include/linux/fs.h                     |    1 +
>  include/linux/mm.h                     |    2 +-
>  include/linux/mman.h                   |   42 ++++++++++++++++++++++++++++++++
>  include/linux/syscalls.h               |    3 ++
>  mm/mmap.c                              |   32 ++++++++++++++++++++++--
>  7 files changed, 78 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
> index 448ac2161112..0618b5b38b45 100644
> --- a/arch/x86/entry/syscalls/syscall_32.tbl
> +++ b/arch/x86/entry/syscalls/syscall_32.tbl
> @@ -391,3 +391,4 @@
>  382	i386	pkey_free		sys_pkey_free
>  383	i386	statx			sys_statx
>  384	i386	arch_prctl		sys_arch_prctl			compat_sys_arch_prctl
> +385	i386	mmap3			sys_mmap_pgoff_strict
> diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
> index 5aef183e2f85..e204c736d7e9 100644
> --- a/arch/x86/entry/syscalls/syscall_64.tbl
> +++ b/arch/x86/entry/syscalls/syscall_64.tbl
> @@ -339,6 +339,7 @@
>  330	common	pkey_alloc		sys_pkey_alloc
>  331	common	pkey_free		sys_pkey_free
>  332	common	statx			sys_statx
> +333	common  mmap3			sys_mmap_pgoff_strict
>  
>  #
>  # x32-specific system call numbers start at 512 to avoid cache impact
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 33d1ee8f51be..db42da9f98c4 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1674,6 +1674,7 @@ struct file_operations {
>  	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
>  	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
>  	int (*mmap) (struct file *, struct vm_area_struct *, unsigned long);
> +	unsigned long mmap_supported_mask;
>  	int (*open) (struct inode *, struct file *);
>  	int (*flush) (struct file *, fl_owner_t id);
>  	int (*release) (struct inode *, struct file *);
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 46b9ac5e8569..49eef48da4b7 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2090,7 +2090,7 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo
>  
>  extern unsigned long mmap_region(struct file *file, unsigned long addr,
>  	unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
> -	struct list_head *uf);
> +	struct list_head *uf, unsigned long flags);
>  extern unsigned long do_mmap(struct file *file, unsigned long addr,
>  	unsigned long len, unsigned long prot, unsigned long flags,
>  	vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
> diff --git a/include/linux/mman.h b/include/linux/mman.h
> index c8367041fafd..64b6cb3dec70 100644
> --- a/include/linux/mman.h
> +++ b/include/linux/mman.h
> @@ -7,6 +7,48 @@
>  #include <linux/atomic.h>
>  #include <uapi/linux/mman.h>
>  
> +/*
> + * Arrange for undefined architecture specific flags to be rejected by
> + * default.
> + */
> +#ifndef MAP_32BIT
> +#define MAP_32BIT 0
> +#endif
> +#ifndef MAP_HUGE_2MB
> +#define MAP_HUGE_2MB 0
> +#endif
> +#ifndef MAP_HUGE_1GB
> +#define MAP_HUGE_1GB 0
> +#endif
> +#ifndef MAP_UNINITIALIZED
> +#define MAP_UNINITIALIZED 0
> +#endif
> +
> +/*
> + * The historical set of flags that all mmap implementations implicitly
> + * support when file_operations.mmap_supported_mask is zero. With the
> + * mmap3 syscall the deprecated MAP_DENYWRITE and MAP_EXECUTABLE bit
> + * values are explicitly rejected with EOPNOTSUPP rather than being
> + * silently accepted.
> + */
> +#define LEGACY_MAP_SUPPORTED_MASK (MAP_SHARED \
> +		| MAP_PRIVATE \
> +		| MAP_FIXED \
> +		| MAP_ANONYMOUS \
> +		| MAP_UNINITIALIZED \
> +		| MAP_GROWSDOWN \
> +		| MAP_LOCKED \
> +		| MAP_NORESERVE \
> +		| MAP_POPULATE \
> +		| MAP_NONBLOCK \
> +		| MAP_STACK \
> +		| MAP_HUGETLB \
> +		| MAP_32BIT \
> +		| MAP_HUGE_2MB \
> +		| MAP_HUGE_1GB)
> +
> +#define	MAP_SUPPORTED_MASK (LEGACY_MAP_SUPPORTED_MASK)
> +
>  extern int sysctl_overcommit_memory;
>  extern int sysctl_overcommit_ratio;
>  extern unsigned long sysctl_overcommit_kbytes;
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index 3cb15ea48aee..c0e0c99cf4ad 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -858,6 +858,9 @@ asmlinkage long sys_perf_event_open(
>  asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len,
>  			unsigned long prot, unsigned long flags,
>  			unsigned long fd, unsigned long pgoff);
> +asmlinkage long sys_mmap_pgoff_strict(unsigned long addr, unsigned long len,
> +			unsigned long prot, unsigned long flags,
> +			unsigned long fd, unsigned long pgoff);
>  asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg);
>  asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name,
>  				      struct file_handle __user *handle,
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 744faae86781..386706831d67 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -1464,7 +1464,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
>  			vm_flags |= VM_NORESERVE;
>  	}
>  
> -	addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
> +	addr = mmap_region(file, addr, len, vm_flags, pgoff, uf, flags);
>  	if (!IS_ERR_VALUE(addr) &&
>  	    ((vm_flags & VM_LOCKED) ||
>  	     (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
> @@ -1521,6 +1521,32 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
>  	return retval;
>  }
>  
> +SYSCALL_DEFINE6(mmap_pgoff_strict, unsigned long, addr, unsigned long, len,
> +		unsigned long, prot, unsigned long, flags,
> +		unsigned long, fd, unsigned long, pgoff)
> +{
> +	if (flags & ~(MAP_SUPPORTED_MASK))
> +		return -EOPNOTSUPP;
> +
> +	if (!(flags & MAP_ANONYMOUS)) {
> +		unsigned long f_supported;
> +		struct file *file;
> +
> +		audit_mmap_fd(fd, flags);
> +		file = fget(fd);
> +		if (!file)
> +			return -EBADF;
> +		f_supported = file->f_op->mmap_supported_mask;
> +		fput(file);
> +		if (!f_supported)
> +			f_supported = LEGACY_MAP_SUPPORTED_MASK;
> +		if (flags & ~f_supported)
> +			return -EOPNOTSUPP;
> +	}
> +
> +	return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
> +}
> +
>  #ifdef __ARCH_WANT_SYS_OLD_MMAP
>  struct mmap_arg_struct {
>  	unsigned long addr;
> @@ -1601,7 +1627,7 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
>  
>  unsigned long mmap_region(struct file *file, unsigned long addr,
>  		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
> -		struct list_head *uf)
> +		struct list_head *uf, unsigned long flags)
>  {
>  	struct mm_struct *mm = current->mm;
>  	struct vm_area_struct *vma, *prev;
> @@ -1686,7 +1712,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
>  		 * new file must not have been exposed to user-space, yet.
>  		 */
>  		vma->vm_file = get_file(file);
> -		error = call_mmap(file, vma, 0);
> +		error = call_mmap(file, vma, flags);
>  		if (error)
>  			goto unmap_and_free_vma;
>  
> 
-- 
Jan Kara <jack@xxxxxxxx>
SUSE Labs, CR



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux