Re: [PATCH v2 6/9] mm/mshare: Add mmap operation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Jun 29, 2022 at 04:53:57PM -0600, Khalid Aziz wrote:
> mmap is used to establish address range for mshare region and map the
> region into process's address space. Add basic mmap operation that
> supports setting address range. Also fix code to not allocate new
> mm_struct for files in msharefs that exist for information and not
> for defining a new mshare region.
> 
> Signed-off-by: Khalid Aziz <khalid.aziz@xxxxxxxxxx>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>
> ---
>  mm/mshare.c | 48 +++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 41 insertions(+), 7 deletions(-)
> 
> diff --git a/mm/mshare.c b/mm/mshare.c
> index d238b68b0576..088a6cab1e93 100644
> --- a/mm/mshare.c
> +++ b/mm/mshare.c
> @@ -9,7 +9,8 @@
>   *
>   *
>   * Copyright (C) 2022 Oracle Corp. All rights reserved.
> - * Author:	Khalid Aziz <khalid.aziz@xxxxxxxxxx>
> + * Authors:	Khalid Aziz <khalid.aziz@xxxxxxxxxx>
> + *		Matthew Wilcox <willy@xxxxxxxxxxxxx>
>   *
>   */
>  
> @@ -60,9 +61,36 @@ msharefs_read(struct kiocb *iocb, struct iov_iter *iov)
>  	return ret;
>  }
>  
> +static int
> +msharefs_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> +	struct mshare_data *info = file->private_data;
> +	struct mm_struct *mm = info->mm;
> +
> +	/*
> +	 * If this mshare region has been set up once already, bail out
> +	 */
> +	if (mm->mmap_base != 0)
> +		return -EINVAL;
> +
> +	if ((vma->vm_start | vma->vm_end) & (PGDIR_SIZE - 1))
> +		return -EINVAL;
> +
> +	mm->mmap_base = vma->vm_start;
> +	mm->task_size = vma->vm_end - vma->vm_start;
> +	if (!mm->task_size)
> +		mm->task_size--;
> +	info->minfo->start = mm->mmap_base;
> +	info->minfo->size = mm->task_size;

So, uh, if a second mmap() caller ignores the address range published in
mshare_info and maps the file at a different address, should they get an
-EINVAL here, since the shared mappings won't be at the same virtual
address in every process?

> +	vma->vm_flags |= VM_SHARED_PT;
> +	vma->vm_private_data = info;
> +	return 0;
> +}
> +
>  static const struct file_operations msharefs_file_operations = {
>  	.open		= msharefs_open,
>  	.read_iter	= msharefs_read,
> +	.mmap		= msharefs_mmap,
>  	.llseek		= no_llseek,
>  };
>  
> @@ -119,7 +147,12 @@ msharefs_fill_mm(struct inode *inode)
>  		goto err_free;
>  	}
>  	info->mm = mm;
> -	info->minfo = NULL;
> +	info->minfo = kzalloc(sizeof(struct mshare_info), GFP_KERNEL);
> +	if (info->minfo == NULL) {
> +		retval = -ENOMEM;
> +		goto err_free;
> +	}
> +
>  	refcount_set(&info->refcnt, 1);
>  	inode->i_private = info;
>  
> @@ -128,13 +161,14 @@ msharefs_fill_mm(struct inode *inode)
>  err_free:
>  	if (mm)
>  		mmput(mm);
> +	kfree(info->minfo);
>  	kfree(info);
>  	return retval;
>  }
>  
>  static struct inode
>  *msharefs_get_inode(struct super_block *sb, const struct inode *dir,
> -			umode_t mode)
> +			umode_t mode, bool newmm)
>  {
>  	struct inode *inode = new_inode(sb);
>  	if (inode) {
> @@ -147,7 +181,7 @@ static struct inode
>  		case S_IFREG:
>  			inode->i_op = &msharefs_file_inode_ops;
>  			inode->i_fop = &msharefs_file_operations;
> -			if (msharefs_fill_mm(inode) != 0) {
> +			if (newmm && msharefs_fill_mm(inode) != 0) {
>  				discard_new_inode(inode);
>  				inode = ERR_PTR(-ENOMEM);
>  			}
> @@ -177,7 +211,7 @@ msharefs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
>  	struct inode *inode;
>  	int err = 0;
>  
> -	inode = msharefs_get_inode(dir->i_sb, dir, mode);
> +	inode = msharefs_get_inode(dir->i_sb, dir, mode, true);
>  	if (IS_ERR(inode))
>  		return PTR_ERR(inode);
>  
> @@ -267,7 +301,7 @@ prepopulate_files(struct super_block *s, struct inode *dir,
>  		if (!dentry)
>  			return -ENOMEM;
>  
> -		inode = msharefs_get_inode(s, dir, S_IFREG | files->mode);
> +		inode = msharefs_get_inode(s, dir, S_IFREG | files->mode, false);

I was wondering why the information files were getting their own
mshare_data.

TBH I'm not really sure what the difference is between mshare_data and
mshare_info, since those names are not especially distinct.

>  		if (!inode) {
>  			dput(dentry);
>  			return -ENOMEM;
> @@ -301,7 +335,7 @@ msharefs_fill_super(struct super_block *sb, struct fs_context *fc)
>  	sb->s_d_op		= &msharefs_d_ops;
>  	sb->s_time_gran		= 1;
>  
> -	inode = msharefs_get_inode(sb, NULL, S_IFDIR | 0777);
> +	inode = msharefs_get_inode(sb, NULL, S_IFDIR | 0777, false);

Is it wise to default the root directory to world-writable (0777)?  Surely
whatever userspace software wraps an msharefs can relax permissions as needed.

--D

>  	if (!inode) {
>  		err = -ENOMEM;
>  		goto out;
> -- 
> 2.32.0
> 



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux