Re: [RFC PATCH 3/3] ovl: implement stacked mmap for shared map

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Aug 31, 2020 at 4:47 PM cgxu <cgxu519@xxxxxxxxxxxx> wrote:
>
> On 8/30/20 7:33 PM, Amir Goldstein wrote:
> > On Sat, Aug 29, 2020 at 12:51 PM Chengguang Xu <cgxu519@xxxxxxxxxxxx> wrote:
> >>
> >> Implement stacked mmap for shared map to keep data
> >> consistency.
> >>
> >> Signed-off-by: Chengguang Xu <cgxu519@xxxxxxxxxxxx>
> >> ---
> >>   fs/overlayfs/file.c | 120 +++++++++++++++++++++++++++++++++++++++++---
> >>   1 file changed, 114 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> >> index 14ab5344a918..db5ab200d984 100644
> >> --- a/fs/overlayfs/file.c
> >> +++ b/fs/overlayfs/file.c
> >> @@ -21,9 +21,17 @@ struct ovl_aio_req {
> >>          struct fd fd;
> >>   };
> >>
> >> +static vm_fault_t ovl_fault(struct vm_fault *vmf);
> >> +static vm_fault_t ovl_page_mkwrite(struct vm_fault *vmf);
> >> +
> >> +static const struct vm_operations_struct ovl_vm_ops = {
> >> +       .fault          = ovl_fault,
> >> +       .page_mkwrite   = ovl_page_mkwrite,
> >> +};
> >> +
> >
> > Interesting direction, not sure if this is workable.
> > I don't know enough about mm to say.
> >
> > But what about the rest of the operations?
> > Did you go over them and decide that overlay doesn't need to implement them?
> > I doubt it, but if you did, please document that.
>
> I checked the rest of them. IIUC, ->fault will be enough for this
> special case (shared read-only mmap with no upper), so I will remove
> ->page_mkwrite in v2.

Ok I suppose you checked that ->map_pages is not relevant?

>
> # I am not considering support for ->huge_fault at this stage because
> many filesystems cannot support DAX properly.
>
> BTW, do you know whom I should add to the CC list for a deeper review of
> this code? The fsdevel list?
>

fsdevel would be good, but I would wait for initial feedback from Miklos
before you post v2...

>
>
> >
> >>   struct ovl_file_entry {
> >>          struct file *realfile;
> >> -       void *vm_ops;
> >> +       const struct vm_operations_struct *vm_ops;
> >>   };
> >>
> >>   struct file *ovl_get_realfile(struct file *file)
> >> @@ -40,14 +48,15 @@ void ovl_set_realfile(struct file *file, struct file *realfile)
> >>          ofe->realfile = realfile;
> >>   }
> >>
> >> -void *ovl_get_real_vmops(struct file *file)
> >> +const struct vm_operations_struct *ovl_get_real_vmops(struct file *file)
> >>   {
> >>          struct ovl_file_entry *ofe = file->private_data;
> >>
> >>          return ofe->vm_ops;
> >>   }
> >>
> >> -void ovl_set_real_vmops(struct file *file, void *vm_ops)
> >> +void ovl_set_real_vmops(struct file *file,
> >> +                       const struct vm_operations_struct *vm_ops)
> >>   {
> >>          struct ovl_file_entry *ofe = file->private_data;
> >>
> >> @@ -493,11 +502,104 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
> >>          return ret;
> >>   }
> >>
> >> +vm_fault_t ovl_fault(struct vm_fault *vmf)
> >> +{
> >> +       struct vm_area_struct *vma = vmf->vma;
> >> +       struct file *file = vma->vm_file;
> >> +       struct file *realfile;
> >> +       struct file *fpin, *tmp;
> >> +       struct inode *inode = file_inode(file);
> >> +       struct inode *realinode;
> >> +       const struct cred *old_cred;
> >> +       bool retry_allowed;
> >> +       vm_fault_t ret;
> >> +       int err = 0;
> >> +
> >> +       if (fault_flag_check(vmf, FAULT_FLAG_TRIED)) {
> >> +               realfile = ovl_get_realfile(file);
> >> +
> >> +               if (!ovl_has_upperdata(inode) ||
> >> +                   realfile->f_inode != ovl_inode_upper(inode) ||
> >> +                   !realfile->f_op->mmap)
> >> +                       return VM_FAULT_SIGBUS;
> >> +
> >> +               if (!ovl_get_real_vmops(file)) {
> >> +                       old_cred = ovl_override_creds(inode->i_sb);
> >> +                       err = call_mmap(realfile, vma);
> >> +                       revert_creds(old_cred);
> >> +
> >> +                       vma->vm_file = file;
> >> +                       if (err) {
> >> +                               vma->vm_ops = &ovl_vm_ops;
> >> +                               return VM_FAULT_SIGBUS;
> >> +                       }
> >> +                       ovl_set_real_vmops(file, vma->vm_ops);
> >> +                       vma->vm_ops = &ovl_vm_ops;
> >> +               }
> >> +
> >> +               retry_allowed = fault_flag_check(vmf, FAULT_FLAG_ALLOW_RETRY);
> >> +               if (retry_allowed)
> >> +                       vma->vm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
> >> +               vma->vm_file = realfile;
> >> +               ret = ovl_get_real_vmops(file)->fault(vmf);
> >> +               vma->vm_file = file;
> >> +               if (retry_allowed)
> >> +                       vma->vm_flags |= FAULT_FLAG_ALLOW_RETRY;
> >> +               return ret;
> >> +
> >> +       } else {
> >> +               fpin = maybe_unlock_mmap_for_io(vmf, NULL);
> >> +               if (!fpin)
> >> +                       return VM_FAULT_SIGBUS;
> >> +
> >> +               ret = VM_FAULT_RETRY;
> >> +               if (!ovl_has_upperdata(inode)) {
> >> +                       err = ovl_copy_up_with_data(file->f_path.dentry);
> >> +                       if (err)
> >> +                               goto out;
> >> +               }
> >> +
> >> +               realinode = ovl_inode_realdata(inode);
> >> +               realfile = ovl_open_realfile(file, realinode);
> >> +               if (IS_ERR(realfile))
> >> +                       goto out;
> >> +
> >> +               tmp = ovl_get_realfile(file);
> >> +               ovl_set_realfile(file, realfile);
> >> +               fput(tmp);
> >> +
> >> +out:
> >> +               fput(fpin);
> >> +               return ret;
> >> +       }
> >> +}
> >
> >
> > Please add some documentation to explain the method used.
> > Do we need to retry if real_vmops are already set?
> >
>
> Good catch; actually, a retry is not needed in that case.
>
> Basically, when we detect that there is no upper inode, we unlock
> mmap_lock, copy up, open the real file, and then retry the fault.
> However, we need to check the fault retry flag carefully
> to avoid endless retries.

That much I got, but the details of setting ->vm_file and vmops
look subtle, so better explain them.

Thanks,
Amir.



[Index of Archives]     [Linux Filesystems Devel]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux