On Sat, Aug 29, 2020 at 12:51 PM Chengguang Xu <cgxu519@xxxxxxxxxxxx> wrote: > > Implement stacked mmap for shared map to keep data > consistency. > > Signed-off-by: Chengguang Xu <cgxu519@xxxxxxxxxxxx> > --- > fs/overlayfs/file.c | 120 +++++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 114 insertions(+), 6 deletions(-) > > diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c > index 14ab5344a918..db5ab200d984 100644 > --- a/fs/overlayfs/file.c > +++ b/fs/overlayfs/file.c > @@ -21,9 +21,17 @@ struct ovl_aio_req { > struct fd fd; > }; > > +static vm_fault_t ovl_fault(struct vm_fault *vmf); > +static vm_fault_t ovl_page_mkwrite(struct vm_fault *vmf); > + > +static const struct vm_operations_struct ovl_vm_ops = { > + .fault = ovl_fault, > + .page_mkwrite = ovl_page_mkwrite, > +}; > + Interesting direction, but I am not sure whether it is workable; I don't know enough about mm to say. But what about the rest of the vm_operations_struct operations? Did you go over them and decide that overlayfs doesn't need to implement them? I doubt it, but if you did, please document that. 
> struct ovl_file_entry { > struct file *realfile; > - void *vm_ops; > + const struct vm_operations_struct *vm_ops; > }; > > struct file *ovl_get_realfile(struct file *file) > @@ -40,14 +48,15 @@ void ovl_set_realfile(struct file *file, struct file *realfile) > ofe->realfile = realfile; > } > > -void *ovl_get_real_vmops(struct file *file) > +const struct vm_operations_struct *ovl_get_real_vmops(struct file *file) > { > struct ovl_file_entry *ofe = file->private_data; > > return ofe->vm_ops; > } > > -void ovl_set_real_vmops(struct file *file, void *vm_ops) > +void ovl_set_real_vmops(struct file *file, > + const struct vm_operations_struct *vm_ops) > { > struct ovl_file_entry *ofe = file->private_data; > > @@ -493,11 +502,104 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync) > return ret; > } > > +vm_fault_t ovl_fault(struct vm_fault *vmf) > +{ > + struct vm_area_struct *vma = vmf->vma; > + struct file *file = vma->vm_file; > + struct file *realfile; > + struct file *fpin, *tmp; > + struct inode *inode = file_inode(file); > + struct inode *realinode; > + const struct cred *old_cred; > + bool retry_allowed; > + vm_fault_t ret; > + int err = 0; > + > + if (fault_flag_check(vmf, FAULT_FLAG_TRIED)) { > + realfile = ovl_get_realfile(file); > + > + if (!ovl_has_upperdata(inode) || > + realfile->f_inode != ovl_inode_upper(inode) || > + !realfile->f_op->mmap) > + return VM_FAULT_SIGBUS; > + > + if (!ovl_get_real_vmops(file)) { > + old_cred = ovl_override_creds(inode->i_sb); > + err = call_mmap(realfile, vma); > + revert_creds(old_cred); > + > + vma->vm_file = file; > + if (err) { > + vma->vm_ops = &ovl_vm_ops; > + return VM_FAULT_SIGBUS; > + } > + ovl_set_real_vmops(file, vma->vm_ops); > + vma->vm_ops = &ovl_vm_ops; > + } > + > + retry_allowed = fault_flag_check(vmf, FAULT_FLAG_ALLOW_RETRY); > + if (retry_allowed) > + vma->vm_flags &= ~FAULT_FLAG_ALLOW_RETRY; > + vma->vm_file = realfile; > + ret = ovl_get_real_vmops(file)->fault(vmf); > + 
vma->vm_file = file; > + if (retry_allowed) > + vma->vm_flags |= FAULT_FLAG_ALLOW_RETRY; > + return ret; > + > + } else { > + fpin = maybe_unlock_mmap_for_io(vmf, NULL); > + if (!fpin) > + return VM_FAULT_SIGBUS; > + > + ret = VM_FAULT_RETRY; > + if (!ovl_has_upperdata(inode)) { > + err = ovl_copy_up_with_data(file->f_path.dentry); > + if (err) > + goto out; > + } > + > + realinode = ovl_inode_realdata(inode); > + realfile = ovl_open_realfile(file, realinode); > + if (IS_ERR(realfile)) > + goto out; > + > + tmp = ovl_get_realfile(file); > + ovl_set_realfile(file, realfile); > + fput(tmp); > + > +out: > + fput(fpin); > + return ret; > + } > +} Please add some documentation explaining the method used. Also, do we need to retry if the real vm_ops are already set? Thanks, Amir.