Implement stacked mmap for shared map to keep data consistency.

Signed-off-by: Chengguang Xu <cgxu519@xxxxxxxxxxxx>
---
 fs/overlayfs/file.c | 150 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 144 insertions(+), 6 deletions(-)

diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 14ab5344a918..db5ab200d984 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -21,9 +21,17 @@ struct ovl_aio_req {
 	struct fd fd;
 };
 
+static vm_fault_t ovl_fault(struct vm_fault *vmf);
+static vm_fault_t ovl_page_mkwrite(struct vm_fault *vmf);
+
+static const struct vm_operations_struct ovl_vm_ops = {
+	.fault		= ovl_fault,
+	.page_mkwrite	= ovl_page_mkwrite,
+};
+
 struct ovl_file_entry {
 	struct file *realfile;
-	void *vm_ops;
+	const struct vm_operations_struct *vm_ops;
 };
 
 struct file *ovl_get_realfile(struct file *file)
@@ -40,14 +48,15 @@ void ovl_set_realfile(struct file *file, struct file *realfile)
 	ofe->realfile = realfile;
 }
 
-void *ovl_get_real_vmops(struct file *file)
+const struct vm_operations_struct *ovl_get_real_vmops(struct file *file)
 {
 	struct ovl_file_entry *ofe = file->private_data;
 
 	return ofe->vm_ops;
 }
 
-void ovl_set_real_vmops(struct file *file, void *vm_ops)
+void ovl_set_real_vmops(struct file *file,
+			const struct vm_operations_struct *vm_ops)
 {
 	struct ovl_file_entry *ofe = file->private_data;
 
@@ -493,11 +502,130 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	return ret;
 }
 
+/*
+ * Fault handler installed by ovl_mmap() for shared mappings of files whose
+ * data has not been copied up yet.
+ *
+ * First pass (FAULT_FLAG_TRIED clear): pin the file with
+ * maybe_unlock_mmap_for_io(), copy the data up if needed, replace the cached
+ * realfile and return VM_FAULT_RETRY so that the fault is replayed.
+ *
+ * Retry pass (FAULT_FLAG_TRIED set): bind the real vm_ops on first use and
+ * forward the fault to the real ->fault() handler.
+ */
+static vm_fault_t ovl_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct file *file = vma->vm_file;
+	struct file *realfile;
+	struct file *fpin, *tmp;
+	struct inode *inode = file_inode(file);
+	struct inode *realinode;
+	const struct cred *old_cred;
+	bool retry_allowed;
+	vm_fault_t ret;
+	int err = 0;
+
+	if (fault_flag_check(vmf, FAULT_FLAG_TRIED)) {
+		realfile = ovl_get_realfile(file);
+
+		if (!ovl_has_upperdata(inode) ||
+		    realfile->f_inode != ovl_inode_upper(inode) ||
+		    !realfile->f_op->mmap)
+			return VM_FAULT_SIGBUS;
+
+		if (!ovl_get_real_vmops(file)) {
+			old_cred = ovl_override_creds(inode->i_sb);
+			err = call_mmap(realfile, vma);
+			revert_creds(old_cred);
+
+			vma->vm_file = file;
+			if (err) {
+				vma->vm_ops = &ovl_vm_ops;
+				return VM_FAULT_SIGBUS;
+			}
+			ovl_set_real_vmops(file, vma->vm_ops);
+			vma->vm_ops = &ovl_vm_ops;
+		}
+
+		/*
+		 * FAULT_FLAG_* bits belong to vmf->flags; applying them to
+		 * vma->vm_flags would toggle an unrelated VM_* bit. Hide
+		 * ALLOW_RETRY from the real handler, then restore it.
+		 */
+		retry_allowed = fault_flag_check(vmf, FAULT_FLAG_ALLOW_RETRY);
+		if (retry_allowed)
+			vmf->flags &= ~FAULT_FLAG_ALLOW_RETRY;
+		vma->vm_file = realfile;
+		ret = ovl_get_real_vmops(file)->fault(vmf);
+		vma->vm_file = file;
+		if (retry_allowed)
+			vmf->flags |= FAULT_FLAG_ALLOW_RETRY;
+		return ret;
+
+	} else {
+		fpin = maybe_unlock_mmap_for_io(vmf, NULL);
+		if (!fpin)
+			return VM_FAULT_SIGBUS;
+
+		ret = VM_FAULT_RETRY;
+		if (!ovl_has_upperdata(inode)) {
+			err = ovl_copy_up_with_data(file->f_path.dentry);
+			if (err)
+				goto out;
+		}
+
+		realinode = ovl_inode_realdata(inode);
+		realfile = ovl_open_realfile(file, realinode);
+		if (IS_ERR(realfile))
+			goto out;
+
+		tmp = ovl_get_realfile(file);
+		ovl_set_realfile(file, realfile);
+		fput(tmp);
+
+out:
+		fput(fpin);
+		return ret;
+	}
+}
+
+/*
+ * ->page_mkwrite() for stacked shared mappings: update the overlay file
+ * times and forward the call to the real vm_ops. Real vm_ops without a
+ * ->page_mkwrite() hook are skipped and 0 is returned instead of calling
+ * through a NULL pointer.
+ */
+static vm_fault_t ovl_page_mkwrite(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct file *file = vma->vm_file;
+	struct file *realfile;
+	struct inode *inode = file_inode(file);
+	const struct vm_operations_struct *real_ops;
+	vm_fault_t ret = 0;
+
+	realfile = ovl_get_realfile(file);
+	real_ops = ovl_get_real_vmops(file);
+
+	sb_start_pagefault(inode->i_sb);
+	file_update_time(file);
+
+	if (real_ops && real_ops->page_mkwrite) {
+		vma->vm_file = realfile;
+		ret = real_ops->page_mkwrite(vmf);
+		vma->vm_file = file;
+	}
+
+	sb_end_pagefault(inode->i_sb);
+	return ret;
+}
+
 static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct file *realfile = ovl_get_realfile(file);
 	const struct cred *old_cred;
-	int ret;
+	int ret = 0;
 
 	if (!realfile->f_op->mmap)
 		return -ENODEV;
@@ -505,6 +633,17 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 	if (WARN_ON(file != vma->vm_file))
 		return -EIO;
 
+	/*
+	 * Shared mapping of a file without upper data: install the stacked
+	 * vm_ops and leave the real file's mmap to the fault handler.
+	 */
+	if (!ovl_has_upperdata(file_inode(file)) &&
+	    (vma->vm_flags & (VM_SHARED|VM_MAYSHARE))) {
+		vma->vm_ops = &ovl_vm_ops;
+		ovl_file_accessed(file);
+		return 0;
+	}
+
 	vma->vm_file = get_file(realfile);
 
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
@@ -517,10 +656,9 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
 	} else {
 		/* Drop reference count from previous vm_file value */
 		fput(file);
+		ovl_file_accessed(file);
 	}
 
-	ovl_file_accessed(file);
-
 	return ret;
 }
 
-- 
2.20.1