On Thu, Apr 8, 2021 at 1:40 PM Chengguang Xu <cgxu519@xxxxxxxxxxxx> wrote:
>
> ---- 在 星期四, 2021-04-08 19:29:55 Miklos Szeredi <miklos@xxxxxxxxxx> 撰写 ----
> > On Thu, Apr 8, 2021 at 1:28 PM Chengguang Xu <cgxu519@xxxxxxxxxxxx> wrote:
> > >
> > > ---- 在 星期四, 2021-04-08 19:20:42 Chengguang Xu <cgxu519@xxxxxxxxxxxx> 撰写 ----
> > > > In overlayfs copy-up, if open flag has O_TRUNC then upper
> > > > file will truncate to zero size, in this case we should check
> > > > VM_DENYWRITE mappings to keep compatibility with other filesystems.
> >
> > Can you provide a test case for the bug that this is fixing?
> >
>
> Execute binary file(keep running until open) in overlayfs which only has lower && open the binary file with flag O_RDWR|O_TRUNC
>
> Expected result: open fail with -ETXTBSY
>
> Actual result: open success

Worse, it's possible to get a "Bus error" with just execute and write
on an overlayfs file, which i_writecount is supposed to protect against.

The reason is that the put_write_access() call in __vma_link_file()
assumes an already negative writecount, but because of the vm_file
shuffle in ovl_mmap() that's not guaranteed.  There's even a comment
about exactly this situation in mmap():

	/* ->mmap() can change vma->vm_file, but must guarantee that
	 * vma_link() below can deny write-access if VM_DENYWRITE is set
	 * and map writably if VM_SHARED is set. This usually means the
	 * new file must not have been exposed to user-space, yet.
	 */

The attached patch fixes this, but not your original bug.

That could be addressed by checking the writecount on *both* lower and
upper when opening for write/truncate.  Note: this could be checked
before copy-up, but that's not reliable alone, because the copy-up
could happen due to a metadata update, for example, and then the
open/truncate wouldn't trigger the writecount check.

Something like the second attached patch?

Thanks,
Miklos
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index dbfb35fb0ff7..5b5b4410c0f4 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -422,6 +422,7 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma) { struct file *realfile = file->private_data; const struct cred *old_cred; + vm_flags_t vm_flags = vma->vm_flags; int ret; if (!realfile->f_op->mmap) @@ -430,6 +431,15 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma) if (WARN_ON(file != vma->vm_file)) return -EIO; + /* Get temporary denial counts on realfile */ + if (vm_flags & VM_DENYWRITE && + (ret = deny_write_access(realfile))) + goto out; + + if (vm_flags & VM_SHARED && + (ret = mapping_map_writable(file->f_mapping))) + goto allow_write; + vma->vm_file = get_file(realfile); old_cred = ovl_override_creds(file_inode(file)->i_sb); @@ -446,6 +456,13 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma) ovl_file_accessed(file); + /* Undo temporary denial counts */ + if (vm_flags & VM_SHARED) + mapping_unmap_writable(realfile->f_mapping); +allow_write: + if (vm_flags & VM_DENYWRITE) + allow_write_access(realfile); +out: return ret; } diff --git a/mm/mmap.c b/mm/mmap.c index 3f287599a7a3..15b082c701c7 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -659,11 +659,18 @@ static void __vma_link_file(struct vm_area_struct *vma) file = vma->vm_file; if (file) { struct address_space *mapping = file->f_mapping; + struct inode *inode = file_inode(file); - if (vma->vm_flags & VM_DENYWRITE) - put_write_access(file_inode(file)); - if (vma->vm_flags & VM_SHARED) + if (vma->vm_flags & VM_DENYWRITE) { + /* This is an unconditional deny_write_access() */ + WARN_ON(atomic_read(&inode->i_writecount) > 0); + put_write_access(inode); + } + if (vma->vm_flags & VM_SHARED) { + /* This is an unconditional mapping_map_writable() */ + WARN_ON(atomic_read(&mapping->i_mmap_writable) < 0); mapping_allow_writable(mapping); + } flush_dcache_mmap_lock(mapping); 
vma_interval_tree_insert(vma, &mapping->i_mmap);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index dbfb35fb0ff7..504107dd6bab 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -144,8 +144,17 @@ static int ovl_real_fdget(const struct file *file, struct fd *real) static int ovl_open(struct inode *inode, struct file *file) { struct file *realfile; + struct inode *lowerinode, *upperinode; int err; + lowerinode = ovl_inode_lower(inode); + upperinode = ovl_inode_upper(inode); + + if (((file->f_mode & FMODE_WRITE) || file->f_flags & O_TRUNC) && + ((lowerinode && atomic_read(&lowerinode->i_writecount) < 0) || + (upperinode && atomic_read(&upperinode->i_writecount) < 0))) + return -ETXTBSY; + err = ovl_maybe_copy_up(file_dentry(file), file->f_flags); if (err) return err;