On Mon, Sep 09, 2019 at 06:04:12PM +0300, Kirill A. Shutemov wrote: > On Mon, Sep 09, 2019 at 06:55:21AM -0700, Matthew Wilcox wrote: > > On Mon, Sep 02, 2019 at 05:20:30PM +0300, Kirill A. Shutemov wrote: > > > On Mon, Sep 02, 2019 at 06:52:54AM -0700, Matthew Wilcox wrote: > > > > On Sat, Aug 31, 2019 at 12:58:26PM +0800, Hillf Danton wrote: > > > > > On Fri, 30 Aug 2019 12:40:06 -0700 > > > > > > syzbot found the following crash on: > > > > > > > > > > > > HEAD commit: a55aa89a Linux 5.3-rc6 > > > > > > git tree: upstream > > > > > > console output: https://syzkaller.appspot.com/x/log.txt?x=12f4beb6600000 > > > > > > kernel config: https://syzkaller.appspot.com/x/.config?x=2a6a2b9826fdadf9 > > > > > > dashboard link: https://syzkaller.appspot.com/bug?extid=03ee87124ee05af991bd > > > > > > compiler: gcc (GCC) 9.0.0 20181231 (experimental) > > > > > > > > > > > > ================================================================== > > > > > > BUG: KASAN: use-after-free in perf_trace_lock_acquire+0x401/0x530 > > > > > > include/trace/events/lock.h:13 > > > > > > Read of size 8 at addr ffff8880a5cf2c50 by task syz-executor.0/26173 > > > > > > > > > > --- a/mm/shmem.c > > > > > +++ b/mm/shmem.c > > > > > @@ -2021,6 +2021,12 @@ static vm_fault_t shmem_fault(struct vm_ > > > > > shmem_falloc_waitq = shmem_falloc->waitq; > > > > > prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait, > > > > > TASK_UNINTERRUPTIBLE); > > > > > + /* > > > > > + * it is not trivial to see what will take place after > > > > > + * releasing i_lock and taking a nap, so hold inode to > > > > > + * be on the safe side. > > > > > > > > I think the comment could be improved. How about: > > > > > > > > * The file could be unmapped by another thread after > > > > * releasing i_lock, and the inode then freed. Hold > > > > * a reference to the inode to prevent this. > > > > > > It can only happen if mmap_sem was released, so it's better to put > > > __iget() in the branch above, next to up_read(). 
I was confused at first about > > > how this is possible from ->fault(). > > > > > > This way iput() below should only be called for ret == VM_FAULT_RETRY. > > > > Looking at the rather similar construct in filemap.c, should we solve > > it the same way, where we increment the refcount on the struct file instead > > of the inode before releasing the mmap_sem? > > Are you talking about maybe_unlock_mmap_for_io()? Yeah, it's worth moving it to > mm/internal.h and reusing it. > > Care to prepare the patch? :P Something like this? Untested. diff --git a/mm/filemap.c b/mm/filemap.c index d0cf700bf201..a542f72f57cc 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2349,26 +2349,6 @@ EXPORT_SYMBOL(generic_file_read_iter); #ifdef CONFIG_MMU #define MMAP_LOTSAMISS (100) -static struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, - struct file *fpin) -{ - int flags = vmf->flags; - - if (fpin) - return fpin; - - /* - * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or - * anything, so we only pin the file and drop the mmap_sem if only - * FAULT_FLAG_ALLOW_RETRY is set. - */ - if ((flags & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) == - FAULT_FLAG_ALLOW_RETRY) { - fpin = get_file(vmf->vma->vm_file); - up_read(&vmf->vma->vm_mm->mmap_sem); - } - return fpin; -} /* * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_sem diff --git a/mm/internal.h b/mm/internal.h index e32390802fd3..75ffa646de82 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -362,6 +362,27 @@ vma_address(struct page *page, struct vm_area_struct *vma) return max(start, vma->vm_start); } +static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, + struct file *fpin) +{ + int flags = vmf->flags; + + if (fpin) + return fpin; + + /* + * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or + * anything, so we only pin the file and drop the mmap_sem if only + * FAULT_FLAG_ALLOW_RETRY is set. 
+ */ + if ((flags & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) == + FAULT_FLAG_ALLOW_RETRY) { + fpin = get_file(vmf->vma->vm_file); + up_read(&vmf->vma->vm_mm->mmap_sem); + } + return fpin; +} + #else /* !CONFIG_MMU */ static inline void clear_page_mlock(struct page *page) { } static inline void mlock_vma_page(struct page *page) { } diff --git a/mm/shmem.c b/mm/shmem.c index 2bed4761f279..551fa49eb7f6 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2007,16 +2007,14 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf) shmem_falloc->waitq && vmf->pgoff >= shmem_falloc->start && vmf->pgoff < shmem_falloc->next) { + struct file *fpin = NULL; wait_queue_head_t *shmem_falloc_waitq; DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function); ret = VM_FAULT_NOPAGE; - if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) && - !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { - /* It's polite to up mmap_sem if we can */ - up_read(&vma->vm_mm->mmap_sem); + fpin = maybe_unlock_mmap_for_io(vmf, fpin); + if (fpin) ret = VM_FAULT_RETRY; - } shmem_falloc_waitq = shmem_falloc->waitq; prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait, @@ -2034,6 +2032,9 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf) spin_lock(&inode->i_lock); finish_wait(shmem_falloc_waitq, &shmem_fault_wait); spin_unlock(&inode->i_lock); + + if (fpin) + fput(fpin); return ret; } spin_unlock(&inode->i_lock); -- Kirill A. Shutemov