Userspace has to mmap() the vmalloc'ed user_header and user_index
pointers in order to consume events from userspace.

Support mapping with the possibility of an mremap() call in the future,
i.e. the vma does not have the VM_DONTEXPAND flag set. Userspace mmaps
the two pointers, header and index, and can later expand both by
calling mremap(). Expansion is backed by the fault callback, where each
page is mapped with all the appropriate size checks.

Signed-off-by: Roman Penyaev <rpenyaev@xxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Davidlohr Bueso <dbueso@xxxxxxx>
Cc: Jason Baron <jbaron@xxxxxxxxxx>
Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
Cc: "Paul E. McKenney" <paulmck@xxxxxxxxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Andrea Parri <andrea.parri@xxxxxxxxxxxxxxxxxxxx>
Cc: linux-fsdevel@xxxxxxxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
---
 fs/eventpoll.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 5de640fcf28b..2849b238f80b 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1388,11 +1388,96 @@ static void ep_show_fdinfo(struct seq_file *m, struct file *f)
 }
 #endif
 
+static vm_fault_t ep_eventpoll_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct eventpoll *ep = vma->vm_file->private_data;
+	size_t off = vmf->address - vma->vm_start;
+	vm_fault_t ret;
+	int rc;
+
+	mutex_lock(&ep->mtx);
+	ret = VM_FAULT_SIGBUS;
+	if (!vma->vm_pgoff) {
+		if (ep->header_length < (off + PAGE_SIZE))
+			goto unlock_and_out;
+
+		rc = remap_vmalloc_range_partial(vma, vmf->address,
+						 ep->user_header + off,
+						 PAGE_SIZE);
+	} else {
+		if (ep->index_length < (off + PAGE_SIZE))
+			goto unlock_and_out;
+
+		rc = remap_vmalloc_range_partial(vma, vmf->address,
+						 ep->user_index + off,
+						 PAGE_SIZE);
+	}
+	if (likely(!rc)) {
+		/* Success path */
+		vma->vm_flags &= ~VM_DONTEXPAND;
+		ret = VM_FAULT_NOPAGE;
+	}
+unlock_and_out:
+	mutex_unlock(&ep->mtx);
+
+	return ret;
+}
+
+static const struct vm_operations_struct eventpoll_vm_ops = {
+	.fault = ep_eventpoll_fault,
+};
+
+static int ep_eventpoll_mmap(struct file *filep, struct vm_area_struct *vma)
+{
+	struct eventpoll *ep = vma->vm_file->private_data;
+	size_t size;
+	int rc;
+
+	if (!ep_polled_by_user(ep))
+		return -ENOTSUPP;
+
+	mutex_lock(&ep->mtx);
+	rc = -ENXIO;
+	size = vma->vm_end - vma->vm_start;
+	if (!vma->vm_pgoff && size > ep->header_length)
+		goto unlock_and_out;
+	if (vma->vm_pgoff && ep->header_length != (vma->vm_pgoff << PAGE_SHIFT))
+		/*
+		 * The index ring starts exactly after the header. vm_pgoff
+		 * is used only as an indication of which kernel ptr is mapped.
+		 */
+		goto unlock_and_out;
+	if (vma->vm_pgoff && size > ep->index_length)
+		goto unlock_and_out;
+
+	/*
+	 * vm_pgoff is used *only* as an indication of what is mapped:
+	 * the user header or the user index ring.
+	 */
+	if (!vma->vm_pgoff)
+		rc = remap_vmalloc_range_partial(vma, vma->vm_start,
+						 ep->user_header, size);
+	else
+		rc = remap_vmalloc_range_partial(vma, vma->vm_start,
+						 ep->user_index, size);
+
+	if (likely(!rc)) {
+		vma->vm_flags &= ~VM_DONTEXPAND;
+		vma->vm_ops = &eventpoll_vm_ops;
+	}
+unlock_and_out:
+	mutex_unlock(&ep->mtx);
+
+	return rc;
+}
+
 /* File callbacks that implement the eventpoll file behaviour */
 static const struct file_operations eventpoll_fops = {
 #ifdef CONFIG_PROC_FS
 	.show_fdinfo	= ep_show_fdinfo,
 #endif
+	.mmap		= ep_eventpoll_mmap,
 	.release	= ep_eventpoll_release,
 	.poll		= ep_eventpoll_poll,
 	.llseek		= noop_llseek,
--
2.19.1
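
For reviewers, a rough userspace sketch of the intended usage (not part
of this patch). It assumes an epoll fd already set up for user polling
by earlier patches in this series, page-aligned header/index lengths
(which a real consumer would read from the mapped header), and that the
kernel-side index ring has grown before the expanded tail is touched;
the map_event_rings() helper name is hypothetical.

/*
 * Hypothetical consumer sketch, not part of this patch. 'epfd' is
 * assumed to be an epoll fd set up for user polling; hdr_len and
 * idx_len are assumed page-aligned and are illustrative only.
 */
#define _GNU_SOURCE
#include <sys/mman.h>

static int map_event_rings(int epfd, size_t hdr_len, size_t idx_len)
{
	void *header, *index;

	/* Offset 0 selects the user header (vm_pgoff == 0). */
	header = mmap(NULL, hdr_len, PROT_READ | PROT_WRITE,
		      MAP_SHARED, epfd, 0);
	if (header == MAP_FAILED)
		return -1;

	/*
	 * An offset equal to the header length selects the index ring,
	 * matching the vm_pgoff check in ep_eventpoll_mmap().
	 */
	index = mmap(NULL, idx_len, PROT_READ | PROT_WRITE,
		     MAP_SHARED, epfd, hdr_len);
	if (index == MAP_FAILED)
		return -1;

	/*
	 * Since the vma is left without VM_DONTEXPAND, the index ring
	 * can be expanded later; the new tail pages are populated
	 * lazily by ep_eventpoll_fault(), which SIGBUSes if the fault
	 * offset exceeds the current kernel-side ring length, so this
	 * assumes the ring has actually grown to 2 * idx_len.
	 */
	index = mremap(index, idx_len, 2 * idx_len, MREMAP_MAYMOVE);
	if (index == MAP_FAILED)
		return -1;

	return 0;
}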