On Fri, Aug 27, 2021 at 5:52 PM Andreas Gruenbacher <agruenba@xxxxxxxxxx> wrote: > > Turn fault_in_pages_{readable,writeable} into versions that return the > number of bytes not faulted in (similar to copy_to_user) instead of > returning a non-zero value when any of the requested pages couldn't be > faulted in. This supports the existing users that require all pages to > be faulted in as well as new users that are happy if any pages can be > faulted in at all. > > Neither of these functions is entirely trivial and it doesn't seem > useful to inline them, so move them to mm/gup.c. > > Rename the functions to fault_in_{readable,writeable} to make sure that > this change doesn't silently break things. > > Signed-off-by: Andreas Gruenbacher <agruenba@xxxxxxxxxx> > --- > arch/powerpc/kernel/kvm.c | 3 +- > arch/powerpc/kernel/signal_32.c | 4 +- > arch/powerpc/kernel/signal_64.c | 2 +- > arch/x86/kernel/fpu/signal.c | 7 ++- > drivers/gpu/drm/armada/armada_gem.c | 7 ++- > fs/btrfs/ioctl.c | 5 +- > include/linux/pagemap.h | 57 ++--------------------- > lib/iov_iter.c | 10 ++-- > mm/filemap.c | 2 +- > mm/gup.c | 72 +++++++++++++++++++++++++++++ > 10 files changed, 93 insertions(+), 76 deletions(-) > > diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c > index d89cf802d9aa..6568823cf306 100644 > --- a/arch/powerpc/kernel/kvm.c > +++ b/arch/powerpc/kernel/kvm.c > @@ -669,7 +669,8 @@ static void __init kvm_use_magic_page(void) > on_each_cpu(kvm_map_magic_page, &features, 1); > > /* Quick self-test to see if the mapping works */ > - if (fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) { > + if (fault_in_readable((const char __user *)KVM_MAGIC_PAGE, > + sizeof(u32))) { > kvm_patching_worked = false; > return; > } > diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c > index 0608581967f0..38c3eae40c14 100644 > --- a/arch/powerpc/kernel/signal_32.c > +++ b/arch/powerpc/kernel/signal_32.c > @@ -1048,7 +1048,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, > if (new_ctx == NULL) > return 0; > if (!access_ok(new_ctx, ctx_size) || > - fault_in_pages_readable((u8 __user *)new_ctx, ctx_size)) > + fault_in_readable((char __user *)new_ctx, ctx_size)) > return -EFAULT; > > /* > @@ -1237,7 +1237,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, > #endif > > if (!access_ok(ctx, sizeof(*ctx)) || > - fault_in_pages_readable((u8 __user *)ctx, sizeof(*ctx))) > + fault_in_readable((char __user *)ctx, sizeof(*ctx))) > return -EFAULT; > > /* > diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c > index 1831bba0582e..9f471b4a11e3 100644 > --- a/arch/powerpc/kernel/signal_64.c > +++ b/arch/powerpc/kernel/signal_64.c > @@ -688,7 +688,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, > if (new_ctx == NULL) > return 0; > if (!access_ok(new_ctx, ctx_size) || > - fault_in_pages_readable((u8 __user *)new_ctx, ctx_size)) > + fault_in_readable((char __user *)new_ctx, ctx_size)) > return -EFAULT; > > /* > diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c > index 445c57c9c539..ba6bdec81603 100644 > --- a/arch/x86/kernel/fpu/signal.c > +++ b/arch/x86/kernel/fpu/signal.c > @@ -205,7 +205,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) > fpregs_unlock(); > > if (ret) { > - if (!fault_in_pages_writeable(buf_fx, fpu_user_xstate_size)) > + if (!fault_in_writeable(buf_fx, fpu_user_xstate_size)) > goto retry; > return -EFAULT; > } > @@ -278,10 +278,9 @@ static int restore_fpregs_from_user(void __user *buf, u64 xrestore, > if (ret != -EFAULT) > return -EINVAL; > > - ret = fault_in_pages_readable(buf, size); > - if (!ret) > + if (!fault_in_readable(buf, size)) > goto retry; > - return ret; > + return -EFAULT; > } > > /* > diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c > index 21909642ee4c..8fbb25913327 100644 > --- a/drivers/gpu/drm/armada/armada_gem.c > +++ b/drivers/gpu/drm/armada/armada_gem.c > @@ -336,7 +336,7 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data, > struct drm_armada_gem_pwrite *args = data; > struct armada_gem_object *dobj; > char __user *ptr; > - int ret; > + int ret = 0; > > DRM_DEBUG_DRIVER("handle %u off %u size %u ptr 0x%llx\n", > args->handle, args->offset, args->size, args->ptr); > @@ -349,9 +349,8 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data, > if (!access_ok(ptr, args->size)) > return -EFAULT; > > - ret = fault_in_pages_readable(ptr, args->size); > - if (ret) > - return ret; > + if (fault_in_readable(ptr, args->size)) > + return -EFAULT; > > dobj = armada_gem_object_lookup(file, args->handle); > if (dobj == NULL) > diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c > index 0ba98e08a029..9233ecc31e2e 100644 > --- a/fs/btrfs/ioctl.c > +++ b/fs/btrfs/ioctl.c > @@ -2244,9 +2244,8 @@ static noinline int search_ioctl(struct inode *inode, > key.offset = sk->min_offset; > > while (1) { > - ret = fault_in_pages_writeable(ubuf + sk_offset, > - *buf_size - sk_offset); > - if (ret) > + ret = -EFAULT; > + if (fault_in_writeable(ubuf + sk_offset, *buf_size - sk_offset)) > break; > > ret = btrfs_search_forward(root, &key, path, sk->min_transid); > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h > index ed02aa522263..7c9edc9694d9 100644 > --- a/include/linux/pagemap.h > +++ b/include/linux/pagemap.h > @@ -734,61 +734,10 @@ int wait_on_page_private_2_killable(struct page *page); > extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter); > > /* > - * Fault everything in given userspace address range in. > + * Fault in userspace address range. > */ > -static inline int fault_in_pages_writeable(char __user *uaddr, int size) > -{ > - char __user *end = uaddr + size - 1; > - > - if (unlikely(size == 0)) > - return 0; > - > - if (unlikely(uaddr > end)) > - return -EFAULT; > - /* > - * Writing zeroes into userspace here is OK, because we know that if > - * the zero gets there, we'll be overwriting it. > - */ > - do { > - if (unlikely(__put_user(0, uaddr) != 0)) > - return -EFAULT; > - uaddr += PAGE_SIZE; > - } while (uaddr <= end); > - > - /* Check whether the range spilled into the next page. */ > - if (((unsigned long)uaddr & PAGE_MASK) == > - ((unsigned long)end & PAGE_MASK)) > - return __put_user(0, end); > - > - return 0; > -} > - > -static inline int fault_in_pages_readable(const char __user *uaddr, int size) > -{ > - volatile char c; > - const char __user *end = uaddr + size - 1; > - > - if (unlikely(size == 0)) > - return 0; > - > - if (unlikely(uaddr > end)) > - return -EFAULT; > - > - do { > - if (unlikely(__get_user(c, uaddr) != 0)) > - return -EFAULT; > - uaddr += PAGE_SIZE; > - } while (uaddr <= end); > - > - /* Check whether the range spilled into the next page. */ > - if (((unsigned long)uaddr & PAGE_MASK) == > - ((unsigned long)end & PAGE_MASK)) { > - return __get_user(c, end); > - } > - > - (void)c; > - return 0; > -} > +size_t fault_in_writeable(char __user *uaddr, size_t size); > +size_t fault_in_readable(const char __user *uaddr, size_t size); > > int add_to_page_cache_locked(struct page *page, struct address_space *mapping, > pgoff_t index, gfp_t gfp_mask); > diff --git a/lib/iov_iter.c b/lib/iov_iter.c > index 25dfc48536d7..069cedd9d7b4 100644 > --- a/lib/iov_iter.c > +++ b/lib/iov_iter.c > @@ -191,7 +191,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b > buf = iov->iov_base + skip; > copy = min(bytes, iov->iov_len - skip); > > - if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) { > + if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_writeable(buf, copy)) { > kaddr = kmap_atomic(page); > from = kaddr + offset; > > @@ -275,7 +275,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t > buf = iov->iov_base + skip; > copy = min(bytes, iov->iov_len - skip); > > - if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) { > + if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_readable(buf, copy)) { > kaddr = kmap_atomic(page); > to = kaddr + offset; > > @@ -446,13 +446,11 @@ int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes) > bytes = i->count; > for (p = i->iov, skip = i->iov_offset; bytes; p++, skip = 0) { > size_t len = min(bytes, p->iov_len - skip); > - int err; > > if (unlikely(!len)) > continue; > - err = fault_in_pages_readable(p->iov_base + skip, len); > - if (unlikely(err)) > - return err; > + if (fault_in_readable(p->iov_base + skip, len)) > + return -EFAULT; > bytes -= len; > } > } > diff --git a/mm/filemap.c b/mm/filemap.c > index d1458ecf2f51..4dec3bc7752e 100644 > --- a/mm/filemap.c > +++ b/mm/filemap.c > @@ -88,7 +88,7 @@ > * ->lock_page (access_process_vm) > * > * ->i_mutex (generic_perform_write) > - * ->mmap_lock (fault_in_pages_readable->do_page_fault) > + * ->mmap_lock (fault_in_readable->do_page_fault) > * > * bdi->wb.list_lock > * sb_lock (fs/fs-writeback.c) > diff --git a/mm/gup.c b/mm/gup.c > index b94717977d17..0cf47955e5a1 100644 > --- a/mm/gup.c > +++ b/mm/gup.c > @@ -1672,6 +1672,78 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start, > } > #endif /* !CONFIG_MMU */ > > +/** > + * fault_in_writeable - fault in userspace address range for writing > + * @uaddr: start of address range > + * @size: size of address range > + * > + * Returns the number of bytes not faulted in (like copy_to_user() and > + * copy_from_user()). > + */ > +size_t fault_in_writeable(char __user *uaddr, size_t size) > +{ > + char __user *start = uaddr, *end; > + > + if (unlikely(size == 0)) > + return 0; > + if (!PAGE_ALIGNED(uaddr)) { > + if (unlikely(__put_user(0, uaddr) != 0)) > + return size; > + uaddr = (char __user *)PAGE_ALIGN((unsigned long)uaddr); > + } > + end = (char __user *)PAGE_ALIGN((unsigned long)start + size); > + if (unlikely(end < start)) > + end = NULL; > + while (uaddr != end) { > + if (unlikely(__put_user(0, uaddr) != 0)) > + goto out; > + uaddr += PAGE_SIZE; Won't we loop endlessly or corrupt some unwanted page when 'end' was set to NULL? > + } > + > +out: > + if (size > uaddr - start) > + return size - (uaddr - start); > + return 0; > +} > +EXPORT_SYMBOL(fault_in_writeable); > + > +/** > + * fault_in_readable - fault in userspace address range for reading > + * @uaddr: start of user address range > + * @size: size of user address range > + * > + * Returns the number of bytes not faulted in (like copy_to_user() and > + * copy_from_user()). > + */ > +size_t fault_in_readable(const char __user *uaddr, size_t size) > +{ > + const char __user *start = uaddr, *end; > + volatile char c; > + > + if (unlikely(size == 0)) > + return 0; > + if (!PAGE_ALIGNED(uaddr)) { > + if (unlikely(__get_user(c, uaddr) != 0)) > + return size; > + uaddr = (const char __user *)PAGE_ALIGN((unsigned long)uaddr); > + } > + end = (const char __user *)PAGE_ALIGN((unsigned long)start + size); > + if (unlikely(end < start)) > + end = NULL; > + while (uaddr != end) { Same kind of issue here, when 'end' was set to NULL? Thanks. > + if (unlikely(__get_user(c, uaddr) != 0)) > + goto out; > + uaddr += PAGE_SIZE; > + } > + > +out: > + (void)c; > + if (size > uaddr - start) > + return size - (uaddr - start); > + return 0; > +} > +EXPORT_SYMBOL(fault_in_readable); > + > /** > * get_dump_page() - pin user page in memory while writing it to core dump > * @addr: user address > -- > 2.26.3 > -- Filipe David Manana, “Whether you think you can, or you think you can't — you're right.”