Re: [PATCH v7 03/19] gup: Turn fault_in_pages_{readable,writeable} into fault_in_{readable,writeable}

On Fri, Aug 27, 2021 at 5:52 PM Andreas Gruenbacher <agruenba@xxxxxxxxxx> wrote:
>
> Turn fault_in_pages_{readable,writeable} into versions that return the
> number of bytes not faulted in (similar to copy_to_user) instead of
> returning a non-zero value when any of the requested pages couldn't be
> faulted in.  This supports the existing users that require all pages to
> be faulted in as well as new users that are happy if any pages can be
> faulted in at all.
>
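Just to check I'm reading the new convention right: a caller that is
happy with partial progress would do something like (sketch only, with
made-up 'buf'/'len', not code from this series):

	size_t left = fault_in_writeable(buf, len);

	if (left == len)
		return -EFAULT;	/* nothing could be faulted in */
	/* otherwise the first (len - left) bytes were faulted in */

while the existing callers simply keep treating any non-zero return as
a failure, right?
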
> Neither of these functions is entirely trivial and it doesn't seem
> useful to inline them, so move them to mm/gup.c.
>
> Rename the functions to fault_in_{readable,writeable} to make sure that
> this change doesn't silently break things.
>
> Signed-off-by: Andreas Gruenbacher <agruenba@xxxxxxxxxx>
> ---
>  arch/powerpc/kernel/kvm.c           |  3 +-
>  arch/powerpc/kernel/signal_32.c     |  4 +-
>  arch/powerpc/kernel/signal_64.c     |  2 +-
>  arch/x86/kernel/fpu/signal.c        |  7 ++-
>  drivers/gpu/drm/armada/armada_gem.c |  7 ++-
>  fs/btrfs/ioctl.c                    |  5 +-
>  include/linux/pagemap.h             | 57 ++---------------------
>  lib/iov_iter.c                      | 10 ++--
>  mm/filemap.c                        |  2 +-
>  mm/gup.c                            | 72 +++++++++++++++++++++++++++++
>  10 files changed, 93 insertions(+), 76 deletions(-)
>
> diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
> index d89cf802d9aa..6568823cf306 100644
> --- a/arch/powerpc/kernel/kvm.c
> +++ b/arch/powerpc/kernel/kvm.c
> @@ -669,7 +669,8 @@ static void __init kvm_use_magic_page(void)
>         on_each_cpu(kvm_map_magic_page, &features, 1);
>
>         /* Quick self-test to see if the mapping works */
> -       if (fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) {
> +       if (fault_in_readable((const char __user *)KVM_MAGIC_PAGE,
> +                             sizeof(u32))) {
>                 kvm_patching_worked = false;
>                 return;
>         }
> diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
> index 0608581967f0..38c3eae40c14 100644
> --- a/arch/powerpc/kernel/signal_32.c
> +++ b/arch/powerpc/kernel/signal_32.c
> @@ -1048,7 +1048,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
>         if (new_ctx == NULL)
>                 return 0;
>         if (!access_ok(new_ctx, ctx_size) ||
> -           fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
> +           fault_in_readable((char __user *)new_ctx, ctx_size))
>                 return -EFAULT;
>
>         /*
> @@ -1237,7 +1237,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
>  #endif
>
>         if (!access_ok(ctx, sizeof(*ctx)) ||
> -           fault_in_pages_readable((u8 __user *)ctx, sizeof(*ctx)))
> +           fault_in_readable((char __user *)ctx, sizeof(*ctx)))
>                 return -EFAULT;
>
>         /*
> diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
> index 1831bba0582e..9f471b4a11e3 100644
> --- a/arch/powerpc/kernel/signal_64.c
> +++ b/arch/powerpc/kernel/signal_64.c
> @@ -688,7 +688,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
>         if (new_ctx == NULL)
>                 return 0;
>         if (!access_ok(new_ctx, ctx_size) ||
> -           fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
> +           fault_in_readable((char __user *)new_ctx, ctx_size))
>                 return -EFAULT;
>
>         /*
> diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
> index 445c57c9c539..ba6bdec81603 100644
> --- a/arch/x86/kernel/fpu/signal.c
> +++ b/arch/x86/kernel/fpu/signal.c
> @@ -205,7 +205,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
>         fpregs_unlock();
>
>         if (ret) {
> -               if (!fault_in_pages_writeable(buf_fx, fpu_user_xstate_size))
> +               if (!fault_in_writeable(buf_fx, fpu_user_xstate_size))
>                         goto retry;
>                 return -EFAULT;
>         }
> @@ -278,10 +278,9 @@ static int restore_fpregs_from_user(void __user *buf, u64 xrestore,
>                 if (ret != -EFAULT)
>                         return -EINVAL;
>
> -               ret = fault_in_pages_readable(buf, size);
> -               if (!ret)
> +               if (!fault_in_readable(buf, size))
>                         goto retry;
> -               return ret;
> +               return -EFAULT;
>         }
>
>         /*
> diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c
> index 21909642ee4c..8fbb25913327 100644
> --- a/drivers/gpu/drm/armada/armada_gem.c
> +++ b/drivers/gpu/drm/armada/armada_gem.c
> @@ -336,7 +336,7 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>         struct drm_armada_gem_pwrite *args = data;
>         struct armada_gem_object *dobj;
>         char __user *ptr;
> -       int ret;
> +       int ret = 0;
>
>         DRM_DEBUG_DRIVER("handle %u off %u size %u ptr 0x%llx\n",
>                 args->handle, args->offset, args->size, args->ptr);
> @@ -349,9 +349,8 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>         if (!access_ok(ptr, args->size))
>                 return -EFAULT;
>
> -       ret = fault_in_pages_readable(ptr, args->size);
> -       if (ret)
> -               return ret;
> +       if (fault_in_readable(ptr, args->size))
> +               return -EFAULT;
>
>         dobj = armada_gem_object_lookup(file, args->handle);
>         if (dobj == NULL)
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 0ba98e08a029..9233ecc31e2e 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -2244,9 +2244,8 @@ static noinline int search_ioctl(struct inode *inode,
>         key.offset = sk->min_offset;
>
>         while (1) {
> -               ret = fault_in_pages_writeable(ubuf + sk_offset,
> -                                              *buf_size - sk_offset);
> -               if (ret)
> +               ret = -EFAULT;
> +               if (fault_in_writeable(ubuf + sk_offset, *buf_size - sk_offset))
>                         break;
>
>                 ret = btrfs_search_forward(root, &key, path, sk->min_transid);
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index ed02aa522263..7c9edc9694d9 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -734,61 +734,10 @@ int wait_on_page_private_2_killable(struct page *page);
>  extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);
>
>  /*
> - * Fault everything in given userspace address range in.
> + * Fault in userspace address range.
>   */
> -static inline int fault_in_pages_writeable(char __user *uaddr, int size)
> -{
> -       char __user *end = uaddr + size - 1;
> -
> -       if (unlikely(size == 0))
> -               return 0;
> -
> -       if (unlikely(uaddr > end))
> -               return -EFAULT;
> -       /*
> -        * Writing zeroes into userspace here is OK, because we know that if
> -        * the zero gets there, we'll be overwriting it.
> -        */
> -       do {
> -               if (unlikely(__put_user(0, uaddr) != 0))
> -                       return -EFAULT;
> -               uaddr += PAGE_SIZE;
> -       } while (uaddr <= end);
> -
> -       /* Check whether the range spilled into the next page. */
> -       if (((unsigned long)uaddr & PAGE_MASK) ==
> -                       ((unsigned long)end & PAGE_MASK))
> -               return __put_user(0, end);
> -
> -       return 0;
> -}
> -
> -static inline int fault_in_pages_readable(const char __user *uaddr, int size)
> -{
> -       volatile char c;
> -       const char __user *end = uaddr + size - 1;
> -
> -       if (unlikely(size == 0))
> -               return 0;
> -
> -       if (unlikely(uaddr > end))
> -               return -EFAULT;
> -
> -       do {
> -               if (unlikely(__get_user(c, uaddr) != 0))
> -                       return -EFAULT;
> -               uaddr += PAGE_SIZE;
> -       } while (uaddr <= end);
> -
> -       /* Check whether the range spilled into the next page. */
> -       if (((unsigned long)uaddr & PAGE_MASK) ==
> -                       ((unsigned long)end & PAGE_MASK)) {
> -               return __get_user(c, end);
> -       }
> -
> -       (void)c;
> -       return 0;
> -}
> +size_t fault_in_writeable(char __user *uaddr, size_t size);
> +size_t fault_in_readable(const char __user *uaddr, size_t size);
>
>  int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
>                                 pgoff_t index, gfp_t gfp_mask);
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index 25dfc48536d7..069cedd9d7b4 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -191,7 +191,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
>         buf = iov->iov_base + skip;
>         copy = min(bytes, iov->iov_len - skip);
>
> -       if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
> +       if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_writeable(buf, copy)) {
>                 kaddr = kmap_atomic(page);
>                 from = kaddr + offset;
>
> @@ -275,7 +275,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
>         buf = iov->iov_base + skip;
>         copy = min(bytes, iov->iov_len - skip);
>
> -       if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
> +       if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_readable(buf, copy)) {
>                 kaddr = kmap_atomic(page);
>                 to = kaddr + offset;
>
> @@ -446,13 +446,11 @@ int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes)
>                         bytes = i->count;
>                 for (p = i->iov, skip = i->iov_offset; bytes; p++, skip = 0) {
>                         size_t len = min(bytes, p->iov_len - skip);
> -                       int err;
>
>                         if (unlikely(!len))
>                                 continue;
> -                       err = fault_in_pages_readable(p->iov_base + skip, len);
> -                       if (unlikely(err))
> -                               return err;
> +                       if (fault_in_readable(p->iov_base + skip, len))
> +                               return -EFAULT;
>                         bytes -= len;
>                 }
>         }
> diff --git a/mm/filemap.c b/mm/filemap.c
> index d1458ecf2f51..4dec3bc7752e 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -88,7 +88,7 @@
>   *    ->lock_page              (access_process_vm)
>   *
>   *  ->i_mutex                  (generic_perform_write)
> - *    ->mmap_lock              (fault_in_pages_readable->do_page_fault)
> + *    ->mmap_lock              (fault_in_readable->do_page_fault)
>   *
>   *  bdi->wb.list_lock
>   *    sb_lock                  (fs/fs-writeback.c)
> diff --git a/mm/gup.c b/mm/gup.c
> index b94717977d17..0cf47955e5a1 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1672,6 +1672,78 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
>  }
>  #endif /* !CONFIG_MMU */
>
> +/**
> + * fault_in_writeable - fault in userspace address range for writing
> + * @uaddr: start of address range
> + * @size: size of address range
> + *
> + * Returns the number of bytes not faulted in (like copy_to_user() and
> + * copy_from_user()).
> + */
> +size_t fault_in_writeable(char __user *uaddr, size_t size)
> +{
> +       char __user *start = uaddr, *end;
> +
> +       if (unlikely(size == 0))
> +               return 0;
> +       if (!PAGE_ALIGNED(uaddr)) {
> +               if (unlikely(__put_user(0, uaddr) != 0))
> +                       return size;
> +               uaddr = (char __user *)PAGE_ALIGN((unsigned long)uaddr);
> +       }
> +       end = (char __user *)PAGE_ALIGN((unsigned long)start + size);
> +       if (unlikely(end < start))
> +               end = NULL;
> +       while (uaddr != end) {
> +               if (unlikely(__put_user(0, uaddr) != 0))
> +                       goto out;
> +               uaddr += PAGE_SIZE;

Won't we loop endlessly, or corrupt some page we are not supposed to
touch, when 'end' was set to NULL above?
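
If I'm following the arithmetic, 'end' ends up NULL whenever
'(unsigned long)start + size' overflows or the range reaches into the
last page of the address space, e.g. with (purely hypothetical values):

	start = (char __user *)0x100000;
	size  = SIZE_MAX - 0x2000;
	/* PAGE_ALIGN(start + size) wraps below start -> end = NULL */

and in that case 'while (uaddr != end)' only terminates once
__put_user() fails or uaddr itself wraps around to NULL.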

> +       }
> +
> +out:
> +       if (size > uaddr - start)
> +               return size - (uaddr - start);
> +       return 0;
> +}
> +EXPORT_SYMBOL(fault_in_writeable);
> +
> +/**
> + * fault_in_readable - fault in userspace address range for reading
> + * @uaddr: start of user address range
> + * @size: size of user address range
> + *
> + * Returns the number of bytes not faulted in (like copy_to_user() and
> + * copy_from_user()).
> + */
> +size_t fault_in_readable(const char __user *uaddr, size_t size)
> +{
> +       const char __user *start = uaddr, *end;
> +       volatile char c;
> +
> +       if (unlikely(size == 0))
> +               return 0;
> +       if (!PAGE_ALIGNED(uaddr)) {
> +               if (unlikely(__get_user(c, uaddr) != 0))
> +                       return size;
> +               uaddr = (const char __user *)PAGE_ALIGN((unsigned long)uaddr);
> +       }
> +       end = (const char __user *)PAGE_ALIGN((unsigned long)start + size);
> +       if (unlikely(end < start))
> +               end = NULL;
> +       while (uaddr != end) {

Same kind of issue here, when 'end' was set to NULL?

Thanks.

> +               if (unlikely(__get_user(c, uaddr) != 0))
> +                       goto out;
> +               uaddr += PAGE_SIZE;
> +       }
> +
> +out:
> +       (void)c;
> +       if (size > uaddr - start)
> +               return size - (uaddr - start);
> +       return 0;
> +}
> +EXPORT_SYMBOL(fault_in_readable);
> +
>  /**
>   * get_dump_page() - pin user page in memory while writing it to core dump
>   * @addr: user address
> --
> 2.26.3
>


-- 
Filipe David Manana,

“Whether you think you can, or you think you can't — you're right.”



