Re: [PATCH v7 03/19] gup: Turn fault_in_pages_{readable,writeable} into fault_in_{readable,writeable}

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Sep 3, 2021 at 4:57 PM Filipe Manana <fdmanana@xxxxxxxxx> wrote:
> On Fri, Aug 27, 2021 at 5:52 PM Andreas Gruenbacher <agruenba@xxxxxxxxxx> wrote:
> >
> > Turn fault_in_pages_{readable,writeable} into versions that return the
> > number of bytes not faulted in (similar to copy_to_user) instead of
> > returning a non-zero value when any of the requested pages couldn't be
> > faulted in.  This supports the existing users that require all pages to
> > be faulted in as well as new users that are happy if any pages can be
> > faulted in at all.
> >
> > Neither of these functions is entirely trivial and it doesn't seem
> > useful to inline them, so move them to mm/gup.c.
> >
> > Rename the functions to fault_in_{readable,writeable} to make sure that
> > this change doesn't silently break things.
> >
> > Signed-off-by: Andreas Gruenbacher <agruenba@xxxxxxxxxx>
> > ---
> >  arch/powerpc/kernel/kvm.c           |  3 +-
> >  arch/powerpc/kernel/signal_32.c     |  4 +-
> >  arch/powerpc/kernel/signal_64.c     |  2 +-
> >  arch/x86/kernel/fpu/signal.c        |  7 ++-
> >  drivers/gpu/drm/armada/armada_gem.c |  7 ++-
> >  fs/btrfs/ioctl.c                    |  5 +-
> >  include/linux/pagemap.h             | 57 ++---------------------
> >  lib/iov_iter.c                      | 10 ++--
> >  mm/filemap.c                        |  2 +-
> >  mm/gup.c                            | 72 +++++++++++++++++++++++++++++
> >  10 files changed, 93 insertions(+), 76 deletions(-)
> >
> > diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
> > index d89cf802d9aa..6568823cf306 100644
> > --- a/arch/powerpc/kernel/kvm.c
> > +++ b/arch/powerpc/kernel/kvm.c
> > @@ -669,7 +669,8 @@ static void __init kvm_use_magic_page(void)
> >         on_each_cpu(kvm_map_magic_page, &features, 1);
> >
> >         /* Quick self-test to see if the mapping works */
> > -       if (fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) {
> > +       if (fault_in_readable((const char __user *)KVM_MAGIC_PAGE,
> > +                             sizeof(u32))) {
> >                 kvm_patching_worked = false;
> >                 return;
> >         }
> > diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
> > index 0608581967f0..38c3eae40c14 100644
> > --- a/arch/powerpc/kernel/signal_32.c
> > +++ b/arch/powerpc/kernel/signal_32.c
> > @@ -1048,7 +1048,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
> >         if (new_ctx == NULL)
> >                 return 0;
> >         if (!access_ok(new_ctx, ctx_size) ||
> > -           fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
> > +           fault_in_readable((char __user *)new_ctx, ctx_size))
> >                 return -EFAULT;
> >
> >         /*
> > @@ -1237,7 +1237,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
> >  #endif
> >
> >         if (!access_ok(ctx, sizeof(*ctx)) ||
> > -           fault_in_pages_readable((u8 __user *)ctx, sizeof(*ctx)))
> > +           fault_in_readable((char __user *)ctx, sizeof(*ctx)))
> >                 return -EFAULT;
> >
> >         /*
> > diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
> > index 1831bba0582e..9f471b4a11e3 100644
> > --- a/arch/powerpc/kernel/signal_64.c
> > +++ b/arch/powerpc/kernel/signal_64.c
> > @@ -688,7 +688,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
> >         if (new_ctx == NULL)
> >                 return 0;
> >         if (!access_ok(new_ctx, ctx_size) ||
> > -           fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
> > +           fault_in_readable((char __user *)new_ctx, ctx_size))
> >                 return -EFAULT;
> >
> >         /*
> > diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
> > index 445c57c9c539..ba6bdec81603 100644
> > --- a/arch/x86/kernel/fpu/signal.c
> > +++ b/arch/x86/kernel/fpu/signal.c
> > @@ -205,7 +205,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
> >         fpregs_unlock();
> >
> >         if (ret) {
> > -               if (!fault_in_pages_writeable(buf_fx, fpu_user_xstate_size))
> > +               if (!fault_in_writeable(buf_fx, fpu_user_xstate_size))
> >                         goto retry;
> >                 return -EFAULT;
> >         }
> > @@ -278,10 +278,9 @@ static int restore_fpregs_from_user(void __user *buf, u64 xrestore,
> >                 if (ret != -EFAULT)
> >                         return -EINVAL;
> >
> > -               ret = fault_in_pages_readable(buf, size);
> > -               if (!ret)
> > +               if (!fault_in_readable(buf, size))
> >                         goto retry;
> > -               return ret;
> > +               return -EFAULT;
> >         }
> >
> >         /*
> > diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c
> > index 21909642ee4c..8fbb25913327 100644
> > --- a/drivers/gpu/drm/armada/armada_gem.c
> > +++ b/drivers/gpu/drm/armada/armada_gem.c
> > @@ -336,7 +336,7 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> >         struct drm_armada_gem_pwrite *args = data;
> >         struct armada_gem_object *dobj;
> >         char __user *ptr;
> > -       int ret;
> > +       int ret = 0;
> >
> >         DRM_DEBUG_DRIVER("handle %u off %u size %u ptr 0x%llx\n",
> >                 args->handle, args->offset, args->size, args->ptr);
> > @@ -349,9 +349,8 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> >         if (!access_ok(ptr, args->size))
> >                 return -EFAULT;
> >
> > -       ret = fault_in_pages_readable(ptr, args->size);
> > -       if (ret)
> > -               return ret;
> > +       if (fault_in_readable(ptr, args->size))
> > +               return -EFAULT;
> >
> >         dobj = armada_gem_object_lookup(file, args->handle);
> >         if (dobj == NULL)
> > diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> > index 0ba98e08a029..9233ecc31e2e 100644
> > --- a/fs/btrfs/ioctl.c
> > +++ b/fs/btrfs/ioctl.c
> > @@ -2244,9 +2244,8 @@ static noinline int search_ioctl(struct inode *inode,
> >         key.offset = sk->min_offset;
> >
> >         while (1) {
> > -               ret = fault_in_pages_writeable(ubuf + sk_offset,
> > -                                              *buf_size - sk_offset);
> > -               if (ret)
> > +               ret = -EFAULT;
> > +               if (fault_in_writeable(ubuf + sk_offset, *buf_size - sk_offset))
> >                         break;
> >
> >                 ret = btrfs_search_forward(root, &key, path, sk->min_transid);
> > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> > index ed02aa522263..7c9edc9694d9 100644
> > --- a/include/linux/pagemap.h
> > +++ b/include/linux/pagemap.h
> > @@ -734,61 +734,10 @@ int wait_on_page_private_2_killable(struct page *page);
> >  extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);
> >
> >  /*
> > - * Fault everything in given userspace address range in.
> > + * Fault in userspace address range.
> >   */
> > -static inline int fault_in_pages_writeable(char __user *uaddr, int size)
> > -{
> > -       char __user *end = uaddr + size - 1;
> > -
> > -       if (unlikely(size == 0))
> > -               return 0;
> > -
> > -       if (unlikely(uaddr > end))
> > -               return -EFAULT;
> > -       /*
> > -        * Writing zeroes into userspace here is OK, because we know that if
> > -        * the zero gets there, we'll be overwriting it.
> > -        */
> > -       do {
> > -               if (unlikely(__put_user(0, uaddr) != 0))
> > -                       return -EFAULT;
> > -               uaddr += PAGE_SIZE;
> > -       } while (uaddr <= end);
> > -
> > -       /* Check whether the range spilled into the next page. */
> > -       if (((unsigned long)uaddr & PAGE_MASK) ==
> > -                       ((unsigned long)end & PAGE_MASK))
> > -               return __put_user(0, end);
> > -
> > -       return 0;
> > -}
> > -
> > -static inline int fault_in_pages_readable(const char __user *uaddr, int size)
> > -{
> > -       volatile char c;
> > -       const char __user *end = uaddr + size - 1;
> > -
> > -       if (unlikely(size == 0))
> > -               return 0;
> > -
> > -       if (unlikely(uaddr > end))
> > -               return -EFAULT;
> > -
> > -       do {
> > -               if (unlikely(__get_user(c, uaddr) != 0))
> > -                       return -EFAULT;
> > -               uaddr += PAGE_SIZE;
> > -       } while (uaddr <= end);
> > -
> > -       /* Check whether the range spilled into the next page. */
> > -       if (((unsigned long)uaddr & PAGE_MASK) ==
> > -                       ((unsigned long)end & PAGE_MASK)) {
> > -               return __get_user(c, end);
> > -       }
> > -
> > -       (void)c;
> > -       return 0;
> > -}
> > +size_t fault_in_writeable(char __user *uaddr, size_t size);
> > +size_t fault_in_readable(const char __user *uaddr, size_t size);
> >
> >  int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
> >                                 pgoff_t index, gfp_t gfp_mask);
> > diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> > index 25dfc48536d7..069cedd9d7b4 100644
> > --- a/lib/iov_iter.c
> > +++ b/lib/iov_iter.c
> > @@ -191,7 +191,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
> >         buf = iov->iov_base + skip;
> >         copy = min(bytes, iov->iov_len - skip);
> >
> > -       if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
> > +       if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_writeable(buf, copy)) {
> >                 kaddr = kmap_atomic(page);
> >                 from = kaddr + offset;
> >
> > @@ -275,7 +275,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
> >         buf = iov->iov_base + skip;
> >         copy = min(bytes, iov->iov_len - skip);
> >
> > -       if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
> > +       if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_readable(buf, copy)) {
> >                 kaddr = kmap_atomic(page);
> >                 to = kaddr + offset;
> >
> > @@ -446,13 +446,11 @@ int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes)
> >                         bytes = i->count;
> >                 for (p = i->iov, skip = i->iov_offset; bytes; p++, skip = 0) {
> >                         size_t len = min(bytes, p->iov_len - skip);
> > -                       int err;
> >
> >                         if (unlikely(!len))
> >                                 continue;
> > -                       err = fault_in_pages_readable(p->iov_base + skip, len);
> > -                       if (unlikely(err))
> > -                               return err;
> > +                       if (fault_in_readable(p->iov_base + skip, len))
> > +                               return -EFAULT;
> >                         bytes -= len;
> >                 }
> >         }
> > diff --git a/mm/filemap.c b/mm/filemap.c
> > index d1458ecf2f51..4dec3bc7752e 100644
> > --- a/mm/filemap.c
> > +++ b/mm/filemap.c
> > @@ -88,7 +88,7 @@
> >   *    ->lock_page              (access_process_vm)
> >   *
> >   *  ->i_mutex                  (generic_perform_write)
> > - *    ->mmap_lock              (fault_in_pages_readable->do_page_fault)
> > + *    ->mmap_lock              (fault_in_readable->do_page_fault)
> >   *
> >   *  bdi->wb.list_lock
> >   *    sb_lock                  (fs/fs-writeback.c)
> > diff --git a/mm/gup.c b/mm/gup.c
> > index b94717977d17..0cf47955e5a1 100644
> > --- a/mm/gup.c
> > +++ b/mm/gup.c
> > @@ -1672,6 +1672,78 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
> >  }
> >  #endif /* !CONFIG_MMU */
> >
> > +/**
> > + * fault_in_writeable - fault in userspace address range for writing
> > + * @uaddr: start of address range
> > + * @size: size of address range
> > + *
> > + * Returns the number of bytes not faulted in (like copy_to_user() and
> > + * copy_from_user()).
> > + */
> > +size_t fault_in_writeable(char __user *uaddr, size_t size)
> > +{
> > +       char __user *start = uaddr, *end;
> > +
> > +       if (unlikely(size == 0))
> > +               return 0;
> > +       if (!PAGE_ALIGNED(uaddr)) {
> > +               if (unlikely(__put_user(0, uaddr) != 0))
> > +                       return size;
> > +               uaddr = (char __user *)PAGE_ALIGN((unsigned long)uaddr);
> > +       }
> > +       end = (char __user *)PAGE_ALIGN((unsigned long)start + size);
> > +       if (unlikely(end < start))
> > +               end = NULL;
> > +       while (uaddr != end) {
> > +               if (unlikely(__put_user(0, uaddr) != 0))
> > +                       goto out;
> > +               uaddr += PAGE_SIZE;
>
> Won't we loop endlessly or corrupt some unwanted page when 'end' was
> set to NULL?

What do you mean? We set 'end' to NULL when start + size < start
exactly so that the loop will stop when uaddr wraps around.

> > +       }
> > +
> > +out:
> > +       if (size > uaddr - start)
> > +               return size - (uaddr - start);
> > +       return 0;
> > +}
> > +EXPORT_SYMBOL(fault_in_writeable);
> > +
> > +/**
> > + * fault_in_readable - fault in userspace address range for reading
> > + * @uaddr: start of user address range
> > + * @size: size of user address range
> > + *
> > + * Returns the number of bytes not faulted in (like copy_to_user() and
> > + * copy_from_user()).
> > + */
> > +size_t fault_in_readable(const char __user *uaddr, size_t size)
> > +{
> > +       const char __user *start = uaddr, *end;
> > +       volatile char c;
> > +
> > +       if (unlikely(size == 0))
> > +               return 0;
> > +       if (!PAGE_ALIGNED(uaddr)) {
> > +               if (unlikely(__get_user(c, uaddr) != 0))
> > +                       return size;
> > +               uaddr = (const char __user *)PAGE_ALIGN((unsigned long)uaddr);
> > +       }
> > +       end = (const char __user *)PAGE_ALIGN((unsigned long)start + size);
> > +       if (unlikely(end < start))
> > +               end = NULL;
> > +       while (uaddr != end) {
>
> Same kind of issue here, when 'end' was set to NULL?
>
> Thanks.
>
> > +               if (unlikely(__get_user(c, uaddr) != 0))
> > +                       goto out;
> > +               uaddr += PAGE_SIZE;
> > +       }
> > +
> > +out:
> > +       (void)c;
> > +       if (size > uaddr - start)
> > +               return size - (uaddr - start);
> > +       return 0;
> > +}
> > +EXPORT_SYMBOL(fault_in_readable);
> > +
> >  /**
> >   * get_dump_page() - pin user page in memory while writing it to core dump
> >   * @addr: user address
> > --
> > 2.26.3
> >

Thanks,
Andreas




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux