From: Nadav Amit <namit@xxxxxxxxxx>

The next patches are going to introduce more information that needs to
be propagated regarding handled user requests. Introduce uffd_flags,
which will be used to propagate this information.

Remove the unused UFFD_FLAGS_SET to avoid confusion in the constant
names.

Introducing uffd_flags also allows mm/userfaultfd to avoid using uapi
constants directly (e.g., UFFDIO_COPY_MODE_WP).
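
For illustration, this is the intended conversion at the ioctl layer (a
sketch only; the surrounding validation code is omitted):

	uffd_flags_t uffd_flags = UFFD_FLAGS_NONE;

	if (uffdio_copy.mode & UFFDIO_COPY_MODE_WP)	/* uapi bit */
		uffd_flags |= UFFD_FLAGS_WP;		/* internal flag */

Since uffd_flags_t is __bitwise, sparse (make C=1) warns whenever the
uapi and internal flag namespaces are mixed, e.g. if a uffdio_copy.mode
value is assigned to a uffd_flags_t variable without a __force cast.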

Cc: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
Cc: Hugh Dickins <hughd@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Axel Rasmussen <axelrasmussen@xxxxxxxxxx>
Cc: Peter Xu <peterx@xxxxxxxxxx>
Cc: Mike Rapoport <rppt@xxxxxxxxxxxxx>
Acked-by: David Hildenbrand <david@xxxxxxxxxx>
Signed-off-by: Nadav Amit <namit@xxxxxxxxxx>
---
 fs/userfaultfd.c              | 21 ++++++++++----
 include/linux/hugetlb.h       |  4 +--
 include/linux/shmem_fs.h      |  8 ++++--
 include/linux/userfaultfd_k.h | 24 ++++++++++------
 mm/hugetlb.c                  |  3 +-
 mm/shmem.c                    |  6 ++--
 mm/userfaultfd.c              | 53 ++++++++++++++++++-----------------
 7 files changed, 70 insertions(+), 49 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index d398f6bf6d74..a44e46f8249f 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1700,6 +1700,8 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
 	struct uffdio_copy uffdio_copy;
 	struct uffdio_copy __user *user_uffdio_copy;
 	struct userfaultfd_wake_range range;
+	bool mode_wp;
+	uffd_flags_t uffd_flags;
 
 	user_uffdio_copy = (struct uffdio_copy __user *) arg;
 
@@ -1726,10 +1728,15 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
 		goto out;
 	if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE|UFFDIO_COPY_MODE_WP))
 		goto out;
+
+	mode_wp = uffdio_copy.mode & UFFDIO_COPY_MODE_WP;
+
+	uffd_flags = mode_wp ? UFFD_FLAGS_WP : UFFD_FLAGS_NONE;
+
 	if (mmget_not_zero(ctx->mm)) {
 		ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
 				   uffdio_copy.len, &ctx->mmap_changing,
-				   uffdio_copy.mode);
+				   uffd_flags);
 		mmput(ctx->mm);
 	} else {
 		return -ESRCH;
@@ -1757,6 +1764,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
 	struct uffdio_zeropage uffdio_zeropage;
 	struct uffdio_zeropage __user *user_uffdio_zeropage;
 	struct userfaultfd_wake_range range;
+	uffd_flags_t uffd_flags = UFFD_FLAGS_NONE;
 
 	user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg;
 
@@ -1781,7 +1789,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
 	if (mmget_not_zero(ctx->mm)) {
 		ret = mfill_zeropage(ctx->mm, uffdio_zeropage.range.start,
 				     uffdio_zeropage.range.len,
-				     &ctx->mmap_changing);
+				     &ctx->mmap_changing, uffd_flags);
 		mmput(ctx->mm);
 	} else {
 		return -ESRCH;
@@ -1810,6 +1818,7 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
 	struct uffdio_writeprotect __user *user_uffdio_wp;
 	struct userfaultfd_wake_range range;
 	bool mode_wp, mode_dontwake;
+	uffd_flags_t uffd_flags;
 
 	if (atomic_read(&ctx->mmap_changing))
 		return -EAGAIN;
@@ -1835,10 +1844,12 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
 	if (mode_wp && mode_dontwake)
 		return -EINVAL;
 
+	uffd_flags = mode_wp ? UFFD_FLAGS_WP : UFFD_FLAGS_NONE;
+
 	if (mmget_not_zero(ctx->mm)) {
 		ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start,
-					  uffdio_wp.range.len, mode_wp,
-					  &ctx->mmap_changing);
+					  uffdio_wp.range.len,
+					  &ctx->mmap_changing, uffd_flags);
 		mmput(ctx->mm);
 	} else {
 		return -ESRCH;
@@ -1891,7 +1902,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
 	if (mmget_not_zero(ctx->mm)) {
 		ret = mcopy_continue(ctx->mm, uffdio_continue.range.start,
 				     uffdio_continue.range.len,
-				     &ctx->mmap_changing);
+				     &ctx->mmap_changing, UFFD_FLAGS_NONE);
 		mmput(ctx->mm);
 	} else {
 		return -ESRCH;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 642a39016f9a..a4f326bc2de6 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -166,7 +166,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
 				unsigned long src_addr,
 				enum mcopy_atomic_mode mode,
 				struct page **pagep,
-				bool wp_copy);
+				uffd_flags_t uffd_flags);
 #endif /* CONFIG_USERFAULTFD */
 bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
 						struct vm_area_struct *vma,
@@ -366,7 +366,7 @@ static inline int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 						unsigned long src_addr,
 						enum mcopy_atomic_mode mode,
 						struct page **pagep,
-						bool wp_copy)
+						uffd_flags_t uffd_flags)
 {
 	BUG();
 	return 0;
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index a68f982f22d1..f93a3c114002 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -9,6 +9,7 @@
 #include <linux/percpu_counter.h>
 #include <linux/xattr.h>
 #include <linux/fs_parser.h>
+#include <linux/userfaultfd_k.h>
 
 /* inode in-kernel data */
 
@@ -145,11 +146,12 @@ extern int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 				  pmd_t *dst_pmd,
 				  struct vm_area_struct *dst_vma,
 				  unsigned long dst_addr,
 				  unsigned long src_addr,
-				  bool zeropage, bool wp_copy,
-				  struct page **pagep);
+				  bool zeropage,
+				  struct page **pagep,
+				  uffd_flags_t uffd_flags);
 #else /* !CONFIG_SHMEM */
 #define shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr, \
-			       src_addr, zeropage, wp_copy, pagep) ({ BUG(); 0; })
+			       src_addr, zeropage, pagep, uffd_flags) ({ BUG(); 0; })
 #endif /* CONFIG_SHMEM */
 #endif /* CONFIG_USERFAULTFD */
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index eee374c29c85..d5b3dff48a87 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -34,7 +34,6 @@
 #define UFFD_NONBLOCK O_NONBLOCK
 
 #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
-#define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
 
 extern int sysctl_unprivileged_userfaultfd;
 
@@ -56,23 +55,30 @@ enum mcopy_atomic_mode {
 	MCOPY_ATOMIC_CONTINUE,
 };
 
+typedef unsigned int __bitwise uffd_flags_t;
+
+#define UFFD_FLAGS_NONE	((__force uffd_flags_t)0)
+#define UFFD_FLAGS_WP	((__force uffd_flags_t)BIT(0))
+
 extern int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
 				    struct vm_area_struct *dst_vma,
 				    unsigned long dst_addr, struct page *page,
-				    bool newly_allocated, bool wp_copy);
+				    bool newly_allocated,
+				    uffd_flags_t uffd_flags);
 
 extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
 			    unsigned long src_start, unsigned long len,
-			    atomic_t *mmap_changing, __u64 mode);
-extern ssize_t mfill_zeropage(struct mm_struct *dst_mm,
-			      unsigned long dst_start,
-			      unsigned long len,
-			      atomic_t *mmap_changing);
+			    atomic_t *mmap_changing, uffd_flags_t uffd_flags);
+extern ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long dst_start,
+			      unsigned long len, atomic_t *mmap_changing,
+			      uffd_flags_t uffd_flags);
 extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start,
-			      unsigned long len, atomic_t *mmap_changing);
+			      unsigned long len, atomic_t *mmap_changing,
+			      uffd_flags_t uffd_flags);
 extern int mwriteprotect_range(struct mm_struct *dst_mm,
 			       unsigned long start, unsigned long len,
-			       bool enable_wp, atomic_t *mmap_changing);
+			       atomic_t *mmap_changing,
+			       uffd_flags_t uffd_flags);
 
 /* mm helpers */
 static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2bc9d1170e4f..2beff8a4bf7c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5875,9 +5875,10 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 			    unsigned long src_addr,
 			    enum mcopy_atomic_mode mode,
 			    struct page **pagep,
-			    bool wp_copy)
+			    uffd_flags_t uffd_flags)
 {
 	bool is_continue = (mode == MCOPY_ATOMIC_CONTINUE);
+	bool wp_copy = uffd_flags & UFFD_FLAGS_WP;
 	struct hstate *h = hstate_vma(dst_vma);
 	struct address_space *mapping = dst_vma->vm_file->f_mapping;
 	pgoff_t idx = vma_hugecache_offset(h, dst_vma, dst_addr);
diff --git a/mm/shmem.c b/mm/shmem.c
index 12ac67dc831f..89c775275bae 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2343,8 +2343,8 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 			   struct vm_area_struct *dst_vma,
 			   unsigned long dst_addr,
 			   unsigned long src_addr,
-			   bool zeropage, bool wp_copy,
-			   struct page **pagep)
+			   bool zeropage, struct page **pagep,
+			   uffd_flags_t uffd_flags)
 {
 	struct inode *inode = file_inode(dst_vma->vm_file);
 	struct shmem_inode_info *info = SHMEM_I(inode);
@@ -2418,7 +2418,7 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 		goto out_release;
 
 	ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
-				       page, true, wp_copy);
+				       page, true, uffd_flags);
 	if (ret)
 		goto out_delete_from_cache;
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 07d3befc80e4..734de6aa0b8e 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -58,7 +58,7 @@ struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm,
 int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
 			     struct vm_area_struct *dst_vma,
 			     unsigned long dst_addr, struct page *page,
-			     bool newly_allocated, bool wp_copy)
+			     bool newly_allocated, uffd_flags_t uffd_flags)
 {
 	int ret;
 	pte_t _dst_pte, *dst_pte;
@@ -78,7 +78,7 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
 	 * Always mark a PTE as write-protected when needed, regardless of
 	 * VM_WRITE, which the user might change.
 	 */
-	if (wp_copy) {
+	if (uffd_flags & UFFD_FLAGS_WP) {
 		_dst_pte = pte_mkuffd_wp(_dst_pte);
 		writable = false;
 	}
@@ -145,7 +145,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
 			    unsigned long dst_addr,
 			    unsigned long src_addr,
 			    struct page **pagep,
-			    bool wp_copy)
+			    uffd_flags_t uffd_flags)
 {
 	void *page_kaddr;
 	int ret;
@@ -189,7 +189,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
 		goto out_release;
 
 	ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
-				       page, true, wp_copy);
+				       page, true, uffd_flags);
 	if (ret)
 		goto out_release;
 out:
@@ -239,7 +239,7 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
 				pmd_t *dst_pmd,
 				struct vm_area_struct *dst_vma,
 				unsigned long dst_addr,
-				bool wp_copy)
+				uffd_flags_t uffd_flags)
 {
 	struct inode *inode = file_inode(dst_vma->vm_file);
 	pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
@@ -263,7 +263,7 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
 	}
 
 	ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
-				       page, false, wp_copy);
+				       page, false, uffd_flags);
 	if (ret)
 		goto out_release;
 
@@ -309,7 +309,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 					      unsigned long src_start,
 					      unsigned long len,
 					      enum mcopy_atomic_mode mode,
-					      bool wp_copy)
+					      uffd_flags_t uffd_flags)
 {
 	int vm_shared = dst_vma->vm_flags & VM_SHARED;
 	ssize_t err;
@@ -406,7 +406,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 
 		err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
 					       dst_addr, src_addr, mode, &page,
-					       wp_copy);
+					       uffd_flags);
 
 		mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 		i_mmap_unlock_read(mapping);
@@ -462,7 +462,7 @@ extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 				      unsigned long src_start,
 				      unsigned long len,
 				      enum mcopy_atomic_mode mode,
-				      bool wp_copy);
+				      uffd_flags_t uffd_flags);
 #endif /* CONFIG_HUGETLB_PAGE */
 
 static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
@@ -472,13 +472,13 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
 						unsigned long src_addr,
 						struct page **page,
 						enum mcopy_atomic_mode mode,
-						bool wp_copy)
+						uffd_flags_t uffd_flags)
 {
 	ssize_t err;
 
 	if (mode == MCOPY_ATOMIC_CONTINUE) {
 		return mcontinue_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
-					    wp_copy);
+					    uffd_flags);
 	}
 
 	/*
@@ -495,7 +495,7 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
 		if (mode == MCOPY_ATOMIC_NORMAL)
 			err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
 					       dst_addr, src_addr, page,
-					       wp_copy);
+					       uffd_flags);
 		else
 			err = mfill_zeropage_pte(dst_mm, dst_pmd,
 						 dst_vma, dst_addr);
@@ -503,7 +503,7 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
 		err = shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
 					     dst_addr, src_addr,
 					     mode != MCOPY_ATOMIC_NORMAL,
-					     wp_copy, page);
+					     page, uffd_flags);
 	}
 
 	return err;
@@ -515,7 +515,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 					      unsigned long len,
 					      enum mcopy_atomic_mode mcopy_mode,
 					      atomic_t *mmap_changing,
-					      __u64 mode)
+					      uffd_flags_t uffd_flags)
 {
 	struct vm_area_struct *dst_vma;
 	ssize_t err;
@@ -523,7 +523,6 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 	unsigned long src_addr, dst_addr;
 	long copied;
 	struct page *page;
-	bool wp_copy;
 
 	/*
 	 * Sanitize the command parameters:
@@ -570,11 +569,10 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 		goto out_unlock;
 
 	/*
-	 * validate 'mode' now that we know the dst_vma: don't allow
+	 * validate 'flags' now that we know the dst_vma: don't allow
 	 * a wrprotect copy if the userfaultfd didn't register as WP.
 	 */
-	wp_copy = mode & UFFDIO_COPY_MODE_WP;
-	if (wp_copy && !(dst_vma->vm_flags & VM_UFFD_WP))
+	if ((uffd_flags & UFFD_FLAGS_WP) && !(dst_vma->vm_flags & VM_UFFD_WP))
 		goto out_unlock;
 
 	/*
@@ -583,7 +581,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 	if (is_vm_hugetlb_page(dst_vma))
 		return  __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
 					       src_start, len, mcopy_mode,
-					       wp_copy);
+					       uffd_flags);
 
 	if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
 		goto out_unlock;
@@ -635,7 +633,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 		BUG_ON(pmd_trans_huge(*dst_pmd));
 
 		err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
-				       src_addr, &page, mcopy_mode, wp_copy);
+				       src_addr, &page, mcopy_mode, uffd_flags);
 		cond_resched();
 
 		if (unlikely(err == -ENOENT)) {
@@ -683,30 +681,33 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 
 ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
 		     unsigned long src_start, unsigned long len,
-		     atomic_t *mmap_changing, __u64 mode)
+		     atomic_t *mmap_changing, uffd_flags_t uffd_flags)
 {
 	return __mcopy_atomic(dst_mm, dst_start, src_start, len,
-			      MCOPY_ATOMIC_NORMAL, mmap_changing, mode);
+			      MCOPY_ATOMIC_NORMAL, mmap_changing, uffd_flags);
 }
 
 ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
-		       unsigned long len, atomic_t *mmap_changing)
+		       unsigned long len, atomic_t *mmap_changing,
+		       uffd_flags_t uffd_flags)
 {
 	return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_ZEROPAGE,
-			      mmap_changing, 0);
+			      mmap_changing, uffd_flags);
 }
 
 ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,
-		       unsigned long len, atomic_t *mmap_changing)
+		       unsigned long len, atomic_t *mmap_changing,
+		       uffd_flags_t uffd_flags)
 {
 	return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_CONTINUE,
-			      mmap_changing, 0);
+			      mmap_changing, uffd_flags);
 }
 
 int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
-			unsigned long len, bool enable_wp,
-			atomic_t *mmap_changing)
+			unsigned long len,
+			atomic_t *mmap_changing, uffd_flags_t uffd_flags)
 {
+	bool enable_wp = uffd_flags & UFFD_FLAGS_WP;
 	struct vm_area_struct *dst_vma;
 	unsigned long page_mask;
 	struct mmu_gather tlb;
-- 
2.25.1
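
For reviewers, an illustrative userspace counterpart (not part of this
patch, which leaves the uapi unchanged; uffd, dst_addr, src_addr and
page_size are assumed to be set up by the caller, with the destination
range registered with UFFDIO_REGISTER_MODE_WP):

	struct uffdio_copy copy = {
		.dst = dst_addr,
		.src = src_addr,
		.len = page_size,
		/* uapi mode bit; translated to UFFD_FLAGS_WP in-kernel */
		.mode = UFFDIO_COPY_MODE_WP,
	};

	if (ioctl(uffd, UFFDIO_COPY, &copy) == -1)
		err(1, "UFFDIO_COPY");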