From: Jeff Xu <jeffxu@xxxxxxxxxxxx> Add merge/split handling for the mlock/madvise/mprotect/mmap cases. Make a sealed VMA mergeable with adjacent VMAs that carry the same seals. This is so that we don't run out of VMAs, since there is a maximum number of VMAs per process. Signed-off-by: Jeff Xu <jeffxu@xxxxxxxxxxxx> Suggested-by: Jann Horn <jannh@xxxxxxxxxx> --- fs/userfaultfd.c | 8 +++++--- include/linux/mm.h | 31 +++++++++++++------------------ mm/madvise.c | 2 +- mm/mempolicy.c | 2 +- mm/mlock.c | 2 +- mm/mmap.c | 44 +++++++++++++++++++++----------------------- mm/mprotect.c | 2 +- mm/mremap.c | 2 +- mm/mseal.c | 23 ++++++++++++++++++----- 9 files changed, 62 insertions(+), 54 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 56eaae9dac1a..8ebee7c1c6cf 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -926,7 +926,8 @@ static int userfaultfd_release(struct inode *inode, struct file *file) new_flags, vma->anon_vma, vma->vm_file, vma->vm_pgoff, vma_policy(vma), - NULL_VM_UFFD_CTX, anon_vma_name(vma)); + NULL_VM_UFFD_CTX, anon_vma_name(vma), + vma_seals(vma)); if (prev) { vma = prev; } else { @@ -1483,7 +1484,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), ((struct vm_userfaultfd_ctx){ ctx }), - anon_vma_name(vma)); + anon_vma_name(vma), vma_seals(vma)); if (prev) { /* vma_merge() invalidated the mas */ vma = prev; @@ -1668,7 +1669,8 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - NULL_VM_UFFD_CTX, anon_vma_name(vma)); + NULL_VM_UFFD_CTX, anon_vma_name(vma), + vma_seals(vma)); if (prev) { vma = prev; goto next; diff --git a/include/linux/mm.h b/include/linux/mm.h index 5d3ee79f1438..1f162bb5b38d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3243,7 +3243,7 @@ extern struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *, struct vm_area_struct 
*prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, struct mempolicy *, struct vm_userfaultfd_ctx, - struct anon_vma_name *); + struct anon_vma_name *, unsigned long vm_seals); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); extern int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *, unsigned long addr, int new_below); @@ -3327,19 +3327,6 @@ static inline void mm_populate(unsigned long addr, unsigned long len) {} #endif #ifdef CONFIG_MSEAL -static inline bool check_vma_seals_mergeable(unsigned long vm_seals) -{ - /* - * Set sealed VMA not mergeable with another VMA for now. - * This will be changed in later commit to make sealed - * VMA also mergeable. - */ - if (vm_seals & MM_SEAL_ALL) - return false; - - return true; -} - /* * return the valid sealing (after mask). */ @@ -3353,6 +3340,14 @@ static inline void update_vma_seals(struct vm_area_struct *vma, unsigned long vm vma->vm_seals |= vm_seals; } +static inline bool check_vma_seals_mergeable(unsigned long vm_seals1, unsigned long vm_seals2) +{ + if ((vm_seals1 & MM_SEAL_ALL) != (vm_seals2 & MM_SEAL_ALL)) + return false; + + return true; +} + extern bool can_modify_mm(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long checkSeals); @@ -3390,14 +3385,14 @@ static inline int check_mmap_seals(unsigned long prot, unsigned long *vm_seals) return 0; } #else -static inline bool check_vma_seals_mergeable(unsigned long vm_seals1) +static inline unsigned long vma_seals(struct vm_area_struct *vma) { - return true; + return 0; } -static inline unsigned long vma_seals(struct vm_area_struct *vma) +static inline bool check_vma_seals_mergeable(unsigned long vm_seals1, unsigned long vm_seals2) { - return 0; + return true; } static inline bool can_modify_mm(struct mm_struct *mm, unsigned long start, diff --git a/mm/madvise.c b/mm/madvise.c index 4dded5d27e7e..e2d219a4b6ef 100644 --- a/mm/madvise.c +++ 
b/mm/madvise.c @@ -152,7 +152,7 @@ static int madvise_update_vma(struct vm_area_struct *vma, pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *prev = vma_merge(&vmi, mm, *prev, start, end, new_flags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, anon_name); + vma->vm_userfaultfd_ctx, anon_name, vma_seals(vma)); if (*prev) { vma = *prev; goto success; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e52e3a0b8f2e..e70b69c64564 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -836,7 +836,7 @@ static int mbind_range(struct vma_iterator *vmi, struct vm_area_struct *vma, pgoff = vma->vm_pgoff + ((vmstart - vma->vm_start) >> PAGE_SHIFT); merged = vma_merge(vmi, vma->vm_mm, *prev, vmstart, vmend, vma->vm_flags, vma->anon_vma, vma->vm_file, pgoff, new_pol, - vma->vm_userfaultfd_ctx, anon_vma_name(vma)); + vma->vm_userfaultfd_ctx, anon_vma_name(vma), vma_seals(vma)); if (merged) { *prev = merged; return vma_replace_policy(merged, new_pol); diff --git a/mm/mlock.c b/mm/mlock.c index 06bdfab83b58..b537a2cbd337 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -428,7 +428,7 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *prev = vma_merge(vmi, mm, *prev, start, end, newflags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, anon_vma_name(vma)); + vma->vm_userfaultfd_ctx, anon_vma_name(vma), vma_seals(vma)); if (*prev) { vma = *prev; goto success; diff --git a/mm/mmap.c b/mm/mmap.c index 3e1bf5a131b0..6da8d83f2e66 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -720,7 +720,8 @@ int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, static inline bool is_mergeable_vma(struct vm_area_struct *vma, struct file *file, unsigned long vm_flags, struct vm_userfaultfd_ctx vm_userfaultfd_ctx, - struct anon_vma_name *anon_name, bool may_remove_vma) + struct anon_vma_name *anon_name, bool may_remove_vma, + 
unsigned long vm_seals) { /* * VM_SOFTDIRTY should not prevent from VMA merging, if we @@ -740,7 +741,7 @@ static inline bool is_mergeable_vma(struct vm_area_struct *vma, return false; if (!anon_vma_name_eq(anon_vma_name(vma), anon_name)) return false; - if (!check_vma_seals_mergeable(vma_seals(vma))) + if (!check_vma_seals_mergeable(vma_seals(vma), vm_seals)) return false; return true; @@ -776,9 +777,10 @@ static bool can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags, struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff, struct vm_userfaultfd_ctx vm_userfaultfd_ctx, - struct anon_vma_name *anon_name) + struct anon_vma_name *anon_name, unsigned long vm_seals) { - if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name, true) && + if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, + anon_name, true, vm_seals) && is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) { if (vma->vm_pgoff == vm_pgoff) return true; @@ -799,9 +801,10 @@ static bool can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags, struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff, struct vm_userfaultfd_ctx vm_userfaultfd_ctx, - struct anon_vma_name *anon_name) + struct anon_vma_name *anon_name, unsigned long vm_seals) { - if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name, false) && + if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, + anon_name, false, vm_seals) && is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) { pgoff_t vm_pglen; vm_pglen = vma_pages(vma); @@ -869,7 +872,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, struct anon_vma *anon_vma, struct file *file, pgoff_t pgoff, struct mempolicy *policy, struct vm_userfaultfd_ctx vm_userfaultfd_ctx, - struct anon_vma_name *anon_name) + struct anon_vma_name *anon_name, unsigned long vm_seals) { struct vm_area_struct *curr, *next, *res; struct vm_area_struct *vma, *adjust, *remove, 
*remove2; @@ -908,7 +911,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, /* Can we merge the predecessor? */ if (addr == prev->vm_end && mpol_equal(vma_policy(prev), policy) && can_vma_merge_after(prev, vm_flags, anon_vma, file, - pgoff, vm_userfaultfd_ctx, anon_name)) { + pgoff, vm_userfaultfd_ctx, anon_name, vm_seals)) { merge_prev = true; vma_prev(vmi); } @@ -917,7 +920,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, /* Can we merge the successor? */ if (next && mpol_equal(policy, vma_policy(next)) && can_vma_merge_before(next, vm_flags, anon_vma, file, pgoff+pglen, - vm_userfaultfd_ctx, anon_name)) { + vm_userfaultfd_ctx, anon_name, vm_seals)) { merge_next = true; } @@ -2727,13 +2730,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr, next = vma_next(&vmi); prev = vma_prev(&vmi); - /* - * For now, sealed VMA doesn't merge with other VMA, - * Will change this in later commit when we make sealed VMA - * also mergeable. - */ - if ((vm_flags & VM_SPECIAL) || - (vm_seals & MM_SEAL_ALL)) { + + if (vm_flags & VM_SPECIAL) { if (prev) vma_iter_next_range(&vmi); goto cannot_expand; @@ -2743,7 +2741,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, /* Check next */ if (next && next->vm_start == end && !vma_policy(next) && can_vma_merge_before(next, vm_flags, NULL, file, pgoff+pglen, - NULL_VM_UFFD_CTX, NULL)) { + NULL_VM_UFFD_CTX, NULL, vm_seals)) { merge_end = next->vm_end; vma = next; vm_pgoff = next->vm_pgoff - pglen; @@ -2752,9 +2750,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr, /* Check prev */ if (prev && prev->vm_end == addr && !vma_policy(prev) && (vma ? 
can_vma_merge_after(prev, vm_flags, vma->anon_vma, file, - pgoff, vma->vm_userfaultfd_ctx, NULL) : + pgoff, vma->vm_userfaultfd_ctx, NULL, vm_seals) : can_vma_merge_after(prev, vm_flags, NULL, file, pgoff, - NULL_VM_UFFD_CTX, NULL))) { + NULL_VM_UFFD_CTX, NULL, vm_seals))) { merge_start = prev->vm_start; vma = prev; vm_pgoff = prev->vm_pgoff; @@ -2822,7 +2820,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, merge = vma_merge(&vmi, mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags, NULL, vma->vm_file, vma->vm_pgoff, NULL, - NULL_VM_UFFD_CTX, NULL); + NULL_VM_UFFD_CTX, NULL, vma_seals(vma)); if (merge) { /* * ->mmap() can change vma->vm_file and fput @@ -3130,14 +3128,14 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma, if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT)) return -ENOMEM; - /* * Expand the existing vma if possible; Note that singular lists do not * occur after forking, so the expand will only happen on new VMAs. */ if (vma && vma->vm_end == addr && !vma_policy(vma) && can_vma_merge_after(vma, flags, NULL, NULL, - addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) { + addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL, + vma_seals(vma))) { vma_iter_config(vmi, vma->vm_start, addr + len); if (vma_iter_prealloc(vmi, vma)) goto unacct_fail; @@ -3380,7 +3378,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, new_vma = vma_merge(&vmi, mm, prev, addr, addr + len, vma->vm_flags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, anon_vma_name(vma)); + vma->vm_userfaultfd_ctx, anon_vma_name(vma), vma_seals(vma)); if (new_vma) { /* * Source vma may have been merged into new_vma diff --git a/mm/mprotect.c b/mm/mprotect.c index 1527188b1e92..a4c90e71607b 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -632,7 +632,7 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb, pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *pprev = vma_merge(vmi, mm, 
*pprev, start, end, newflags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, anon_vma_name(vma)); + vma->vm_userfaultfd_ctx, anon_vma_name(vma), vma_seals(vma)); if (*pprev) { vma = *pprev; VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY); diff --git a/mm/mremap.c b/mm/mremap.c index ff7429bfbbe1..357efd6b48b9 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -1098,7 +1098,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, vma = vma_merge(&vmi, mm, vma, extension_start, extension_end, vma->vm_flags, vma->anon_vma, vma->vm_file, extension_pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, anon_vma_name(vma)); + vma->vm_userfaultfd_ctx, anon_vma_name(vma), vma_seals(vma)); if (!vma) { vm_unacct_memory(pages); ret = -ENOMEM; diff --git a/mm/mseal.c b/mm/mseal.c index d12aa628ebdc..3b90dce7d20e 100644 --- a/mm/mseal.c +++ b/mm/mseal.c @@ -7,8 +7,10 @@ * Author: Jeff Xu <jeffxu@xxxxxxxxxxxx> */ +#include <linux/mempolicy.h> #include <linux/mman.h> #include <linux/mm.h> +#include <linux/mm_inline.h> #include <linux/syscalls.h> #include <linux/sched.h> #include "internal.h" @@ -81,14 +83,25 @@ static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, unsigned long addtypes) { + pgoff_t pgoff; int ret = 0; + unsigned long newtypes = vma_seals(vma) | addtypes; + + if (newtypes != vma_seals(vma)) { + /* + * Attempt to merge with prev and next vma. + */ + pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); + *prev = vma_merge(vmi, vma->vm_mm, *prev, start, end, vma->vm_flags, + vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), + vma->vm_userfaultfd_ctx, anon_vma_name(vma), newtypes); + if (*prev) { + vma = *prev; + goto out; + } - if (addtypes & ~(vma_seals(vma))) { /* * Handle split at start and end. - * For now sealed VMA doesn't merge with other VMAs. 
- * This will be updated in later commit to make - * sealed VMA also mergeable. */ if (start != vma->vm_start) { ret = split_vma(vmi, vma, start, 1); @@ -102,7 +115,7 @@ static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, goto out; } - vma->vm_seals |= addtypes; + vma->vm_seals = newtypes; } out: -- 2.43.0.472.g3155946c3a-goog