---
include/linux/mm.h | 24 ++++++++++++++++-----
mm/mmap.c | 53 +++++++++++++++++++++++++++++++++++-----------
mm/mremap.c | 13 ++++++++++++
3 files changed, 73 insertions(+), 17 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 906b9e06f18e..5d45b7d8718d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2343,18 +2343,32 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node);
/* mmap.c */
extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
+
extern int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
- struct vm_area_struct *expand);
+ struct vm_area_struct *expand, bool keep_locked);
+
static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start,
unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
{
- return __vma_adjust(vma, start, end, pgoff, insert, NULL);
+ return __vma_adjust(vma, start, end, pgoff, insert, NULL, false);
}
-extern struct vm_area_struct *vma_merge(struct mm_struct *,
+
+extern struct vm_area_struct *__vma_merge(struct mm_struct *mm,
+ struct vm_area_struct *prev, unsigned long addr, unsigned long end,
+ unsigned long vm_flags, struct anon_vma *anon, struct file *file,
+ pgoff_t pgoff, struct mempolicy *mpol,
+ struct vm_userfaultfd_ctx uff, bool keep_locked);
+
+static inline struct vm_area_struct *vma_merge(struct mm_struct *mm,
struct vm_area_struct *prev, unsigned long addr, unsigned long end,
- unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
- struct mempolicy *, struct vm_userfaultfd_ctx);
+ unsigned long vm_flags, struct anon_vma *anon, struct file *file,
+ pgoff_t off, struct mempolicy *pol, struct vm_userfaultfd_ctx uff)
+{
+ return __vma_merge(mm, prev, addr, end, vm_flags, anon, file, off,
+ pol, uff, false);
+}
+
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
unsigned long addr, int new_below);
diff --git a/mm/mmap.c b/mm/mmap.c
index b77ec0149249..13460b38b0fb 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -714,7 +714,7 @@ static inline void __vma_unlink_prev(struct mm_struct *mm,
*/
int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
- struct vm_area_struct *expand)
+ struct vm_area_struct *expand, bool keep_locked)
{
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
@@ -830,8 +830,12 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
importer->anon_vma = exporter->anon_vma;
error = anon_vma_clone(importer, exporter);
- if (error)
+ if (error) {
+ if (next && next != vma)
+ vm_raw_write_end(next);
+ vm_raw_write_end(vma);
return error;
+ }
}
}
again:
@@ -1025,7 +1029,8 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
if (next && next != vma)
vm_raw_write_end(next);
- vm_raw_write_end(vma);
+ if (!keep_locked)
+ vm_raw_write_end(vma);
validate_mm(mm);
@@ -1161,12 +1166,13 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
* parameter) may establish ptes with the wrong permissions of NNNN
* instead of the right permissions of XXXX.
*/
-struct vm_area_struct *vma_merge(struct mm_struct *mm,
+struct vm_area_struct *__vma_merge(struct mm_struct *mm,
struct vm_area_struct *prev, unsigned long addr,
unsigned long end, unsigned long vm_flags,
struct anon_vma *anon_vma, struct file *file,
pgoff_t pgoff, struct mempolicy *policy,
- struct vm_userfaultfd_ctx vm_userfaultfd_ctx)
+ struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+ bool keep_locked)
{
pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
struct vm_area_struct *area, *next;
@@ -1214,10 +1220,11 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
/* cases 1, 6 */
err = __vma_adjust(prev, prev->vm_start,
next->vm_end, prev->vm_pgoff, NULL,
- prev);
+ prev, keep_locked);
} else /* cases 2, 5, 7 */
err = __vma_adjust(prev, prev->vm_start,
- end, prev->vm_pgoff, NULL, prev);
+ end, prev->vm_pgoff, NULL, prev,
+ keep_locked);
if (err)
return NULL;
khugepaged_enter_vma_merge(prev, vm_flags);
@@ -1234,10 +1241,12 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
vm_userfaultfd_ctx)) {
if (prev && addr < prev->vm_end) /* case 4 */
err = __vma_adjust(prev, prev->vm_start,
- addr, prev->vm_pgoff, NULL, next);
+ addr, prev->vm_pgoff, NULL, next,
+ keep_locked);
else { /* cases 3, 8 */
err = __vma_adjust(area, addr, next->vm_end,
- next->vm_pgoff - pglen, NULL, next);
+ next->vm_pgoff - pglen, NULL, next,
+ keep_locked);
/*
* In case 3 area is already equal to next and
* this is a noop, but in case 8 "area" has
@@ -3259,9 +3268,20 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
return NULL; /* should never get here */
- new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
- vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
- vma->vm_userfaultfd_ctx);
+
+ /* There is 3 cases to manage here in
+ * AAAA AAAA AAAA AAAA
+ * PPPP.... PPPP......NNNN PPPP....NNNN PP........NN
+ * PPPPPPPP(A) PPPP..NNNNNNNN(B) PPPPPPPPPPPP(1) NULL
+ * PPPPPPPPNNNN(2)
+ * PPPPNNNNNNNN(3)
+ *
+ * new_vma == prev in case A,1,2
+ * new_vma == next in case B,3
+ */
+ new_vma = __vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
+ vma->anon_vma, vma->vm_file, pgoff,
+ vma_policy(vma), vma->vm_userfaultfd_ctx, true);
if (new_vma) {
/*
* Source vma may have been merged into new_vma
@@ -3299,6 +3319,15 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
get_file(new_vma->vm_file);
if (new_vma->vm_ops && new_vma->vm_ops->open)
new_vma->vm_ops->open(new_vma);
+ /*
+ * As the VMA is linked right now, it may be hit by the
+ * speculative page fault handler. But we don't want it to
+ * to start mapping page in this area until the caller has
+ * potentially move the pte from the moved VMA. To prevent
+ * that we protect it right now, and let the caller unprotect
+ * it once the move is done.
+ */