Cc: Alistair Popple <apopple@xxxxxxxxxx>
Cc: Mike Rapoport <rppt@xxxxxxxxxxxxxxxxxx>
Cc: Nadav Amit <nadav.amit@xxxxxxxxx>
Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
---
As discussed in [2], this is supposed to replace the fix by Peter:
[PATCH v3 1/2] mm/migrate: Fix read-only page got writable when recover
pte
This survives vm/selftests and my reproducers:
* migrating pages that are uffd-wp'ed using mbind() on a machine with 2
NUMA nodes
* Using a PROT_WRITE mapping with uffd-wp
* Using a PROT_READ|PROT_WRITE mapping with uffd-wp'ed pages and
mprotect()'ing it PROT_WRITE
* Using a PROT_READ|PROT_WRITE mapping with uffd-wp'ed pages and
temporarily mprotect()'ing it PROT_READ
uffd-wp properly triggers in all cases. On v8.1-rc8, all mre reproducers
fail.
It would be good to get some more testing feedback and review.
[2] https://lkml.kernel.org/r/20221202122748.113774-1-david@xxxxxxxxxx
---
fs/userfaultfd.c | 28 ++++++++++++++++++++++------
mm/mmap.c | 4 ++++
2 files changed, 26 insertions(+), 6 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 98ac37e34e3d..fb0733f2e623 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -108,6 +108,21 @@ static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx)
return ctx->features & UFFD_FEATURE_INITIALIZED;
}
+static void userfaultfd_set_vm_flags(struct vm_area_struct *vma,
+ vm_flags_t flags)
+{
+ const bool uffd_wp = !!((vma->vm_flags | flags) & VM_UFFD_WP);
+
+ vma->vm_flags = flags;
+ /*
+ * For shared mappings, we want to enable writenotify while
+ * userfaultfd-wp is enabled (see vma_wants_writenotify()). We'll simply
+ * recalculate vma->vm_page_prot whenever userfaultfd-wp is involved.
+ */
+ if ((vma->vm_flags & VM_SHARED) && uffd_wp)
+ vma_set_page_prot(vma);
+}
+
static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
int wake_flags, void *key)
{
@@ -618,7 +633,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
for_each_vma(vmi, vma) {
if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
- vma->vm_flags &= ~__VM_UFFD_FLAGS;
+ userfaultfd_set_vm_flags(vma,
+ vma->vm_flags & ~__VM_UFFD_FLAGS);
}
}
mmap_write_unlock(mm);
@@ -652,7 +668,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
octx = vma->vm_userfaultfd_ctx.ctx;
if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
- vma->vm_flags &= ~__VM_UFFD_FLAGS;
+ userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS);
return 0;
}
@@ -733,7 +749,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
} else {
/* Drop uffd context if remap feature not enabled */
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
- vma->vm_flags &= ~__VM_UFFD_FLAGS;
+ userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS);
}
}
@@ -895,7 +911,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
prev = vma;
}
- vma->vm_flags = new_flags;
+ userfaultfd_set_vm_flags(vma, new_flags);
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
}
mmap_write_unlock(mm);
@@ -1463,7 +1479,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
* the next vma was merged into the current one and
* the current one has not been updated yet.
*/
- vma->vm_flags = new_flags;
+ userfaultfd_set_vm_flags(vma, new_flags);
vma->vm_userfaultfd_ctx.ctx = ctx;
if (is_vm_hugetlb_page(vma) && uffd_disable_huge_pmd_share(vma))
@@ -1651,7 +1667,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
* the next vma was merged into the current one and
* the current one has not been updated yet.
*/
- vma->vm_flags = new_flags;
+ userfaultfd_set_vm_flags(vma, new_flags);
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
skip:
diff --git a/mm/mmap.c b/mm/mmap.c
index a5eb2f175da0..6033d20198b0 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1525,6 +1525,10 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
if (vma_soft_dirty_enabled(vma) && !is_vm_hugetlb_page(vma))
return 1;
+ /* Do we need write faults for uffd-wp tracking? */
+ if (userfaultfd_wp(vma))
+ return 1;
+
/* Specialty mapping? */
if (vm_flags & VM_PFNMAP)
return 0;
base-commit: 8ed710da2873c2aeb3bb805864a699affaf1d03b