[PATCH v1 09/11] mm: gup: trigger unsharing via FAULT_FLAG_UNSHARE when required (hugetlb)

David Hildenbrand <david@xxxxxxxxxx> · Fri, 17 Dec 2021 12:30:47 +0100

Similar to the !hugetlb variant, invoke unsharing for shared anonymous
pages when required during GUP by setting FOLL_FAULT_UNSHARE in hugetlb
code as well.

FAULT_FLAG_UNSHARE will trigger unsharing of shared anonymous pages during
GUP, resulting in a child process no longer being able to observe memory
modifications performed by the parent after fork() to anonymous shared
hugetlb pages.

This commit is based on prototype patches by Andrea.

Co-developed-by: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Signed-off-by: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Reviewed-by: Peter Xu <peterx@xxxxxxxxxx>
Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
---
 mm/gup.c     |  3 +--
 mm/hugetlb.c | 43 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 2a83388c3fb4..35d1b28e3829 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -67,8 +67,7 @@ bool gup_must_unshare(unsigned int flags, struct page *page, bool is_head)
 	if (PageKsm(page))
 		return false;
 	if (PageHuge(page))
-		/* TODO: handle hugetlb as well. */
-		return false;
+		return __page_mapcount(page) > 1;
 	if (is_head) {
 		VM_BUG_ON(!PageTransHuge(page));
 		return page_trans_huge_mapcount(page, NULL) > 1;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5f2863b046ef..dc42018ee1a6 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5971,6 +5971,25 @@ static void record_subpages_vmas(struct page *page, struct vm_area_struct *vma,
 	}
 }
 
+static inline bool __follow_hugetlb_must_fault(unsigned int flags, pte_t *pte,
+					       bool *unshare)
+{
+	pte_t pteval = huge_ptep_get(pte);
+
+	*unshare = false;
+	if (is_swap_pte(pteval))
+		return true;
+	if (huge_pte_write(pteval))
+		return false;
+	if (flags & FOLL_WRITE)
+		return true;
+	if (gup_must_unshare(flags, pte_page(pteval), true)) {
+		*unshare = true;
+		return true;
+	}
+	return false;
+}
+
 long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			 struct page **pages, struct vm_area_struct **vmas,
 			 unsigned long *position, unsigned long *nr_pages,
@@ -5985,6 +6004,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	while (vaddr < vma->vm_end && remainder) {
 		pte_t *pte;
 		spinlock_t *ptl = NULL;
+		bool unshare;
 		int absent;
 		struct page *page;
 
@@ -6035,9 +6055,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * both cases, and because we can't follow correct pages
 		 * directly from any kind of swap entries.
 		 */
-		if (absent || is_swap_pte(huge_ptep_get(pte)) ||
-		    ((flags & FOLL_WRITE) &&
-		      !huge_pte_write(huge_ptep_get(pte)))) {
+		if (absent ||
+		    __follow_hugetlb_must_fault(flags, pte, &unshare)) {
 			vm_fault_t ret;
 			unsigned int fault_flags = 0;
 
@@ -6045,6 +6064,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				spin_unlock(ptl);
 			if (flags & FOLL_WRITE)
 				fault_flags |= FAULT_FLAG_WRITE;
+			else if (unshare)
+				fault_flags |= FAULT_FLAG_UNSHARE;
 			if (locked)
 				fault_flags |= FAULT_FLAG_ALLOW_RETRY |
 					FAULT_FLAG_KILLABLE;
@@ -6734,7 +6755,21 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 		goto out;
 	pte = huge_ptep_get((pte_t *)pmd);
 	if (pte_present(pte)) {
-		page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT);
+		struct page *head_page = pmd_page(*pmd);
+
+		/*
+		 * follow_huge_pmd() is only called when coming via
+		 * follow_page(), where we set FOLL_NOUNSHARE. Ordinary GUP
+		 * goes via follow_hugetlb_page(), where we can properly unshare
+		 * if required.
+		 */
+		if (WARN_ON_ONCE(!huge_pte_write(pte) &&
+				 gup_must_unshare(flags, head_page, true))) {
+			page = NULL;
+			goto out;
+		}
+
+		page = head_page + ((address & ~PMD_MASK) >> PAGE_SHIFT);
 		/*
 		 * try_grab_page() should always succeed here, because: a) we
 		 * hold the pmd (ptl) lock, and b) we've just checked that the
-- 
2.31.1