[RFC PATCH] mm: fixup orphan avc cleanup logic

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Existing logic failed to reparent the anon_vma whose avc was removed, which
resulted in assertion failures.

This patch corrects this, fixes up some comments, and does some other
cleanups.

We also skip all anon_vma->parent manipulation if no orphaned AVC is
found.

I still feel this logic is highly dubious, but this does fix the issue with
anon_vma->num_children accounting.

Note that this patch does not correctly handle locking of the reparented
anon_vma.

Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx>
---
 include/linux/rmap.h |   2 +-
 mm/memory.c          |   2 +-
 mm/rmap.c            | 101 ++++++++++++++++++++++++++++---------------
 3 files changed, 68 insertions(+), 37 deletions(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 8607d28a3146..f1a835f54064 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -257,7 +257,7 @@ void folio_remove_rmap_ptes(struct folio *, struct page *, int nr_pages,
 	folio_remove_rmap_ptes(folio, page, 1, vma)
 void folio_remove_rmap_pmd(struct folio *, struct page *,
 		struct vm_area_struct *);
-void folio_remove_anon_avc(struct folio *, struct vm_area_struct *);
+void cleanup_orphan_avc(struct folio *, struct vm_area_struct *);
 
 void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *,
 		unsigned long address, rmap_t flags);
diff --git a/mm/memory.c b/mm/memory.c
index 4c89cb1cb73e..989b078dd860 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3435,7 +3435,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 			 * between vma and the old_folio's anon_vma is removed,
 			 * avoiding rmap redundant overhead.
 			 */
-			folio_remove_anon_avc(old_folio, vma);
+			cleanup_orphan_avc(old_folio, vma);
 		}
 
 		/* Free the old page.. */
diff --git a/mm/rmap.c b/mm/rmap.c
index 56fc16fcf2a9..3ac264962917 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1523,56 +1523,87 @@ void folio_add_file_rmap_pmd(struct folio *folio, struct page *page,
 }
 
 /**
- * folio_remove_anon_avc - remove the avc binding relationship between
- * folio and vma with different anon_vmas.
- * @folio:	The folio with anon_vma to remove the binded avc from
- * @vma:	The vm area to remove the binded avc with folio's anon_vma
+ * cleanup_orphan_avc - remove the avc binding relationship between a parent
+ * folio and child vma with different anon_vmas which, due to an operation such
+ * as CoW'ing a folio, is no longer meaningful.
  *
- * The caller is currently used for CoWed scene.
+ * (insert ASCII diagrams and explanation here...)
+ *
+ * @old_folio:  The folio which contains the parent anon_vma which has an unneeded
+ *              avc binding.
+ * @new_vma:	The VMA which is unnecessarily bound to @old_folio's anon_vma.
  */
-void folio_remove_anon_avc(struct folio *folio,
-		struct vm_area_struct *vma)
+void cleanup_orphan_avc(struct folio *old_folio, struct vm_area_struct *new_vma)
 {
-	struct anon_vma *anon_vma = folio_anon_vma(folio);
+	struct anon_vma *parent_anon_vma = folio_anon_vma(old_folio);
+	struct anon_vma *child_anon_vma = new_vma->anon_vma;
 	pgoff_t pgoff_start, pgoff_end;
 	struct anon_vma_chain *avc;
+	bool removed = false;
 
 	/*
-	 * Ensure that the vma's anon_vma and the folio's
-	 * anon_vma exist and are not same.
+	 * If this folio were not anonymous, folio_anon_vma() would have
+	 * returned NULL. Equally, if the parent and child anon_vma objects are
+	 * the same, then we have nothing to do here.
 	 */
-	if (!folio_test_anon(folio) || unlikely(!anon_vma) ||
-	    anon_vma == vma->anon_vma)
+	if (!parent_anon_vma || parent_anon_vma == child_anon_vma)
 		return;
 
-	pgoff_start = folio_pgoff(folio);
-	pgoff_end = pgoff_start + folio_nr_pages(folio) - 1;
+	pgoff_start = folio_pgoff(old_folio);
+	pgoff_end = pgoff_start + folio_nr_pages(old_folio) - 1;
 
-	if (!anon_vma_trylock_write(anon_vma))
+	/* This is an optimistic attempt. */
+	if (!anon_vma_trylock_write(parent_anon_vma))
 		return;
 
-	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
-			pgoff_start, pgoff_end) {
-		/*
-		 * Find the avc associated with vma from the folio's
-		 * anon_vma and remove it.
-		 */
-		if (avc->vma == vma) {
-			anon_vma_interval_tree_remove(avc, &anon_vma->rb_root);
-			/*
-			 * When removing the avc with anon_vma that is
-			 * different from the parent anon_vma from parent
-			 * anon_vma->rb_root, the parent num_children
-			 * count value is needed to reduce one.
-			 */
-			anon_vma->num_children--;
+	/*
+	 * Iterate through all AVCs in the old folio's anon_vma that cover the
+	 * folio's page range, looking for the redundant one pointing at the new VMA.
+	 */
+	anon_vma_interval_tree_foreach(avc, &parent_anon_vma->rb_root,
+				       pgoff_start, pgoff_end) {
+		if (avc->vma != new_vma)
+			continue;
 
-			list_del(&avc->same_vma);
-			anon_vma_chain_free(avc);
-			break;
-		}
+		/* Remove the unneeded avc. */
+		anon_vma_interval_tree_remove(avc, &parent_anon_vma->rb_root);
+		list_del(&avc->same_vma);
+		anon_vma_chain_free(avc);
+
+		removed = true;
+		break;
 	}
-	anon_vma_unlock_write(anon_vma);
+
+	if (!removed)
+		goto unlock;
+
+	/*
+	 * Removing an avc implies that the associated anon_vma MAY no longer
+	 * need to point to its parent, and we need to reparent it.
+	 */
+
+	/*
+	 * If somehow we aren't already the child of the parent anon_vma, we
+	 * have nothing to do here.
+	 */
+	if (child_anon_vma->parent != parent_anon_vma)
+		goto unlock;
+
+	/* OK, we abandon our parent, and reparent to ourselves. */
+
+	parent_anon_vma->num_children--;
+
+	child_anon_vma->parent = child_anon_vma;
+	child_anon_vma->num_children++;
+
+	/*
+	 * Here we should probably reset the anon_vma->root, as per
+	 * anon_vma_ctor() but this feels icky and horrible. Bit weird to share
+	 * a lock with the old parent's root.
+	 */
+
+unlock:
+	anon_vma_unlock_write(parent_anon_vma);
 }
 
 static __always_inline void __folio_remove_rmap(struct folio *folio,
-- 
2.46.0


--vpqxaifkk44w6k3q--




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux