Reading tlb_flush_pending while the page-table lock is taken does not
require a barrier, since the lock/unlock already acts as a barrier.
Remove the barrier in mm_tlb_flush_pending() accordingly.

However, migrate_misplaced_transhuge_page() calls mm_tlb_flush_pending()
after the page-table lock has already been released, which may present a
problem on architectures with a weak memory model (e.g., PPC). To deal
with this case, add a new parameter to mm_tlb_flush_pending() that
indicates whether it is called without the page-table lock taken, and
call smp_mb__after_unlock_lock() in that case.

Signed-off-by: Nadav Amit <namit@xxxxxxxxxx>
---
 arch/arm/include/asm/pgtable.h   |  3 ++-
 arch/arm64/include/asm/pgtable.h |  3 ++-
 arch/x86/include/asm/pgtable.h   |  2 +-
 include/linux/mm_types.h         | 31 +++++++++++++++++++++++--------
 mm/migrate.c                     |  2 +-
 5 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 1c462381c225..2e0608a8049d 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -223,7 +223,8 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 #define pte_none(pte)		(!pte_val(pte))
 #define pte_present(pte)	(pte_isset((pte), L_PTE_PRESENT))
 #define pte_valid(pte)		(pte_isset((pte), L_PTE_VALID))
-#define pte_accessible(mm, pte)	(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
+#define pte_accessible(mm, pte)	(mm_tlb_flush_pending(mm, true) ? \
+				 pte_present(pte) : pte_valid(pte))
 #define pte_write(pte)		(pte_isclear((pte), L_PTE_RDONLY))
 #define pte_dirty(pte)		(pte_isset((pte), L_PTE_DIRTY))
 #define pte_young(pte)		(pte_isset((pte), L_PTE_YOUNG))
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index c213fdbd056c..47f934d378ca 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -108,7 +108,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
  * remapped as PROT_NONE but are yet to be flushed from the TLB.
  */
 #define pte_accessible(mm, pte)	\
-	(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte))
+	(mm_tlb_flush_pending(mm, true) ? pte_present(pte) : \
+	 pte_valid_young(pte))
 
 static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
 {
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index f5af95a0c6b8..da16793203dd 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -642,7 +642,7 @@ static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
 		return true;
 
 	if ((pte_flags(a) & _PAGE_PROTNONE) &&
-			mm_tlb_flush_pending(mm))
+			mm_tlb_flush_pending(mm, true))
 		return true;
 
 	return false;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 36f4ec589544..57ab8061a2c0 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -522,12 +522,21 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 /*
  * Memory barriers to keep this state in sync are graciously provided by
  * the page table locks, outside of which no page table modifications happen.
- * The barriers below prevent the compiler from re-ordering the instructions
- * around the memory barriers that are already present in the code.
+ * The barriers are used to ensure that tlb_flush_pending updates,
+ * which happen while the lock is not taken, and the PTE updates, which happen
+ * while the lock is taken, are serialized.
  */
-static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
+static inline bool mm_tlb_flush_pending(struct mm_struct *mm, bool pt_locked)
 {
-	barrier();
+	/*
+	 * mm_tlb_flush_pending() is safe if it is executed while the page-table
+	 * lock is taken. But if the lock was already released, there does not
+	 * seem to be a guarantee of a memory barrier. A memory barrier is
+	 * therefore needed on architectures with weak memory models.
+	 */
+	if (!pt_locked)
+		smp_mb__after_unlock_lock();
+
 	return atomic_read(&mm->tlb_flush_pending) > 0;
 }
 static inline void set_tlb_flush_pending(struct mm_struct *mm)
@@ -535,19 +544,25 @@ static inline void set_tlb_flush_pending(struct mm_struct *mm)
 	atomic_inc(&mm->tlb_flush_pending);
 
 	/*
-	 * Guarantee that the tlb_flush_pending store does not leak into the
+	 * Guarantee that the tlb_flush_pending increase does not leak into the
 	 * critical section updating the page tables
 	 */
 	smp_mb__before_spinlock();
 }
-/* Clearing is done after a TLB flush, which also provides a barrier. */
+
 static inline void clear_tlb_flush_pending(struct mm_struct *mm)
 {
-	barrier();
+	/*
+	 * Guarantee that the tlb_flush_pending does not leak into the
+	 * critical section, since we must order the PTE change and changes to
+	 * the pending TLB flush indication. We could have relied on TLB flush
+	 * as a memory barrier, but this behavior is not clearly documented.
+	 */
+	smp_mb__before_atomic();
 	atomic_dec(&mm->tlb_flush_pending);
 }
 #else
-static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
+static inline bool mm_tlb_flush_pending(struct mm_struct *mm, bool pt_locked)
 {
 	return false;
 }
diff --git a/mm/migrate.c b/mm/migrate.c
index 89a0a1707f4c..169c3165be41 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1939,7 +1939,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	 * We are not sure a pending tlb flush here is for a huge page
 	 * mapping or not. Hence use the tlb range variant
 	 */
-	if (mm_tlb_flush_pending(mm))
+	if (mm_tlb_flush_pending(mm, false))
		flush_tlb_range(vma, mmun_start, mmun_end);
 
 	/* Prepare a page as a migration target */
--
2.11.0
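
For reference, the ordering the patch relies on can be sketched in userspace
with C11 atomics and a pthread mutex standing in for the page-table lock.
This is only an illustrative analogue, not kernel code: the kernel barriers
(smp_mb__before_spinlock(), smp_mb__before_atomic(),
smp_mb__after_unlock_lock()) are approximated with full fences, and the names
fake_pte, flusher() and tlb_flush_pending_unlocked() are made up for the
sketch.

/*
 * Illustrative userspace analogue only -- not kernel code. A pthread mutex
 * stands in for the page-table lock and seq_cst fences stand in for the
 * kernel barriers.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t ptl = PTHREAD_MUTEX_INITIALIZER;	/* "page-table lock" */
static atomic_int tlb_flush_pending;
static int fake_pte;					/* protected by ptl */

/* Flusher side: mimics set_tlb_flush_pending() + PTE update + clear. */
static void *flusher(void *arg)
{
	(void)arg;

	atomic_fetch_add(&tlb_flush_pending, 1);
	atomic_thread_fence(memory_order_seq_cst);	/* ~smp_mb__before_spinlock() */

	pthread_mutex_lock(&ptl);
	fake_pte = 0;					/* PTE change under the lock */
	pthread_mutex_unlock(&ptl);

	/* ... the TLB flush itself would happen here ... */

	atomic_thread_fence(memory_order_seq_cst);	/* ~smp_mb__before_atomic() */
	atomic_fetch_sub(&tlb_flush_pending, 1);
	return NULL;
}

/* Reader outside the lock: mimics mm_tlb_flush_pending(mm, false). */
static bool tlb_flush_pending_unlocked(void)
{
	atomic_thread_fence(memory_order_seq_cst);	/* ~smp_mb__after_unlock_lock() */
	return atomic_load(&tlb_flush_pending) > 0;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, flusher, NULL);
	if (tlb_flush_pending_unlocked())
		puts("pending flush seen; would call flush_tlb_range()");
	pthread_join(t, NULL);
	return 0;
}

A reader that already holds the mutex gets its ordering from the lock
acquisition itself, which corresponds to the mm_tlb_flush_pending(mm, true)
case in the patch; only the unlocked reader needs the extra barrier.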