[RFC V2 1/2] arm64/mm: Change THP helpers per generic memory semantics

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



pmd_present() and pmd_trans_huge() are expected to behave in the following
manner during various phases of a PMD entry. This table is derived from a
previous discussion on this topic [1] and available THP documentation [2].

pmd_present(pmd):

- True if PMD has a mapped huge page i.e valid pmd_page(pmd)
- False if PMD does not have a mapped huge page i.e invalid pmd_page(pmd)

pmd_trans_huge(pmd):

- True if PMD has a mapped huge page and is a THP

-------------------------------------------------------------------------
|	PMD states	|	pmd_present	|	pmd_trans_huge	|
-------------------------------------------------------------------------
|	Mapped		|	Yes		|	Yes		|
-------------------------------------------------------------------------
|	Splitting	|	Yes		|	Yes		|
-------------------------------------------------------------------------
|	Migration/Swap	|	No		|	No		|
-------------------------------------------------------------------------

Current Problem:-

PMD is invalidated with pmdp_invalidate() before it's splitting. It clears
PMD_SECT_VALID as below.

PMD Split -> pmdp_invalidate() -> pmd_mkinvalid() -> Clears PMD_SECT_VALID

Once PMD_SECT_VALID gets cleared, pmd_present() returns false. It will need
a separate page table bit apart from PMD_SECT_VALID, in order to reaffirm
pmd_present() as true during this THP split process. To comply with above
mentioned semantics, pmd_trans_huge() should also check pmd_present() first
before testing presence of an actual transparent huge page mapping.

PMD_TYPE_SECT should have been used here. But it shares bit position with
PMD_SECT_VALID which gets cleared during THP invalidation. Hence, it cannot
be used for pmd_present() after pmdp_invalidate().

Proposed Solution:-

PMD_TABLE_BIT can be set during pmdp_invalidate() process which will make
pmd_present() return true as required. But then, PMD_TABLE_BIT needs to be
cleared when PMD gets mapped again.

This replaces pmd_present() with pte_present() in huge_pte_offset() just to
preserve the existing semantics.

[1]: https://lore.kernel.org/linux-mm/20181017020930.GN30832@xxxxxxxxxx/
[2]: https://www.kernel.org/doc/Documentation/vm/transhuge.txt

Cc: Catalin Marinas <catalin.marinas@xxxxxxx>
Cc: Will Deacon <will@xxxxxxxxxx>
Cc: Mark Rutland <mark.rutland@xxxxxxx>
Cc: Marc Zyngier <maz@xxxxxxxxxx>
Cc: Suzuki Poulose <suzuki.poulose@xxxxxxx>
Cc: linux-arm-kernel@xxxxxxxxxxxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
Signed-off-by: Anshuman Khandual <anshuman.khandual@xxxxxxx>
---
 arch/arm64/include/asm/pgtable.h | 92 ++++++++++++++++++++++++++++----
 arch/arm64/mm/hugetlbpage.c      |  2 +-
 arch/arm64/mm/mmu.c              | 20 +++++++
 3 files changed, 102 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 6dbd267ab931..560be593a8dc 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -353,15 +353,92 @@ static inline int pmd_protnone(pmd_t pmd)
 }
 #endif
 
+#define pmd_table(pmd)	((pmd_val(pmd) & PMD_TYPE_MASK) ==  PMD_TYPE_TABLE)
+#define pmd_sect(pmd)	((pmd_val(pmd) & PMD_TYPE_MASK) ==  PMD_TYPE_SECT)
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /*
- * THP definitions.
+ * PMD Level Encoding (THP Enabled)
+ *
+ * 0b00 - Not valid	Not present	NA
+ * 0b10 - Not valid	Present		Huge  (Splitting)
+ * 0b01 - Valid		Present		Huge  (Mapped)
+ * 0b11 - Valid		Present		Table (Mapped)
  */
+static inline pmd_t pmd_mksplitting(pmd_t pmd)
+{
+	unsigned long val = pmd_val(pmd);
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define pmd_trans_huge(pmd)	(pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
+	return __pmd((val & ~PMD_TYPE_MASK) | PMD_TABLE_BIT);
+}
+
+static inline pmd_t pmd_clrsplitting(pmd_t pmd)
+{
+	unsigned long val = pmd_val(pmd);
+
+	return __pmd((val & ~PMD_TYPE_MASK) | PMD_TYPE_SECT);
+}
+
+static inline bool pmd_splitting(pmd_t pmd)
+{
+	unsigned long val = pmd_val(pmd);
+
+	if ((val & PMD_TYPE_MASK) == PMD_TABLE_BIT)
+		return true;
+	return false;
+}
+
+static inline bool pmd_mapped(pmd_t pmd)
+{
+	return pmd_sect(pmd);
+}
+
+static inline pmd_t pmd_mkinvalid(pmd_t pmd)
+{
+	/*
+	 * Invalidation should not have been invoked on
+	 * a PMD table entry. Just warn here otherwise.
+	 */
+	WARN_ON(pmd_table(pmd));
+	return pmd_mksplitting(pmd);
+}
+
+static inline int pmd_present(pmd_t pmd);
+
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+	if (!pmd_present(pmd))
+		return 0;
+
+	if (!pmd_val(pmd))
+		return 0;
+
+	if (pmd_mapped(pmd))
+		return 1;
+
+	if (pmd_splitting(pmd))
+		return 1;
+	return 0;
+}
+
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-#define pmd_present(pmd)	pte_present(pmd_pte(pmd))
+static inline int pmd_present(pmd_t pmd)
+{
+	pte_t pte = pmd_pte(pmd);
+
+	if (pte_present(pte))
+		return 1;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (pmd_splitting(pmd))
+		return 1;
+#endif
+	return 0;
+}
+
 #define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
 #define pmd_young(pmd)		pte_young(pmd_pte(pmd))
 #define pmd_valid(pmd)		pte_valid(pmd_pte(pmd))
@@ -371,7 +448,6 @@ static inline int pmd_protnone(pmd_t pmd)
 #define pmd_mkclean(pmd)	pte_pmd(pte_mkclean(pmd_pte(pmd)))
 #define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
 #define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
-#define pmd_mkinvalid(pmd)	(__pmd(pmd_val(pmd) & ~PMD_SECT_VALID))
 
 #define pmd_thp_or_huge(pmd)	(pmd_huge(pmd) || pmd_trans_huge(pmd))
 
@@ -404,8 +480,6 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
 #define pud_pfn(pud)		((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
 #define pfn_pud(pfn,prot)	__pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
 
-#define set_pmd_at(mm, addr, pmdp, pmd)	set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
-
 #define __p4d_to_phys(p4d)	__pte_to_phys(p4d_pte(p4d))
 #define __phys_to_p4d_val(phys)	__phys_to_pte_val(phys)
 
@@ -448,10 +522,6 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 
 #define pmd_bad(pmd)		(!(pmd_val(pmd) & PMD_TABLE_BIT))
 
-#define pmd_table(pmd)		((pmd_val(pmd) & PMD_TYPE_MASK) == \
-				 PMD_TYPE_TABLE)
-#define pmd_sect(pmd)		((pmd_val(pmd) & PMD_TYPE_MASK) == \
-				 PMD_TYPE_SECT)
 #define pmd_leaf(pmd)		pmd_sect(pmd)
 
 #if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 0a52ce46f020..79a8d5c8d4f4 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -294,7 +294,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
 	if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
 	    pmd_none(pmd))
 		return NULL;
-	if (pmd_huge(pmd) || !pmd_present(pmd))
+	if (pmd_huge(pmd) || !pte_present(pmd_pte(pmd)))
 		return (pte_t *)pmdp;
 
 	if (sz == CONT_PTE_SIZE)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 990929c8837e..337519031115 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -22,6 +22,8 @@
 #include <linux/io.h>
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
 
 #include <asm/barrier.h>
 #include <asm/cputype.h>
@@ -1483,3 +1485,21 @@ static int __init prevent_bootmem_remove_init(void)
 }
 device_initcall(prevent_bootmem_remove_init);
 #endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd)
+{
+	/*
+	 * PMD migration entries need to retain splitting PMD
+	 * representation created with pmdp_invalidate(). But
+	 * any non-migration entry which just might have been
+	 * invalidated previously, still need be a normal huge
+	 * page. Hence selectively clear splitting entries.
+	 */
+	if (!is_migration_entry(pmd_to_swp_entry(pmd)))
+		pmd = pmd_clrsplitting(pmd);
+
+	set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
+}
+#endif
-- 
2.20.1





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux