Le 27/02/2020 à 11:33, Anshuman Khandual a écrit :
This adds new tests validating arch page table helpers for these following
core memory features. These tests create and test specific mapping types at
various page table levels.
* SPECIAL mapping
* PROTNONE mapping
* DEVMAP mapping
* SOFTDIRTY mapping
* SWAP mapping
* MIGRATION mapping
* HUGETLB mapping
* THP mapping
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Mike Rapoport <rppt@xxxxxxxxxxxxx>
Cc: Vineet Gupta <vgupta@xxxxxxxxxxxx>
Cc: Catalin Marinas <catalin.marinas@xxxxxxx>
Cc: Will Deacon <will@xxxxxxxxxx>
Cc: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx>
Cc: Paul Mackerras <paulus@xxxxxxxxx>
Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
Cc: Heiko Carstens <heiko.carstens@xxxxxxxxxx>
Cc: Vasily Gorbik <gor@xxxxxxxxxxxxx>
Cc: Christian Borntraeger <borntraeger@xxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: Kirill A. Shutemov <kirill@xxxxxxxxxxxxx>
Cc: Paul Walmsley <paul.walmsley@xxxxxxxxxx>
Cc: Palmer Dabbelt <palmer@xxxxxxxxxxx>
Cc: linux-snps-arc@xxxxxxxxxxxxxxxxxxx
Cc: linux-arm-kernel@xxxxxxxxxxxxxxxxxxx
Cc: linuxppc-dev@xxxxxxxxxxxxxxxx
Cc: linux-s390@xxxxxxxxxxxxxxx
Cc: linux-riscv@xxxxxxxxxxxxxxxxxxx
Cc: x86@xxxxxxxxxx
Cc: linux-arch@xxxxxxxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
Suggested-by: Catalin Marinas <catalin.marinas@xxxxxxx>
Signed-off-by: Anshuman Khandual <anshuman.khandual@xxxxxxx>
---
Tested on arm64 and x86 platforms without any test failures. But this has
only been built tested on several other platforms. Individual tests need
to be verified on all current enabling platforms for the test i.e s390,
ppc32, arc etc.
This patch must be applied on v5.6-rc3 after these patches
1. https://patchwork.kernel.org/patch/11385057/
2. https://patchwork.kernel.org/patch/11407715/
OR
This patch must be applied on linux-next (next-20200227) after this patch
2. https://patchwork.kernel.org/patch/11407715/
mm/debug_vm_pgtable.c | 310 +++++++++++++++++++++++++++++++++++++++++-
1 file changed, 309 insertions(+), 1 deletion(-)
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 96dd7d574cef..3fb90d5b604e 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -41,6 +41,44 @@
* wrprotect(entry) = A write protected and not a write entry
* pxx_bad(entry) = A mapped and non-table entry
* pxx_same(entry1, entry2) = Both entries hold the exact same value
+ *
+ * Specific feature operations
+ *
+ * pte_mkspecial(entry) = Creates a special entry at PTE level
+ * pte_special(entry) = Tests a special entry at PTE level
+ *
+ * pte_protnone(entry) = Tests a no access entry at PTE level
+ * pmd_protnone(entry) = Tests a no access entry at PMD level
+ *
+ * pte_mkdevmap(entry) = Creates a device entry at PTE level
+ * pmd_mkdevmap(entry) = Creates a device entry at PMD level
+ * pud_mkdevmap(entry) = Creates a device entry at PUD level
+ * pte_devmap(entry) = Tests a device entry at PTE level
+ * pmd_devmap(entry) = Tests a device entry at PMD level
+ * pud_devmap(entry) = Tests a device entry at PUD level
+ *
+ * pte_mksoft_dirty(entry) = Creates a soft dirty entry at PTE level
+ * pmd_mksoft_dirty(entry) = Creates a soft dirty entry at PMD level
+ * pte_swp_mksoft_dirty(entry) = Creates a soft dirty swap entry at PTE level
+ * pmd_swp_mksoft_dirty(entry) = Creates a soft dirty swap entry at PMD level
+ * pte_soft_dirty(entry) = Tests a soft dirty entry at PTE level
+ * pmd_soft_dirty(entry) = Tests a soft dirty entry at PMD level
+ * pte_swp_soft_dirty(entry) = Tests a soft dirty swap entry at PTE level
+ * pmd_swp_soft_dirty(entry) = Tests a soft dirty swap entry at PMD level
+ * pte_clear_soft_dirty(entry) = Clears a soft dirty entry at PTE level
+ * pmd_clear_soft_dirty(entry) = Clears a soft dirty entry at PMD level
+ * pte_swp_clear_soft_dirty(entry) = Clears a soft dirty swap entry at PTE level
+ * pmd_swp_clear_soft_dirty(entry) = Clears a soft dirty swap entry at PMD level
+ *
+ * pte_mkhuge(entry) = Creates a HugeTLB entry at given level
+ * pte_huge(entry) = Tests a HugeTLB entry at given level
+ *
+ * pmd_trans_huge(entry) = Tests a trans huge page at PMD level
+ * pud_trans_huge(entry) = Tests a trans huge page at PUD level
+ * pmd_present(entry) = Tests an entry points to memory at PMD level
+ * pud_present(entry) = Tests an entry points to memory at PUD level
+ * pmd_mknotpresent(entry) = Invalidates an PMD entry for MMU
+ * pud_mknotpresent(entry) = Invalidates an PUD entry for MMU
*/
#define VMFLAGS (VM_READ|VM_WRITE|VM_EXEC)
@@ -287,6 +325,233 @@ static void __init pmd_populate_tests(struct mm_struct *mm, pmd_t *pmdp,
WARN_ON(pmd_bad(pmd));
}
+#ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
Can we avoid ifdefs unless necessary ?
In mm/memory.c I see things like the following, it means pte_special()
always exist and a #ifdef is not necessary.
if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) {
if (likely(!pte_special(pte)))
goto check_pfn;
if (vma->vm_ops && vma->vm_ops->find_special_page)
return vma->vm_ops->find_special_page(vma, addr);
if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
return NULL;
if (is_zero_pfn(pfn))
return NULL;
if (pte_devmap(pte))
return NULL;
print_bad_pte(vma, addr, pte, NULL);
return NULL;
}
+static void __init pte_special_tests(unsigned long pfn, pgprot_t prot)
+{
+ pte_t pte = pfn_pte(pfn, prot);
+
+ WARN_ON(!pte_special(pte_mkspecial(pte)));
+}
+#else
+static void __init pte_special_tests(unsigned long pfn, pgprot_t prot) { }
+#endif
+
+#ifdef CONFIG_NUMA_BALANCING
Same here, this ifdef shouldn't be necessary because in
/include/asm-generic/pgtable.h we have the following, so a if
(IS_ENABLED()) should be enough.
#ifndef CONFIG_NUMA_BALANCING
/*
* Technically a PTE can be PROTNONE even when not doing NUMA balancing but
* the only case the kernel cares is for NUMA balancing and is only
ever set
* when the VMA is accessible. For PROT_NONE VMAs, the PTEs are not marked
* _PAGE_PROTNONE so by by default, implement the helper as "always no". It
* is the responsibility of the caller to distinguish between PROT_NONE
* protections and NUMA hinting fault protections.
*/
static inline int pte_protnone(pte_t pte)
{
return 0;
}
static inline int pmd_protnone(pmd_t pmd)
{
return 0;
}
#endif /* CONFIG_NUMA_BALANCING */
+static void __init pte_protnone_tests(unsigned long pfn, pgprot_t prot)
+{
+ pte_t pte = pfn_pte(pfn, prot);
+
+ WARN_ON(!pte_protnone(pte));
+ WARN_ON(!pte_present(pte));
+}
+
+static void __init pmd_protnone_tests(unsigned long pfn, pgprot_t prot)
+{
+ pmd_t pmd = pfn_pmd(pfn, prot);
+
+ WARN_ON(!pmd_protnone(pmd));
+ WARN_ON(!pmd_present(pmd));
+}
+#else
+static void __init pte_protnone_tests(unsigned long pfn, pgprot_t prot) { }
+static void __init pmd_protnone_tests(unsigned long pfn, pgprot_t prot) { }
+#endif
+
+#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP
Same here, in include/linux/mm.h we have:
#ifndef CONFIG_ARCH_HAS_PTE_DEVMAP
static inline int pte_devmap(pte_t pte)
{
return 0;
}
#endif
+static void __init pte_devmap_tests(unsigned long pfn, pgprot_t prot)
+{
+ pte_t pte = pfn_pte(pfn, prot);
+
+ WARN_ON(!pte_devmap(pte_mkdevmap(pte)));
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
Same. In inlude/asm-generic/pgtables.h you have:
#if !defined(CONFIG_ARCH_HAS_PTE_DEVMAP) ||
!defined(CONFIG_TRANSPARENT_HUGEPAGE)
static inline int pmd_devmap(pmd_t pmd)
{
return 0;
}
static inline int pud_devmap(pud_t pud)
{
return 0;
}
static inline int pgd_devmap(pgd_t pgd)
{
return 0;
}
#endif
+static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot)
+{
+ pmd_t pmd = pfn_pmd(pfn, prot);
+
+ WARN_ON(!pmd_devmap(pmd_mkdevmap(pmd)));
+}
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
Same, see above
+static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot)
+{
+ pud_t pud = pfn_pud(pfn, prot);
+
+ WARN_ON(!pud_devmap(pud_mkdevmap(pud)));
+}
+#else
+static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { }
+#endif
+#else
+static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot) { }
+static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { }
+#endif
+#else
+static void __init pte_devmap_tests(unsigned long pfn, pgprot_t prot) { }
+static void __init pmd_devmap_tests(unsigned long pfn, pgprot_t prot) { }
+static void __init pud_devmap_tests(unsigned long pfn, pgprot_t prot) { }
+#endif
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
Same, they always exist, see include/asm-generic/pgtable.h
+static void __init pte_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
+{
+ pte_t pte = pfn_pte(pfn, prot);
+
+ WARN_ON(!pte_soft_dirty(pte_mksoft_dirty(pte)));
+ WARN_ON(pte_soft_dirty(pte_clear_soft_dirty(pte)));
+}
+
+static void __init pte_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
+{
+ pte_t pte = pfn_pte(pfn, prot);
+
+ WARN_ON(!pte_swp_soft_dirty(pte_swp_mksoft_dirty(pte)));
+ WARN_ON(pte_swp_soft_dirty(pte_swp_clear_soft_dirty(pte)));
+}
+
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
Same
+static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
+{
+ pmd_t pmd = pfn_pmd(pfn, prot);
+
+ WARN_ON(!pmd_soft_dirty(pmd_mksoft_dirty(pmd)));
+ WARN_ON(pmd_soft_dirty(pmd_clear_soft_dirty(pmd)));
+}
+
+static void __init pmd_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
+{
+ pmd_t pmd = pfn_pmd(pfn, prot);
+
+ WARN_ON(!pmd_swp_soft_dirty(pmd_swp_mksoft_dirty(pmd)));
+ WARN_ON(pmd_swp_soft_dirty(pmd_swp_clear_soft_dirty(pmd)));
+}
+#else
+static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot) { }
+static void __init pmd_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
+{
+}
+#endif
+#else
+static void __init pte_soft_dirty_tests(unsigned long pfn, pgprot_t prot) { }
+static void __init pmd_soft_dirty_tests(unsigned long pfn, pgprot_t prot) { }
+static void __init pte_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
+{
+}
+static void __init pmd_swap_soft_dirty_tests(unsigned long pfn, pgprot_t prot)
+{
+}
+#endif
+
+static void __init pte_swap_tests(unsigned long pfn, pgprot_t prot)
+{
+ swp_entry_t swp;
+ pte_t pte;
+
+ pte = pfn_pte(pfn, prot);
+ swp = __pte_to_swp_entry(pte);
+ WARN_ON(!pte_same(pte, __swp_entry_to_pte(swp)));
+}
+
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot)
+{
+ swp_entry_t swp;
+ pmd_t pmd;
+
+ pmd = pfn_pmd(pfn, prot);
+ swp = __pmd_to_swp_entry(pmd);
+ WARN_ON(!pmd_same(pmd, __swp_entry_to_pmd(swp)));
+}
+#else
+static void __init pmd_swap_tests(unsigned long pfn, pgprot_t prot) { }
+#endif
+
+#ifdef CONFIG_MIGRATION
Same. See include/linux/swapops.h
+static void __init swap_migration_tests(struct page *page)
+{
+ swp_entry_t swp;
+
+ /*
+ * make_migration_entry() expects given page to be
+ * locked, otherwise it stumbles upon a BUG_ON().
+ */
+ __SetPageLocked(page);
+ swp = make_migration_entry(page, 1);
+ WARN_ON(!is_migration_entry(swp));
+ WARN_ON(!is_write_migration_entry(swp));
+
+ make_migration_entry_read(&swp);
+ WARN_ON(!is_migration_entry(swp));
+ WARN_ON(is_write_migration_entry(swp));
+
+ swp = make_migration_entry(page, 0);
+ WARN_ON(!is_migration_entry(swp));
+ WARN_ON(is_write_migration_entry(swp));
+ __ClearPageLocked(page);
+}
+#else
+static void __init swap_migration_tests(struct page *page) { }
+#endif
+
+#ifdef CONFIG_HUGETLB_PAGE
+static void __init hugetlb_tests(unsigned long pfn, pgprot_t prot)
+{
+#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
+ pte_t pte = pfn_pte(pfn, prot);
+
+ WARN_ON(!pte_huge(pte_mkhuge(pte)));
We also need tests on hugepd stuff
+#endif
+}
+#else
+static void __init hugetlb_tests(unsigned long pfn, pgprot_t prot) { }
+#endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
Same, see include/asm-generic/pgtable.h
+static void __init pmd_thp_tests(unsigned long pfn, pgprot_t prot)
+{
+ pmd_t pmd;
+
+ /*
+ * pmd_trans_huge() and pmd_present() must return negative
+ * after MMU invalidation with pmd_mknotpresent().
+ */
+ pmd = pfn_pmd(pfn, prot);
+ WARN_ON(!pmd_trans_huge(pmd_mkhuge(pmd)));
+
+ /*
+ * Though platform specific test exclusions are not ideal,
+ * in this case S390 does not define pmd_mknotpresent()
+ * which should be tested on other platforms enabling THP.
+ */
+#ifndef CONFIG_S390
+ WARN_ON(pmd_trans_huge(pmd_mknotpresent(pmd)));
+ WARN_ON(pmd_present(pmd_mknotpresent(pmd)));
+#endif
Can we add a stub on S390 instead ?
+}
+
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
Same ?
+static void __init pud_thp_tests(unsigned long pfn, pgprot_t prot)
+{
+ pud_t pud;
+
+ /*
+ * pud_trans_huge() and pud_present() must return negative
+ * after MMU invalidation with pud_mknotpresent().
+ */
+ pud = pfn_pud(pfn, prot);
+ WARN_ON(!pud_trans_huge(pud_mkhuge(pud)));
+ WARN_ON(pud_trans_huge(pud_mknotpresent(pud)));
+ WARN_ON(pud_present(pud_mknotpresent(pud)));
+}
+#else
+static void __init pud_thp_tests(unsigned long pfn, pgprot_t prot) { }
+#endif
+#else
+static void __init pmd_thp_tests(unsigned long pfn, pgprot_t prot) { }
+static void __init pud_thp_tests(unsigned long pfn, pgprot_t prot) { }
+#endif
+
static unsigned long __init get_random_vaddr(void)
{
unsigned long random_vaddr, random_pages, total_user_pages;
@@ -302,13 +567,14 @@ static unsigned long __init get_random_vaddr(void)
void __init debug_vm_pgtable(void)
{
struct mm_struct *mm;
+ struct page *page;
pgd_t *pgdp;
p4d_t *p4dp, *saved_p4dp;
pud_t *pudp, *saved_pudp;
pmd_t *pmdp, *saved_pmdp, pmd;
pte_t *ptep;
pgtable_t saved_ptep;
- pgprot_t prot;
+ pgprot_t prot, protnone;
phys_addr_t paddr;
unsigned long vaddr, pte_aligned, pmd_aligned;
unsigned long pud_aligned, p4d_aligned, pgd_aligned;
@@ -322,6 +588,25 @@ void __init debug_vm_pgtable(void)
return;
}
+ /*
+ * swap_migration_tests() requires a dedicated page as it needs to
+ * be locked before creating a migration entry from it. Locking the
+ * page that actually maps kernel text ('start_kernel') can be real
+ * problematic. Lets allocate a dedicated page explicitly for this
+ * purpose that will be freed later.
+ */
+ page = alloc_page(GFP_KERNEL);
Can we do the page allocation and freeing in swap_migration_tests()
instead ?
+ if (!page) {
+ pr_err("page allocation failed\n");
+ return;
+ }
+
+ /*
+ * __P000 (or even __S000) will help create page table entries with
+ * PROT_NONE permission as required for pxx_protnone_tests().
+ */
+ protnone = __P000;
+
/*
* PFN for mapping at PTE level is determined from a standard kernel
* text symbol. But pfns for higher page table levels are derived by
@@ -377,11 +662,34 @@ void __init debug_vm_pgtable(void)
p4d_populate_tests(mm, p4dp, saved_pudp);
pgd_populate_tests(mm, pgdp, saved_p4dp);
+ pte_special_tests(pte_aligned, prot);
+ pte_protnone_tests(pte_aligned, protnone);
+ pmd_protnone_tests(pmd_aligned, protnone);
+
+ pte_devmap_tests(pte_aligned, prot);
+ pmd_devmap_tests(pmd_aligned, prot);
+ pud_devmap_tests(pud_aligned, prot);
+
+ pte_soft_dirty_tests(pte_aligned, prot);
+ pmd_soft_dirty_tests(pmd_aligned, prot);
+ pte_swap_soft_dirty_tests(pte_aligned, prot);
+ pmd_swap_soft_dirty_tests(pmd_aligned, prot);
+
+ pte_swap_tests(pte_aligned, prot);
+ pmd_swap_tests(pmd_aligned, prot);
+
+ swap_migration_tests(page);
+ hugetlb_tests(pte_aligned, prot);
+
+ pmd_thp_tests(pmd_aligned, prot);
+ pud_thp_tests(pud_aligned, prot);
+
p4d_free(mm, saved_p4dp);
pud_free(mm, saved_pudp);
pmd_free(mm, saved_pmdp);
pte_free(mm, saved_ptep);
+ __free_page(page);
mm_dec_nr_puds(mm);
mm_dec_nr_pmds(mm);
mm_dec_nr_ptes(mm);
Christophe