On Mon, Nov 02, 2020 at 11:34:43AM +0000, Nikos Nikoleris wrote:
Make the translation granule configurable for arm64. arm64 supports
page sizes of 4K, 16K and 64K. By default, arm64 is configured with
64K pages. configure has been extended with a new argument:

--page-shift=(12|14|16)

which allows the user to set the page shift and therefore the page
size for arm64. Using the --page-shift for any other architecture
results an error message.

The option the patch actually adds is --page-size=PAGE_SIZE, not
--page-shift, so this should read: "which allows the user to set the
page size for arm64. Using --page-size for any other architecture
results in an error message."

To allow for smaller page sizes and a 42-bit VA, this change adds
support for 4-level and 3-level page tables. At compile time, we
determine how many page table levels are needed.
Signed-off-by: Nikos Nikoleris <nikos.nikoleris@xxxxxxx>
---
configure | 26 +++++++++++++
lib/arm/asm/page.h | 4 ++
lib/arm/asm/pgtable-hwdef.h | 4 ++
lib/arm/asm/pgtable.h | 6 +++
lib/arm/asm/thread_info.h | 4 +-
lib/arm64/asm/page.h | 25 ++++++++++---
lib/arm64/asm/pgtable-hwdef.h | 38 +++++++++++++------
lib/arm64/asm/pgtable.h | 69 +++++++++++++++++++++++++++++++++--
lib/arm/mmu.c | 26 ++++++++-----
arm/cstart64.S | 10 ++++-
10 files changed, 180 insertions(+), 32 deletions(-)
diff --git a/configure b/configure
index 706aab5..9c6bb2c 100755
--- a/configure
+++ b/configure
@@ -25,6 +25,7 @@ vmm="qemu"
errata_force=0
erratatxt="$srcdir/errata.txt"
host_key_document=
+page_size=
usage() {
cat <<-EOF
@@ -50,6 +51,8 @@ usage() {
--host-key-document=HOST_KEY_DOCUMENT
Specify the machine-specific host-key document for creating
a PVM image with 'genprotimg' (s390x only)
+ --page-size=PAGE_SIZE
+ Specify the page size (translation granule) (arm64 only)
EOF
exit 1
}
@@ -105,6 +108,9 @@ while [[ "$1" = -* ]]; do
--host-key-document)
host_key_document="$arg"
;;
+ --page-size)
+ page_size="$arg"
+ ;;
--help)
usage
;;
@@ -123,6 +129,25 @@ arch_name=$arch
[ "$arch" = "aarch64" ] && arch="arm64"
[ "$arch_name" = "arm64" ] && arch_name="aarch64"
+if [ -z "$page_size" ]; then
+ [ "$arch" = "arm64" ] && page_size="65536"
+ [ "$arch" = "arm" ] && page_size="4096"
+else
+ if [ "$arch" != "arm64" ]; then
+ echo "--page-size is not supported for $arch"
+ usage
+ fi
+
+ if [ "${page_size: -1}" = "K" ] || [ "${page_size: -1}" = "k" ]; then
+ page_size=$[ ${page_size%?} * 1024 ]
$[...] is legacy syntax. Please use $((...))
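That is, something like:

  page_size=$(( ${page_size%?} * 1024 ))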
+ fi
+ if [ "$page_size" != "4096" ] && [ "$page_size" != "16384" ] &&
+ [ "$page_size" != "65536" ]; then
+ echo "arm64 doesn't support page size of $page_size"
+ usage
+ fi
+fi
+
[ -z "$processor" ] && processor="$arch"
if [ "$processor" = "arm64" ]; then
@@ -254,6 +279,7 @@ cat <<EOF >> lib/config.h
#define CONFIG_UART_EARLY_BASE ${arm_uart_early_addr}
#define CONFIG_ERRATA_FORCE ${errata_force}
+#define CONFIG_PAGE_SIZE ${page_size}
EOF
fi
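For reference, a 16K build would then presumably be configured with
something along the lines of (assuming the usual --arch and
--cross-prefix options):

  ./configure --arch=arm64 --cross-prefix=aarch64-linux-gnu- --page-size=16K
  make

with "16K" being converted to 16384 by the suffix handling above.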
diff --git a/lib/arm/asm/page.h b/lib/arm/asm/page.h
index 039c9f7..ae0ac2c 100644
--- a/lib/arm/asm/page.h
+++ b/lib/arm/asm/page.h
@@ -29,6 +29,10 @@ typedef struct { pteval_t pgprot; } pgprot_t;
#define pgd_val(x) ((x).pgd)
#define pgprot_val(x) ((x).pgprot)
+/* For compatibility with arm64 page tables */
+#define pud_t pgd_t
+#define pud_val(x) pgd_val(x)
+
#define __pte(x) ((pte_t) { (x) } )
#define __pmd(x) ((pmd_t) { (x) } )
#define __pgd(x) ((pgd_t) { (x) } )
diff --git a/lib/arm/asm/pgtable-hwdef.h b/lib/arm/asm/pgtable-hwdef.h
index 4107e18..fe1d854 100644
--- a/lib/arm/asm/pgtable-hwdef.h
+++ b/lib/arm/asm/pgtable-hwdef.h
@@ -19,6 +19,10 @@
#define PTRS_PER_PTE 512
#define PTRS_PER_PMD 512
+/* For compatibility with arm64 page tables */
+#define PUD_SIZE PGDIR_SIZE
+#define PUD_MASK PGDIR_MASK
+
#define PMD_SHIFT 21
#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT)
#define PMD_MASK (~((1 << PMD_SHIFT) - 1))
diff --git a/lib/arm/asm/pgtable.h b/lib/arm/asm/pgtable.h
index 078dd16..4759d82 100644
--- a/lib/arm/asm/pgtable.h
+++ b/lib/arm/asm/pgtable.h
@@ -53,6 +53,12 @@ static inline pmd_t *pgd_page_vaddr(pgd_t pgd)
return pgtable_va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK);
}
+/* For compatibility with arm64 page tables */
+#define pud_valid(pud) pgd_valid(pud)
+#define pud_offset(pgd, addr) ((pud_t *)pgd)
+#define pud_free(pud)
+#define pud_alloc(pgd, addr) pud_offset(pgd, addr)
Please watch the spaces vs. tabs. This comment applies to the
whole patch. There are many places.
diff --git a/lib/arm64/asm/page.h b/lib/arm64/asm/page.h
+#else
+#error Unsupported PAGE_SIZE
+#endif
#define PAGE_MASK (~(PAGE_SIZE-1))
+#define PGTABLE_LEVELS (((VA_BITS) - 4) / (PAGE_SHIFT - 3))
Please replace the above define with
/*
* Since a page table descriptor is 8 bytes we have (PAGE_SHIFT - 3) bits
* of virtual address at each page table level. So, to get the number of
* page table levels needed, we round up the division of the number of
* address bits (VA_BITS - PAGE_SHIFT) by (PAGE_SHIFT - 3).
*/
#define PGTABLE_LEVELS \
(((VA_BITS - PAGE_SHIFT) + ((PAGE_SHIFT - 3) - 1)) / (PAGE_SHIFT - 3))
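As a sanity check of the suggested formula, with the 42-bit VA from
the commit message: 64K pages give (26 + 12) / 13 = 2 levels, 16K
pages give (28 + 10) / 11 = 3 levels, and 4K pages give (30 + 8) / 9
= 4 levels, matching the 3- and 4-level configurations the patch adds
on top of the existing 2-level 64K setup.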
+
#ifndef __ASSEMBLY__
#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
typedef u64 pteval_t;
typedef u64 pmdval_t;
+typedef u64 pudval_t;
typedef u64 pgdval_t;
typedef struct { pteval_t pte; } pte_t;
+typedef struct { pmdval_t pmd; } pmd_t;
+typedef struct { pudval_t pud; } pud_t;
typedef struct { pgdval_t pgd; } pgd_t;
typedef struct { pteval_t pgprot; } pgprot_t;
#define pte_val(x) ((x).pte)
+#define pmd_val(x) ((x).pmd)
+#define pud_val(x) ((x).pud)
#define pgd_val(x) ((x).pgd)
#define pgprot_val(x) ((x).pgprot)
#define __pte(x) ((pte_t) { (x) } )
+#define __pmd(x) ((pmd_t) { (x) } )
+#define __pud(x) ((pud_t) { (x) } )
#define __pgd(x) ((pgd_t) { (x) } )
#define __pgprot(x) ((pgprot_t) { (x) } )
-typedef struct { pgd_t pgd; } pmd_t;
-#define pmd_val(x) (pgd_val((x).pgd))
-#define __pmd(x) ((pmd_t) { __pgd(x) } )
-
#define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x)))
#define __pa(x) __virt_to_phys((unsigned long)(x))
diff --git a/lib/arm64/asm/pgtable-hwdef.h b/lib/arm64/asm/pgtable-hwdef.h
index 3352489..3b6b0d6 100644
--- a/lib/arm64/asm/pgtable-hwdef.h
+++ b/lib/arm64/asm/pgtable-hwdef.h
@@ -9,38 +9,54 @@
* This work is licensed under the terms of the GNU GPL, version 2.
*/
+#include <asm/page.h>
+
#define UL(x) _AC(x, UL)
+#define PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3)
Please replace the above define with
/*
* The number of bits a given page table level, n (where n=0 is the top),
* maps is ((max_n - n) - 1) * nr_bits_per_level + PAGE_SHIFT. Since a page
* table descriptor is 8 bytes we have (PAGE_SHIFT - 3) bits per level. We
* also have a maximum of 4 page table levels. Hence,
*/
#define PGTABLE_LEVEL_SHIFT(n) \
(((4 - (n)) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT)
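For instance, with 4K pages (PAGE_SHIFT = 12) the suggested formula
gives PGTABLE_LEVEL_SHIFT(3) = 12, PGTABLE_LEVEL_SHIFT(2) = 21,
PGTABLE_LEVEL_SHIFT(1) = 30 and PGTABLE_LEVEL_SHIFT(0) = 39, i.e. the
usual PTE/PMD/PUD/PGD shifts for a 4K granule.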
#define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3))
+#if PGTABLE_LEVELS > 2
+#define PMD_SHIFT PGTABLE_LEVEL_SHIFT(2)
+#define PTRS_PER_PMD PTRS_PER_PTE
+#define PMD_SIZE (UL(1) << PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE-1))
+#endif
+
+#if PGTABLE_LEVELS > 3
+#define PUD_SHIFT PGTABLE_LEVEL_SHIFT(1)
+#define PTRS_PER_PUD PTRS_PER_PTE
+#define PUD_SIZE (UL(1) << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE-1))
+#else
+#define PUD_SIZE PGDIR_SIZE
+#define PUD_MASK PGDIR_MASK
+#endif
+
+#define PUD_VALID (_AT(pudval_t, 1) << 0)
+
/*
* PGDIR_SHIFT determines the size a top-level page table entry can map
* (depending on the configuration, this level can be 0, 1 or 2).
*/
-#define PGDIR_SHIFT ((PAGE_SHIFT - 3) * PGTABLE_LEVELS + 3)
+#define PGDIR_SHIFT PGTABLE_LEVEL_SHIFT(4 - PGTABLE_LEVELS)
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT))
#define PGD_VALID (_AT(pgdval_t, 1) << 0)
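Quick check with the default 64K/42-bit configuration (2 levels):
PGDIR_SHIFT = PGTABLE_LEVEL_SHIFT(2) = 13 + 16 = 29, so PTRS_PER_PGD
= 1 << (42 - 29) = 8192 entries, i.e. the top-level table is still
exactly one 64K page.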
-/* From include/asm-generic/pgtable-nopmd.h */
-#define PMD_SHIFT PGDIR_SHIFT
-#define PTRS_PER_PMD 1
-#define PMD_SIZE (UL(1) << PMD_SHIFT)
-#define PMD_MASK (~(PMD_SIZE-1))
-
/*
* Section address mask and size definitions.
*/
-#define SECTION_SHIFT PMD_SHIFT
-#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT)
-#define SECTION_MASK (~(SECTION_SIZE-1))
+#define SECTION_SHIFT PMD_SHIFT
+#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT)
+#define SECTION_MASK (~(SECTION_SIZE-1))
/*
* Hardware page table definitions.
*
- * Level 1 descriptor (PMD).
+ * Level 0,1,2 descriptor (PGD, PUD and PMD).
*/
#define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0)
#define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0)
diff --git a/lib/arm64/asm/pgtable.h b/lib/arm64/asm/pgtable.h
index e577d9c..c7632ae 100644
--- a/lib/arm64/asm/pgtable.h
+++ b/lib/arm64/asm/pgtable.h
@@ -30,10 +30,12 @@
#define pgtable_pa(x) ((unsigned long)(x))
#define pgd_none(pgd) (!pgd_val(pgd))
+#define pud_none(pud) (!pud_val(pud))
#define pmd_none(pmd) (!pmd_val(pmd))
#define pte_none(pte) (!pte_val(pte))
#define pgd_valid(pgd) (pgd_val(pgd) & PGD_VALID)
+#define pud_valid(pud) (pud_val(pud) & PUD_VALID)
#define pmd_valid(pmd) (pmd_val(pmd) & PMD_SECT_VALID)
#define pte_valid(pte) (pte_val(pte) & PTE_VALID)
@@ -52,15 +54,76 @@ static inline pgd_t *pgd_alloc(void)
return pgd;
}
-#define pmd_offset(pgd, addr) ((pmd_t *)pgd)
-#define pmd_free(pmd)
-#define pmd_alloc(pgd, addr) pmd_offset(pgd, addr)
+static inline pud_t *pgd_page_vaddr(pgd_t pgd)
+{
+ return pgtable_va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK);
+}
+
+static inline pmd_t *pud_page_vaddr(pud_t pud)
+{
+ return pgtable_va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK);
+}
+
static inline pte_t *pmd_page_vaddr(pmd_t pmd)
{
return pgtable_va(pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK);
}
+#if PGTABLE_LEVELS > 2
+#define pmd_index(addr) \
+ (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
+#define pmd_offset(pud, addr) \
+ (pud_page_vaddr(*(pud)) + pmd_index(addr))
+#define pmd_free(pmd) free_page(pmd)
+static inline pmd_t *pmd_alloc_one(void)
+{
+ assert(PTRS_PER_PMD * sizeof(pmd_t) == PAGE_SIZE);
+ pmd_t *pmd = alloc_page();
+ return pmd;
+}
+static inline pmd_t *pmd_alloc(pud_t *pud, unsigned long addr)
+{
+ if (pud_none(*pud)) {
+ pud_t entry;
+ pud_val(entry) = pgtable_pa(pmd_alloc_one()) | PMD_TYPE_TABLE;
+ WRITE_ONCE(*pud, entry);
+ }
+ return pmd_offset(pud, addr);
+}
+#else
+#define pmd_offset(pud, addr) ((pmd_t *)pud)
+#define pmd_free(pmd)
+#define pmd_alloc(pud, addr) pmd_offset(pud, addr)
+#endif
+
+#if PGTABLE_LEVELS > 3
+#define pud_index(addr) \
+ (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+#define pud_offset(pgd, addr) \
+ (pgd_page_vaddr(*(pgd)) + pud_index(addr))
+#define pud_free(pud) free_page(pud)
+static inline pud_t *pud_alloc_one(void)
+{
+ assert(PTRS_PER_PMD * sizeof(pud_t) == PAGE_SIZE);
This should be PTRS_PER_PUD, not PTRS_PER_PMD.
+ pud_t *pud = alloc_page();
+ return pud;
+}
+static inline pud_t *pud_alloc(pgd_t *pgd, unsigned long addr)
+{
+ if (pgd_none(*pgd)) {
+ pgd_t entry;
+ pgd_val(entry) = pgtable_pa(pud_alloc_one()) | PMD_TYPE_TABLE;
+ WRITE_ONCE(*pgd, entry);
+ }
+ return pud_offset(pgd, addr);
+}
+#else
+#define pud_offset(pgd, addr) ((pud_t *)pgd)
+#define pud_free(pud)
+#define pud_alloc(pgd, addr) pud_offset(pgd, addr)
+#endif
+
#define pte_index(addr) \
(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
#define pte_offset(pmd, addr) \
diff --git a/lib/arm/mmu.c b/lib/arm/mmu.c
index 540a1e8..6d1c75b 100644
--- a/lib/arm/mmu.c
+++ b/lib/arm/mmu.c
@@ -81,7 +81,8 @@ void mmu_disable(void)
static pteval_t *get_pte(pgd_t *pgtable, uintptr_t vaddr)
{
pgd_t *pgd = pgd_offset(pgtable, vaddr);
- pmd_t *pmd = pmd_alloc(pgd, vaddr);
+ pud_t *pud = pud_alloc(pgd, vaddr);
+ pmd_t *pmd = pmd_alloc(pud, vaddr);
pte_t *pte = pte_alloc(pmd, vaddr);
return &pte_val(*pte);
@@ -133,18 +134,20 @@ void mmu_set_range_sect(pgd_t *pgtable, uintptr_t virt_offset,
phys_addr_t phys_start, phys_addr_t phys_end,
pgprot_t prot)
{
- phys_addr_t paddr = phys_start & PGDIR_MASK;
- uintptr_t vaddr = virt_offset & PGDIR_MASK;
+ phys_addr_t paddr = phys_start & PUD_MASK;
+ uintptr_t vaddr = virt_offset & PUD_MASK;
uintptr_t virt_end = phys_end - paddr + vaddr;
pgd_t *pgd;
- pgd_t entry;
+ pud_t *pud;
+ pud_t entry;
- for (; vaddr < virt_end; vaddr += PGDIR_SIZE, paddr += PGDIR_SIZE) {
- pgd_val(entry) = paddr;
- pgd_val(entry) |= PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S;
- pgd_val(entry) |= pgprot_val(prot);
+ for (; vaddr < virt_end; vaddr += PUD_SIZE, paddr += PUD_SIZE) {
+ pud_val(entry) = paddr;
+ pud_val(entry) |= PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S;
+ pud_val(entry) |= pgprot_val(prot);
pgd = pgd_offset(pgtable, vaddr);
- WRITE_ONCE(*pgd, entry);
+ pud = pud_alloc(pgd, vaddr);
+ WRITE_ONCE(*pud, entry);
flush_tlb_page(vaddr);
}
}
@@ -207,6 +210,7 @@ unsigned long __phys_to_virt(phys_addr_t addr)
void mmu_clear_user(pgd_t *pgtable, unsigned long vaddr)
{
pgd_t *pgd;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -215,7 +219,9 @@ void mmu_clear_user(pgd_t *pgtable, unsigned long vaddr)
pgd = pgd_offset(pgtable, vaddr);
assert(pgd_valid(*pgd));
- pmd = pmd_offset(pgd, vaddr);
+ pud = pud_offset(pgd, vaddr);
+ assert(pud_valid(*pud));
+ pmd = pmd_offset(pud, vaddr);
assert(pmd_valid(*pmd));
if (pmd_huge(*pmd)) {
diff --git a/arm/cstart64.S b/arm/cstart64.S
index ffdd49f..cedc678 100644
--- a/arm/cstart64.S
+++ b/arm/cstart64.S
@@ -157,6 +157,14 @@ halt:
*/
#define MAIR(attr, mt) ((attr) << ((mt) * 8))
+#if PAGE_SIZE == 65536
+#define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K
+#elif PAGE_SIZE == 16384
+#define TCR_TG_FLAGS TCR_TG0_16K | TCR_TG1_16K
+#elif PAGE_SIZE == 4096
+#define TCR_TG_FLAGS TCR_TG0_4K | TCR_TG1_4K
+#endif
+
.globl asm_mmu_enable
asm_mmu_enable:
tlbi vmalle1 // invalidate I + D TLBs
@@ -164,7 +172,7 @@ asm_mmu_enable:
/* TCR */
ldr x1, =TCR_TxSZ(VA_BITS) | \
- TCR_TG0_64K | TCR_TG1_64K | \
+ TCR_TG_FLAGS | \
TCR_IRGN_WBWA | TCR_ORGN_WBWA | \
TCR_SHARED
mrs x2, id_aa64mmfr0_el1
--
2.17.1
Testing results looked pretty good. Everywhere I tried (TCG, Seattle,
Mustang, ThunderX) the 4K and 64K pages worked. In most places the
16K pages were rejected. The ThunderX allowed them, but then the
tests hung. I checked what was going on with the QEMU monitor; it got
an exception while writing the page tables. I have a feeling that's a
host problem rather than a problem with this patch, though.