On 25/08/2017 13:57, Yu Zhang wrote: > Provide paging mode switching logic to run access test in 5 > level paging mode if LA57 is detected. Qemu parameter +la57 > should be used to expose this feature, for example: > ./x86-run ./x86/access.flat -cpu qemu64,+la57 > > Signed-off-by: Yu Zhang <yu.c.zhang@xxxxxxxxxxxxxxx> > --- > x86/access.c | 17 +++++++++++++++-- > x86/cstart64.S | 43 ++++++++++++++++++++++++++++++++++++++++++- > 2 files changed, 57 insertions(+), 3 deletions(-) I need this for the test to pass in QEMU. Probably the TLB is helping you on real hardware! Pushed with this change. diff --git a/x86/access.c b/x86/access.c index 640ca8c..c99c5c1 100644 --- a/x86/access.c +++ b/x86/access.c @@ -322,7 +322,7 @@ pt_element_t ac_test_alloc_pt(ac_pool_t *pool) _Bool ac_test_enough_room(ac_pool_t *pool) { - return pool->pt_pool_current + 4 * PAGE_SIZE <= pool->pt_pool_size; + return pool->pt_pool_current + 5 * PAGE_SIZE <= pool->pt_pool_size; } void ac_test_reset_pt_pool(ac_pool_t *pool) @@ -467,6 +467,7 @@ void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, u64 pd_page, { unsigned long root = read_cr3(); int flags = at->flags; + bool skip = true; if (!ac_test_enough_room(pool)) ac_test_reset_pt_pool(pool); @@ -476,6 +477,16 @@ void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, u64 pd_page, pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); unsigned index = PT_INDEX((unsigned long)at->virt, i); pt_element_t pte = 0; + + /* + * Reuse existing page tables along the path to the test code and data + * (which is in the bottom 2MB). + */ + if (skip && i >= 2 && index == 0) { + goto next; + } + skip = false; + switch (i) { case 5: case 4: @@ -535,6 +546,7 @@ void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, u64 pd_page, break; } vroot[index] = pte; + next: root = vroot[index]; } ac_set_expected_status(at); > diff --git a/x86/access.c b/x86/access.c > index 0546dbb..56d17a1 100644 > --- a/x86/access.c > +++ b/x86/access.c > @@ -15,6 +15,7 @@ typedef unsigned long pt_element_t; > static int cpuid_7_ebx; > static int cpuid_7_ecx; > static int invalid_mask; > +static int page_table_levels; > > #define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 40) - 1) & PAGE_MASK)) > #define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(1ull << 21)) > @@ -107,6 +108,8 @@ enum { > #define AC_CPU_CR4_SMEP_MASK (1 << AC_CPU_CR4_SMEP_BIT) > #define AC_CPU_CR4_PKE_MASK (1 << AC_CPU_CR4_PKE_BIT) > > +extern void setup_5level_page_table(); > + > const char *ac_names[] = { > [AC_PTE_PRESENT_BIT] = "pte.p", > [AC_PTE_ACCESSED_BIT] = "pte.a", > @@ -467,11 +470,12 @@ void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, u64 pd_page, > ac_test_reset_pt_pool(pool); > > at->ptep = 0; > - for (int i = 4; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) { > + for (int i = page_table_levels; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) { > pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); > unsigned index = PT_INDEX((unsigned long)at->virt, i); > pt_element_t pte = 0; > switch (i) { > + case 5: > case 4: > case 3: > pte = pd_page ? pd_page : ac_test_alloc_pt(pool); > @@ -552,7 +556,7 @@ static void dump_mapping(ac_test_t *at) > int i; > > printf("Dump mapping: address: %p\n", at->virt); > - for (i = 4; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) { > + for (i = page_table_levels ; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) { > pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK); > unsigned index = PT_INDEX((unsigned long)at->virt, i); > pt_element_t pte = vroot[index]; > @@ -986,6 +990,15 @@ int main() > cpuid_7_ecx = cpuid(7).c; > > printf("starting test\n\n"); > + page_table_levels = 4; > r = ac_test_run(); > + > + if (cpuid_7_ecx & (1 << 16)) { > + page_table_levels = 5; > + setup_5level_page_table(); > + printf("starting 5-level paging test.\n\n"); > + r = ac_test_run(); > + } > + > return r ? 0 : 1; > } > diff --git a/x86/cstart64.S b/x86/cstart64.S > index 4c26fb2..8e4a1f3 100644 > --- a/x86/cstart64.S > +++ b/x86/cstart64.S > @@ -45,6 +45,10 @@ ptl4: > .quad ptl3 + 7 > > .align 4096 > +ptl5: > + .quad ptl4 + 7 > + > +.align 4096 > > gdt64_desc: > .word gdt64_end - gdt64 - 1 > @@ -91,6 +95,8 @@ tss_end: > > mb_boot_info: .quad 0 > > +pt_root: .quad ptl4 > + > .section .init > > .code32 > @@ -119,14 +125,36 @@ start: > call prepare_64 > jmpl $8, $start64 > > +switch_to_5level: > + /* Disable CR4.PCIDE */ > + mov %cr4, %eax > + btr $17, %eax > + mov %eax, %cr4 > + > + mov %cr0, %eax > + btr $31, %eax > + mov %eax, %cr0 > + > + mov $ptl5, %eax > + mov %eax, pt_root > + > + /* Enable CR4.LA57 */ > + mov %cr4, %eax > + bts $12, %eax > + mov %eax, %cr4 > + > + call enter_long_mode > + jmpl $8, $lvl5 > + > prepare_64: > lgdt gdt64_desc > > +enter_long_mode: > mov %cr4, %eax > bts $5, %eax // pae > mov %eax, %cr4 > > - mov $ptl4, %eax > + mov pt_root, %eax > mov %eax, %cr3 > > efer = 0xc0000080 > @@ -211,6 +239,19 @@ start64: > mov %eax, %edi > call exit > > +.globl setup_5level_page_table > +setup_5level_page_table: > + /* Check if 5-level paging has already enabled */ > + mov %cr4, %rax > + test $12, %eax > + jnz lvl5 > + > + pushq $32 > + pushq $switch_to_5level > + lretq > +lvl5: > + retq > + > idt_descr: > .word 16 * 256 - 1 > .quad boot_idt >