From: Marc Orr <marcorr@xxxxxxxxxx> This patch introduces split_large_page(), which is used to force the APIC-access address to be a 4k page. Otherwise, the apic_reg_virt_test fails on upstream. Signed-off-by: Marc Orr <marcorr@xxxxxxxxxx> Reviewed-by: Jim Mattson <jmattson@xxxxxxxxxx> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> --- lib/x86/processor.h | 10 +++++++ lib/x86/vm.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/x86/vm.h | 3 ++ x86/vmx_tests.c | 6 ++++ 4 files changed, 102 insertions(+) diff --git a/lib/x86/processor.h b/lib/x86/processor.h index 916e67d..6f77148 100644 --- a/lib/x86/processor.h +++ b/lib/x86/processor.h @@ -36,6 +36,7 @@ #define X86_CR4_PSE 0x00000010 #define X86_CR4_PAE 0x00000020 #define X86_CR4_MCE 0x00000040 +#define X86_CR4_PGE 0x00000080 #define X86_CR4_PCE 0x00000100 #define X86_CR4_UMIP 0x00000800 #define X86_CR4_VMXE 0x00002000 @@ -477,4 +478,13 @@ static inline void set_bit(int bit, u8 *addr) : "+m" (*addr) : "Ir" (bit) : "cc", "memory"); } +static inline void flush_tlb(void) +{ + ulong cr4; + + cr4 = read_cr4(); + write_cr4(cr4 ^ X86_CR4_PGE); + write_cr4(cr4); +} + #endif diff --git a/lib/x86/vm.c b/lib/x86/vm.c index 73d9be4..edbbe82 100644 --- a/lib/x86/vm.c +++ b/lib/x86/vm.c @@ -177,3 +177,86 @@ phys_addr_t virt_to_pte_phys(pgd_t *cr3, void *mem) { return (*get_pte(cr3, mem) & PT_ADDR_MASK) + ((ulong)mem & (PAGE_SIZE - 1)); } + +/* + * split_large_page: Split a 2M/1G large page into 512 smaller PTEs. 
+ * @ptep : large page table entry to split + * @level : level of ptep (2 or 3) + */ +void split_large_page(unsigned long *ptep, int level) +{ + unsigned long *new_pt; + unsigned long pa; + unsigned long pte; + unsigned long prototype; + int i; + + pte = *ptep; + assert(pte & PT_PRESENT_MASK); + assert(pte & PT_PAGE_SIZE_MASK); + assert(level == 2 || level == 3); + + new_pt = alloc_page(); + assert(new_pt); + + prototype = pte & ~PT_ADDR_MASK; + if (level == 2) + prototype &= ~PT_PAGE_SIZE_MASK; + + pa = pte & PT_ADDR_MASK; + for (i = 0; i < (1 << PGDIR_WIDTH); i++) { + new_pt[i] = prototype | pa; + pa += 1ul << PGDIR_BITS(level - 1); + } + + pte &= ~PT_PAGE_SIZE_MASK; + pte &= ~PT_ADDR_MASK; + pte |= virt_to_phys(new_pt); + + /* Modify the relevant paging-structure entry */ + *ptep = pte; + + /* + * Flush the TLB to eradicate stale mappings. + * + * Note: Removing specific TLB mappings is tricky because + * split_large_page() can be called to split the active code page + * backing the next set of instructions to be fetched and executed. + * Furthermore, Intel SDM volume 3 recommends clearing the present bit + * for the page being split, before invalidating any mappings. + * + * But clearing the mapping from the page table and removing it from the + * TLB (where it's not actually guaranteed to reside anyway) makes it + * impossible to continue fetching instructions! + */ + flush_tlb(); +} + +/* + * force_4k_page: Ensures that addr translates to a 4k page. + * + * This function uses split_large_page(), as needed, to ensure that target + * address, addr, translates to a 4k page. 
+ * + * @addr: target address that should be mapped to a 4k page + */ +void force_4k_page(void *addr) +{ + unsigned long *ptep; + unsigned long pte; + unsigned long *cr3 = current_page_table(); + + ptep = get_pte_level(cr3, addr, 3); + assert(ptep); + pte = *ptep; + assert(pte & PT_PRESENT_MASK); + if (pte & PT_PAGE_SIZE_MASK) + split_large_page(ptep, 3); + + ptep = get_pte_level(cr3, addr, 2); + assert(ptep); + pte = *ptep; + assert(pte & PT_PRESENT_MASK); + if (pte & PT_PAGE_SIZE_MASK) + split_large_page(ptep, 2); +} diff --git a/lib/x86/vm.h b/lib/x86/vm.h index 729f172..8750a1e 100644 --- a/lib/x86/vm.h +++ b/lib/x86/vm.h @@ -42,4 +42,7 @@ static inline void *current_page_table(void) { return phys_to_virt(read_cr3()); } + +void split_large_page(unsigned long *ptep, int level); +void force_4k_page(void *addr); #endif diff --git a/x86/vmx_tests.c b/x86/vmx_tests.c index 73aca95..092e70e 100644 --- a/x86/vmx_tests.c +++ b/x86/vmx_tests.c @@ -5673,7 +5673,13 @@ static void apic_reg_virt_test(void) test_set_guest(apic_reg_virt_guest); + /* + * From the SDM: The 1-setting of the "virtualize APIC accesses" + * VM-execution control is guaranteed to apply only if translations to the + * APIC-access address use a 4-KByte page. + */ apic_access_address = alloc_page(); + force_4k_page(apic_access_address); vmcs_write(APIC_ACCS_ADDR, virt_to_phys(apic_access_address)); virtual_apic_page = alloc_page(); -- 1.8.3.1