Add a framework of EPT in nested VMX testing, including a set of
functions to construct and read EPT paging structures and a simple
read/write test of EPT remapping from guest to host.

Signed-off-by: Arthur Chunqi Li <yzt356@xxxxxxxxx>
---
 x86/vmx.c       |  132 ++++++++++++++++++++++++++++++++++++++++++++--
 x86/vmx.h       |   78 +++++++++++++++++++++++++++
 x86/vmx_tests.c |  167 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 375 insertions(+), 2 deletions(-)

diff --git a/x86/vmx.c b/x86/vmx.c
index ca36d35..a156b71 100644
--- a/x86/vmx.c
+++ b/x86/vmx.c
@@ -143,6 +143,134 @@ asm(
 	"	call	hypercall\n\t"
 );
 
+/* EPT paging structure related functions */
+/* install_ept_entry : Install a page to a given level in EPT
+   @pml4 : addr of pml4 table
+   @pte_level : level of PTE to set
+   @guest_addr : physical address of guest
+   @pte : pte value to set
+   @pt_page : page used for the first missing table, NULL to allocate one
+ */
+void install_ept_entry(unsigned long *pml4,
+		int pte_level,
+		unsigned long guest_addr,
+		unsigned long pte,
+		unsigned long *pt_page)
+{
+	int level;
+	unsigned long *pt = pml4;
+	unsigned offset;
+
+	for (level = EPT_PAGE_LEVEL; level > pte_level; --level) {
+		offset = (guest_addr >> ((level-1) * EPT_PGDIR_WIDTH + 12))
+				& EPT_PGDIR_MASK;
+		if (!(pt[offset] & (EPT_RA | EPT_WA | EPT_EA))) {
+			unsigned long *new_pt = pt_page;
+			if (!new_pt)
+				new_pt = alloc_page();
+			else
+				pt_page = 0;
+			memset(new_pt, 0, PAGE_SIZE);
+			pt[offset] = virt_to_phys(new_pt)
+					| EPT_RA | EPT_WA | EPT_EA;
+		}
+		pt = phys_to_virt(pt[offset] & 0xffffffffff000ull);
+	}
+	offset = ((unsigned long)guest_addr >> ((level-1) *
+			EPT_PGDIR_WIDTH + 12)) & EPT_PGDIR_MASK;
+	pt[offset] = pte;
+}
+
+/* Map a page, @perm is the permission of the page */
+void install_ept(unsigned long *pml4,
+		unsigned long phys,
+		unsigned long guest_addr,
+		u64 perm)
+{
+	install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0);
+}
+
+/* Map a 1G-size page, @phys is rounded down to a 1G boundary */
+void install_1g_ept(unsigned long *pml4,
+		unsigned long phys,
+		unsigned long guest_addr,
+		u64 perm)
+{
+	install_ept_entry(pml4, 3, guest_addr,
+			(phys & EPT_1G_PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
+}
+
+/* Map a 2M-size page, @phys is rounded down to a 2M boundary */
+void install_2m_ept(unsigned long *pml4,
+		unsigned long phys,
+		unsigned long guest_addr,
+		u64 perm)
+{
+	install_ept_entry(pml4, 2, guest_addr,
+			(phys & EPT_2M_PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
+}
+
+/* setup_ept_range : Setup a range of 1:1 mapped page to EPT paging structure.
+   @start : start address of guest page
+   @len : length of address to be mapped
+   @map_1g : whether 1G page map is used
+   @map_2m : whether 2M page map is used
+   @perm : permission for every page
+ */
+int setup_ept_range(unsigned long *pml4, unsigned long start,
+		unsigned long len, int map_1g, int map_2m, u64 perm)
+{
+	u64 phys = start;
+	u64 max = (u64)len + (u64)start;
+
+	if (map_1g) {
+		while (phys + PAGE_SIZE_1G <= max) {
+			install_1g_ept(pml4, phys, phys, perm);
+			phys += PAGE_SIZE_1G;
+		}
+	}
+	if (map_2m) {
+		while (phys + PAGE_SIZE_2M <= max) {
+			install_2m_ept(pml4, phys, phys, perm);
+			phys += PAGE_SIZE_2M;
+		}
+	}
+	while (phys + PAGE_SIZE <= max) {
+		install_ept(pml4, phys, phys, perm);
+		phys += PAGE_SIZE;
+	}
+	return 0;
+}
+
+/* get_ept_pte : Get the PTE of a given level in EPT,
+    @level == 1 means get the last level.
+    Returns 0 if a non-present entry is met on the way down, and the
+    large-page entry itself if one is met before reaching @level. */
+unsigned long get_ept_pte(unsigned long *pml4,
+		unsigned long guest_addr, int level)
+{
+	int l;
+	unsigned long *pt = pml4, pte;
+	unsigned offset;
+
+	for (l = EPT_PAGE_LEVEL; l > 1; --l) {
+		offset = (guest_addr >> (((l-1) * EPT_PGDIR_WIDTH) + 12))
+				& EPT_PGDIR_MASK;
+		pte = pt[offset];
+		if (!(pte & (EPT_RA | EPT_WA | EPT_EA)))
+			return 0;
+		if (l == level)
+			return pte;
+		if (l < 4 && (pte & EPT_LARGE_PAGE))
+			return pte;
+		pt = phys_to_virt(pte & 0xffffffffff000ull);
+	}
+	offset = (guest_addr >> (((l-1) * EPT_PGDIR_WIDTH) + 12))
+			& EPT_PGDIR_MASK;
+	pte = pt[offset];
+	return pte;
+}
+
 static void init_vmcs_ctrl(void)
 {
 	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
@@ -336,10 +464,10 @@ static void init_vmx(void)
 			: MSR_IA32_VMX_ENTRY_CTLS);
 	ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
 			: MSR_IA32_VMX_PROCBASED_CTLS);
-	if (ctrl_cpu_rev[0].set & CPU_SECONDARY)
+	if (ctrl_cpu_rev[0].clr & CPU_SECONDARY)
 		ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
-	if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID)
+	if (ctrl_cpu_rev[1].clr & (CPU_EPT | CPU_VPID))
 		ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
 
 	write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
 	write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);
diff --git a/x86/vmx.h b/x86/vmx.h
index 28595d8..45aca11 100644
--- a/x86/vmx.h
+++ b/x86/vmx.h
@@ -432,6 +432,71 @@ enum Ctrl1 {
 #define HYPERCALL_MASK		0xFFF
 #define HYPERCALL_VMEXIT	0x1
 
+#define EPTP_PG_WALK_LEN_SHIFT	3ul
+#define EPTP_AD_FLAG		(1ul << 6)
+
+#define EPT_MEM_TYPE_UC		0ul
+#define EPT_MEM_TYPE_WC		1ul
+#define EPT_MEM_TYPE_WT		4ul
+#define EPT_MEM_TYPE_WP		5ul
+#define EPT_MEM_TYPE_WB		6ul
+
+#define EPT_RA			1ul
+#define EPT_WA			2ul
+#define EPT_EA			4ul
+#define EPT_ACCESS_FLAG		(1ul << 8)
+#define EPT_DIRTY_FLAG		(1ul << 9)
+#define EPT_LARGE_PAGE		(1ul << 7)
+#define EPT_MEM_TYPE_SHIFT	3ul
+#define EPT_IGNORE_PAT		(1ul << 6)
+#define EPT_SUPPRESS_VE		(1ull << 63)
+
+#define EPT_1G_PAGE_SHIFT	30ul
+#define EPT_1G_OFFSET_MASK	((1ull << EPT_1G_PAGE_SHIFT) - 1)
+#define EPT_1G_PAGE_MASK	(~EPT_1G_OFFSET_MASK)
+#define EPT_2M_PAGE_SHIFT	21ul
+#define EPT_2M_OFFSET_MASK	((1ull << EPT_2M_PAGE_SHIFT) - 1)
+#define EPT_2M_PAGE_MASK	(~EPT_2M_OFFSET_MASK)
+
+#define PDE_ADDR_MASK		0xFF8
+#define PAGE_ADDR_MASK		0xFFF
+/* NOTE(review): EPT_NEXT_LEVEL_PT_SHIFT/_MASK are not defined by this
+   patch; confirm vmx.h provides them before using the helpers below. */
+#define EPTP_2_PML4E(eptp)	((eptp) >> EPT_NEXT_LEVEL_PT_SHIFT)
+#define PML4E_2_PDPTE(pml4e, addr)	\
+	(((pml4e) & EPT_NEXT_LEVEL_PT_MASK) | (((addr) >> 27ul) & PDE_ADDR_MASK))
+#define PDPTE_2_PDE(pdpte, addr)	\
+	(((pdpte) & EPT_NEXT_LEVEL_PT_MASK) | (((addr) >> 18ul) & PDE_ADDR_MASK))
+#define PDE_2_PTE(pde, addr)	\
+	(((pde) & EPT_NEXT_LEVEL_PT_MASK) | (((addr) >> 9ul) & PDE_ADDR_MASK))
+#define PTE_2_PHYADDR(pte, addr)	\
+	(((pte) & EPT_NEXT_LEVEL_PT_MASK) | ((addr) & PAGE_ADDR_MASK))
+#define PDPTE_2_1G_ADDR(pdpte, addr)	\
+	(((pdpte) & EPT_1G_PAGE_MASK) | ((addr) & EPT_1G_OFFSET_MASK))
+#define PDE_2_2M_ADDR(pde, addr)	\
+	(((pde) & EPT_2M_PAGE_MASK) | ((addr) & EPT_2M_OFFSET_MASK))
+
+#define EPT_CAP_WT		1ull
+#define EPT_CAP_PWL4		(1ull << 6)
+#define EPT_CAP_UC		(1ull << 8)
+#define EPT_CAP_WB		(1ull << 14)
+#define EPT_CAP_2M_PAGE		(1ull << 16)
+#define EPT_CAP_1G_PAGE		(1ull << 17)
+#define EPT_CAP_INVEPT		(1ull << 20)
+#define EPT_CAP_INVEPT_SINGLE	(1ull << 25)
+#define EPT_CAP_INVEPT_ALL	(1ull << 26)
+#define EPT_CAP_AD_FLAG		(1ull << 21)
+
+#define PAGE_SIZE_2M		(512 * PAGE_SIZE)
+#define PAGE_SIZE_1G		(512 * PAGE_SIZE_2M)
+#define EPT_PAGE_LEVEL		4
+#define EPT_PGDIR_WIDTH		9
+#define EPT_PGDIR_MASK		511
+#define PAGE_MASK		(~(PAGE_SIZE-1))
+
+#define MAGIC_VAL_1		0x12345678ul
+#define MAGIC_VAL_2		0x87654321ul
+#define MAGIC_VAL_3		0xfffffffful
 
 extern struct regs regs;
 
@@ -474,6 +539,19 @@ static inline int vmcs_save(struct vmcs **vmcs)
 
 void report(const char *name, int result);
 void print_vmexit_info();
+void install_ept_entry(unsigned long *pml4, int pte_level,
+		unsigned long guest_addr, unsigned long pte,
+		unsigned long *pt_page);
+void install_1g_ept(unsigned long *pml4, unsigned long phys,
+		unsigned long guest_addr, u64 perm);
+void install_2m_ept(unsigned long *pml4, unsigned long phys,
+		unsigned long guest_addr, u64 perm);
+void install_ept(unsigned long *pml4, unsigned long phys,
+		unsigned long guest_addr, u64 perm);
+int setup_ept_range(unsigned long *pml4, unsigned long start,
+		unsigned long len, int map_1g, int map_2m, u64 perm);
+unsigned long get_ept_pte(unsigned long *pml4,
+		unsigned long guest_addr, int level);
 
 #endif
 
diff --git a/x86/vmx_tests.c b/x86/vmx_tests.c
index c1b39f4..99632b5 100644
--- a/x86/vmx_tests.c
+++ b/x86/vmx_tests.c
@@ -1,4 +1,35 @@
 #include "vmx.h"
+#include "processor.h"
+#include "vm.h"
+#include "msr.h"
+#include "fwcfg.h"
+
+volatile u32 stage;
+volatile bool init_fail;
+unsigned long *pml4;
+void *data_page1, *data_page2;
+
+static inline void set_stage(u32 s)
+{
+	barrier();
+	stage = s;
+	barrier();
+}
+
+static inline u32 get_stage()
+{
+	u32 s;
+
+	barrier();
+	s = stage;
+	barrier();
+	return s;
+}
+
+static inline void vmcall()
+{
+	asm volatile ("vmcall");
+}
 
 void basic_init()
 {
@@ -76,6 +107,140 @@ int vmenter_exit_handler()
 	return VMX_TEST_VMEXIT;
 }
 
+/* setup_ept : Build a 1:1 EPT covering RAM (at least the low 4G) and
+   write its EPTP into the VMCS. Returns 0 on success, 1 on failure. */
+static int setup_ept()
+{
+	u64 eptp;
+	int support_2m;
+	unsigned long end_of_memory;
+
+	if (!(ept_vpid.val & EPT_CAP_UC) &&
+			!(ept_vpid.val & EPT_CAP_WB)) {
+		printf("\tEPT paging-structure memory type "
+				"UC&WB are not supported\n");
+		return 1;
+	}
+	if (ept_vpid.val & EPT_CAP_UC)
+		eptp = EPT_MEM_TYPE_UC;
+	else
+		eptp = EPT_MEM_TYPE_WB;
+	if (!(ept_vpid.val & EPT_CAP_PWL4)) {
+		printf("\tPWL4 is not supported\n");
+		return 1;
+	}
+	eptp |= (3 << EPTP_PG_WALK_LEN_SHIFT);
+	pml4 = alloc_page();
+	memset(pml4, 0, PAGE_SIZE);
+	eptp |= virt_to_phys(pml4);
+	vmcs_write(EPTP, eptp);
+	support_2m = !!(ept_vpid.val & EPT_CAP_2M_PAGE);
+	end_of_memory = fwcfg_get_u64(FW_CFG_RAM_SIZE);
+	if (end_of_memory < (1ul << 32))
+		end_of_memory = (1ul << 32);
+	if (setup_ept_range(pml4, 0, end_of_memory,
+			0, support_2m, EPT_WA | EPT_RA | EPT_EA)) {
+		printf("\tSet ept tables failed.\n");
+		return 1;
+	}
+	return 0;
+}
+
+static void ept_init()
+{
+	u32 ctrl_cpu[2];
+
+	init_fail = false;
+	/* Build the tables before turning EPT on: if setup_ept() fails,
+	   pml4 may still be NULL and EPTP unset, so leave EPT disabled. */
+	if (setup_ept()) {
+		init_fail = true;
+		return;
+	}
+	ctrl_cpu[0] = vmcs_read(CPU_EXEC_CTRL0);
+	ctrl_cpu[1] = vmcs_read(CPU_EXEC_CTRL1);
+	ctrl_cpu[0] = (ctrl_cpu[0] | CPU_SECONDARY)
+		& ctrl_cpu_rev[0].clr;
+	ctrl_cpu[1] = (ctrl_cpu[1] | CPU_EPT)
+		& ctrl_cpu_rev[1].clr;
+	vmcs_write(CPU_EXEC_CTRL0, ctrl_cpu[0]);
+	vmcs_write(CPU_EXEC_CTRL1, ctrl_cpu[1]);
+	data_page1 = alloc_page();
+	data_page2 = alloc_page();
+	memset(data_page1, 0x0, PAGE_SIZE);
+	memset(data_page2, 0x0, PAGE_SIZE);
+	*((u32 *)data_page1) = MAGIC_VAL_1;
+	*((u32 *)data_page2) = MAGIC_VAL_2;
+	install_ept(pml4, (unsigned long)data_page1, (unsigned long)data_page2,
+			EPT_RA | EPT_WA | EPT_EA);
+}
+
+/* ept_main : Guest side of the test. data_page2 is initially remapped
+   onto data_page1, so both must read MAGIC_VAL_1; after the host
+   restores the 1:1 mapping data_page2 must read MAGIC_VAL_2 again. */
+static void ept_main()
+{
+	if (init_fail)
+		return;
+	if (!(ctrl_cpu_rev[0].clr & CPU_SECONDARY)
+		|| !(ctrl_cpu_rev[1].clr & CPU_EPT)) {
+		printf("\tEPT is not supported\n");
+		return;
+	}
+	set_stage(0);
+	if (*((u32 *)data_page2) != MAGIC_VAL_1 ||
+			*((u32 *)data_page1) != MAGIC_VAL_1)
+		report("EPT basic framework - read\n", 0);
+	else {
+		*((u32 *)data_page2) = MAGIC_VAL_3;
+		vmcall();
+		if (get_stage() == 1) {
+			if (*((u32 *)data_page1) == MAGIC_VAL_3 &&
+					*((u32 *)data_page2) == MAGIC_VAL_2)
+				report("EPT basic framework\n", 1);
+			else
+				report("EPT basic framework - remap\n", 0);
+		}
+	}
+}
+
+/* ept_exit_handler : On the guest's vmcall, check that its write landed
+   in data_page1, then map data_page2 back to itself and resume. */
+static int ept_exit_handler()
+{
+	u64 guest_rip;
+	ulong reason;
+	u32 insn_len;
+
+	guest_rip = vmcs_read(GUEST_RIP);
+	reason = vmcs_read(EXI_REASON) & 0xff;
+	insn_len = vmcs_read(EXI_INST_LEN);
+	switch (reason) {
+	case VMX_VMCALL:
+		switch (get_stage()) {
+		case 0:
+			if (*((u32 *)data_page1) == MAGIC_VAL_3 &&
+					*((u32 *)data_page2) == MAGIC_VAL_2) {
+				set_stage(get_stage() + 1);
+				install_ept(pml4, (unsigned long)data_page2,
+						(unsigned long)data_page2,
+						EPT_RA | EPT_WA | EPT_EA);
+			} else
+				report("EPT basic framework - write\n", 0);
+			break;
+		default:
+			printf("\tERROR - unknow stage, %d.\n", get_stage());
+			return VMX_TEST_VMEXIT;
+		}
+		vmcs_write(GUEST_RIP, guest_rip + insn_len);
+		return VMX_TEST_RESUME;
+	default:
+		printf("Unknown exit reason, %ld\n", reason);
+		print_vmexit_info();
+	}
+	return VMX_TEST_VMEXIT;
+}
+
 /* name/init/guest_main/exit_handler/syscall_handler/guest_regs
    basic_* just implement some basic functions */
 struct vmx_test vmx_tests[] = {
@@ -83,5 +248,7 @@ struct vmx_test vmx_tests[] = {
 		basic_syscall_handler, {0} },
 	{ "vmenter", basic_init, vmenter_main, vmenter_exit_handler,
 		basic_syscall_handler, {0} },
+	{ "EPT framework", ept_init, ept_main, ept_exit_handler,
+		basic_syscall_handler, {0} },
 	{ NULL, NULL, NULL, NULL, NULL, {0} },
 };
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html