Add an EPT framework for nested VMX, including functions to build up
EPT paging structures, read and set EPT PTEs, and set up a 1:1 mapped
EPT range.

Signed-off-by: Arthur Chunqi Li <yzt356@xxxxxxxxx>
---
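Note for reviewers (not part of the patch): a minimal usage sketch of
the new interface. It assumes the framework's alloc_page()/memset(),
a guest that fits in the low 2G of physical memory, that ept_vpid from
vmx.c is visible to the test, and that an EPTP field encoding (0x201a
per the SDM) is available to vmcs_write(). The EPTP layout below
(memory type in bits 2:0, page-walk length minus one in bits 5:3)
follows the SDM.

	static void ept_setup_sketch(void)
	{
		unsigned long *pml4 = alloc_page();
		u64 eptp;

		/* Gate on the capabilities this sketch relies on. */
		if (!(ept_vpid.val & EPT_CAP_PWL4) ||
		    !(ept_vpid.val & EPT_CAP_WB) ||
		    !(ept_vpid.val & EPT_CAP_2M_PAGE))
			return;

		/* Identity-map the low 2G read/write/execute with 2M
		 * mappings where possible (map_1g = 0, map_2m = 1). */
		memset(pml4, 0, PAGE_SIZE);
		setup_ept_range(pml4, 0, 2ul << 30, 0, 1,
				EPT_RA | EPT_WA | EPT_EA);

		/* WB memory type, 4-level walk (encoded as levels - 1). */
		eptp = EPT_MEM_TYPE_WB
			| ((EPT_PAGE_LEVEL - 1) << EPTP_PG_WALK_LEN_SHIFT)
			| virt_to_phys(pml4);
		vmcs_write(EPTP, eptp);

		/* Remap one 4K page read-only, then flush the stale
		 * translation for this EPTP context. */
		install_ept(pml4, 0x10000, 0x10000, EPT_RA);
		if (ept_vpid.val & EPT_CAP_INVEPT_SINGLE)
			invept(INVEPT_SINGLE, eptp);
	}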
 x86/vmx.c |  159 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 x86/vmx.h |   76 +++++++++++++++++++++++++++++
 2 files changed, 231 insertions(+), 4 deletions(-)

diff --git a/x86/vmx.c b/x86/vmx.c
index ca36d35..87d1d55 100644
--- a/x86/vmx.c
+++ b/x86/vmx.c
@@ -143,6 +143,159 @@ asm(
 	"	call hypercall\n\t"
 );
 
+/* EPT paging structure related functions */
+/* install_ept_entry : Install a PTE at a given level in the EPT
+		paging structures
+	@pml4 : address of the EPT PML4 table
+	@pte_level : level of the PTE to set
+	@guest_addr : guest physical address to map
+	@pte : PTE value to set
+	@pt_page : page to use for the first missing page table,
+		   NULL to allocate a new page
+ */
+void install_ept_entry(unsigned long *pml4,
+		int pte_level,
+		unsigned long guest_addr,
+		unsigned long pte,
+		unsigned long *pt_page)
+{
+	int level;
+	unsigned long *pt = pml4;
+	unsigned offset;
+
+	for (level = EPT_PAGE_LEVEL; level > pte_level; --level) {
+		offset = (guest_addr >> ((level-1) * EPT_PGDIR_WIDTH + 12))
+				& EPT_PGDIR_MASK;
+		if (!(pt[offset] & EPT_PRESENT)) {
+			unsigned long *new_pt = pt_page;
+			if (!new_pt)
+				new_pt = alloc_page();
+			else
+				/* Consume the caller-supplied page only
+				 * for the first missing table. */
+				pt_page = 0;
+			memset(new_pt, 0, PAGE_SIZE);
+			pt[offset] = virt_to_phys(new_pt)
+					| EPT_RA | EPT_WA | EPT_EA;
+		}
+		pt = phys_to_virt(pt[offset] & 0xffffffffff000ull);
+	}
+	offset = (guest_addr >> ((level-1) *
+			EPT_PGDIR_WIDTH + 12)) & EPT_PGDIR_MASK;
+	pt[offset] = pte;
+}
+
+/* Map a 4K page; @perm gives its EPT permissions */
+void install_ept(unsigned long *pml4,
+		unsigned long phys,
+		unsigned long guest_addr,
+		u64 perm)
+{
+	install_ept_entry(pml4, 1, guest_addr, (phys & PAGE_MASK) | perm, 0);
+}
+
+/* Map a 1G page */
+void install_1g_ept(unsigned long *pml4,
+		unsigned long phys,
+		unsigned long guest_addr,
+		u64 perm)
+{
+	install_ept_entry(pml4, 3, guest_addr,
+			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
+}
+
+/* Map a 2M page */
+void install_2m_ept(unsigned long *pml4,
+		unsigned long phys,
+		unsigned long guest_addr,
+		u64 perm)
+{
+	install_ept_entry(pml4, 2, guest_addr,
+			(phys & PAGE_MASK) | perm | EPT_LARGE_PAGE, 0);
+}
+
+/* setup_ept_range : Set up a 1:1 mapped range in the EPT paging
+		structures
+	@start : start of the guest physical range
+	@len : length of the range to map
+	@map_1g : whether to use 1G mappings where possible
+	@map_2m : whether to use 2M mappings where possible
+	@perm : permissions for every page
+ */
+int setup_ept_range(unsigned long *pml4, unsigned long start,
+		unsigned long len, int map_1g, int map_2m, u64 perm)
+{
+	u64 phys = start;
+	u64 max = (u64)len + (u64)start;
+
+	if (map_1g) {
+		while (phys + PAGE_SIZE_1G <= max) {
+			install_1g_ept(pml4, phys, phys, perm);
+			phys += PAGE_SIZE_1G;
+		}
+	}
+	if (map_2m) {
+		while (phys + PAGE_SIZE_2M <= max) {
+			install_2m_ept(pml4, phys, phys, perm);
+			phys += PAGE_SIZE_2M;
+		}
+	}
+	while (phys + PAGE_SIZE <= max) {
+		install_ept(pml4, phys, phys, perm);
+		phys += PAGE_SIZE;
+	}
+	return 0;
+}
+
+/* get_ept_pte : Get the PTE of a given level in EPT;
+		@level == 1 returns the last-level (leaf) PTE */
+unsigned long get_ept_pte(unsigned long *pml4,
+		unsigned long guest_addr, int level)
+{
+	int l;
+	unsigned long *pt = pml4, pte;
+	unsigned offset;
+
+	for (l = EPT_PAGE_LEVEL; l > 1; --l) {
+		offset = (guest_addr >> (((l-1) * EPT_PGDIR_WIDTH) + 12))
+				& EPT_PGDIR_MASK;
+		pte = pt[offset];
+		if (!(pte & EPT_PRESENT))
+			return 0;
+		if (l == level)
+			return pte;
+		if (l < EPT_PAGE_LEVEL && (pte & EPT_LARGE_PAGE))
+			return pte;
+		pt = phys_to_virt(pte & 0xffffffffff000ull);
+	}
+	offset = (guest_addr >> (((l-1) * EPT_PGDIR_WIDTH) + 12))
+			& EPT_PGDIR_MASK;
+	pte = pt[offset];
+	return pte;
+}
+
+int set_ept_pte(unsigned long *pml4, unsigned long guest_addr,
+		int level, u64 pte_val)
+{
+	int l;
+	unsigned long *pt = pml4;
+	unsigned offset;
+
+	if (level < 1 || level > 3)
+		return -1;
+	for (l = EPT_PAGE_LEVEL; l > 1; --l) {
+		offset = (guest_addr >> (((l-1) * EPT_PGDIR_WIDTH) + 12))
+				& EPT_PGDIR_MASK;
+		if (l == level) {
+			pt[offset] = pte_val;
+			return 0;
+		}
+		if (!(pt[offset] & EPT_PRESENT))
+			return -1;
+		pt = phys_to_virt(pt[offset] & 0xffffffffff000ull);
+	}
+	offset = (guest_addr >> (((l-1) * EPT_PGDIR_WIDTH) + 12))
+			& EPT_PGDIR_MASK;
+	pt[offset] = pte_val;
+	return 0;
+}
+
 static void init_vmcs_ctrl(void)
 {
 	/* 26.2 CHECKS ON VMX CONTROLS AND HOST-STATE AREA */
@@ -336,10 +489,8 @@ static void init_vmx(void)
 			: MSR_IA32_VMX_ENTRY_CTLS);
 	ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ?
 			MSR_IA32_VMX_TRUE_PROC : MSR_IA32_VMX_PROCBASED_CTLS);
-	if (ctrl_cpu_rev[0].set & CPU_SECONDARY)
-		ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
-	if (ctrl_cpu_rev[1].set & CPU_EPT || ctrl_cpu_rev[1].set & CPU_VPID)
-		ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
+	ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
+	ept_vpid.val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
 
 	write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
 	write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);
 
diff --git a/x86/vmx.h b/x86/vmx.h
index 28595d8..742c2b2 100644
--- a/x86/vmx.h
+++ b/x86/vmx.h
@@ -432,6 +432,59 @@ enum Ctrl1 {
 #define HYPERCALL_MASK 0xFFF
 #define HYPERCALL_VMEXIT 0x1
 
+/* EPT pointer (EPTP) fields */
+#define EPTP_PG_WALK_LEN_SHIFT 3ul
+#define EPTP_AD_FLAG (1ul << 6)
+
+/* EPT memory types */
+#define EPT_MEM_TYPE_UC 0ul
+#define EPT_MEM_TYPE_WC 1ul
+#define EPT_MEM_TYPE_WT 4ul
+#define EPT_MEM_TYPE_WP 5ul
+#define EPT_MEM_TYPE_WB 6ul
+
+/* EPT PTE fields */
+#define EPT_RA 1ul
+#define EPT_WA 2ul
+#define EPT_EA 4ul
+#define EPT_PRESENT (EPT_RA | EPT_WA | EPT_EA)
+#define EPT_ACCESS_FLAG (1ul << 8)
+#define EPT_DIRTY_FLAG (1ul << 9)
+#define EPT_LARGE_PAGE (1ul << 7)
+#define EPT_MEM_TYPE_SHIFT 3ul
+#define EPT_IGNORE_PAT (1ul << 6)
+#define EPT_SUPPRESS_VE (1ull << 63)
+
+/* MSR_IA32_VMX_EPT_VPID_CAP capability bits */
+#define EPT_CAP_WT 1ull
+#define EPT_CAP_PWL4 (1ull << 6)
+#define EPT_CAP_UC (1ull << 8)
+#define EPT_CAP_WB (1ull << 14)
+#define EPT_CAP_2M_PAGE (1ull << 16)
+#define EPT_CAP_1G_PAGE (1ull << 17)
+#define EPT_CAP_INVEPT (1ull << 20)
+#define EPT_CAP_AD_FLAG (1ull << 21)
+#define EPT_CAP_INVEPT_SINGLE (1ull << 25)
+#define EPT_CAP_INVEPT_ALL (1ull << 26)
+
+#define PAGE_SIZE_2M (512 * PAGE_SIZE)
+#define PAGE_SIZE_1G (512 * PAGE_SIZE_2M)
+#define EPT_PAGE_LEVEL 4
+#define EPT_PGDIR_WIDTH 9
+#define EPT_PGDIR_MASK 511
+#define PAGE_MASK (~(PAGE_SIZE-1))
+
+/* EPT violation exit qualification bits */
+#define EPT_VLT_RD 1
+#define EPT_VLT_WR (1 << 1)
+#define EPT_VLT_FETCH (1 << 2)
+#define EPT_VLT_PERM_RD (1 << 3)
+#define EPT_VLT_PERM_WR (1 << 4)
+#define EPT_VLT_PERM_EX (1 << 5)
+#define EPT_VLT_LADDR_VLD (1 << 7)
+#define EPT_VLT_PADDR (1 << 8)
+
+#define MAGIC_VAL_1 0x12345678ul
+#define MAGIC_VAL_2 0x87654321ul
+#define MAGIC_VAL_3 0xfffffffful
+
+#define INVEPT_SINGLE 1
+#define INVEPT_GLOBAL 2
 
 extern struct regs regs;
 
@@ -472,8 +525,31 @@ static inline int vmcs_save(struct vmcs **vmcs)
 	return ret;
 }
 
+/* Invalidate cached EPT translations: @type is INVEPT_SINGLE
+   (single-context, for @eptp) or INVEPT_GLOBAL (all contexts). */
+static inline void invept(unsigned long type, u64 eptp)
+{
+	struct {
+		u64 eptp, gpa;
+	} operand = {eptp, 0};
+	asm volatile("invept %0, %1\n" ::"m"(operand),"r"(type));
+}
+
 void report(const char *name, int result);
 void print_vmexit_info();
+void install_ept_entry(unsigned long *pml4, int pte_level,
+		unsigned long guest_addr, unsigned long pte,
+		unsigned long *pt_page);
+void install_ept(unsigned long *pml4, unsigned long phys,
+		unsigned long guest_addr, u64 perm);
+void install_1g_ept(unsigned long *pml4, unsigned long phys,
+		unsigned long guest_addr, u64 perm);
+void install_2m_ept(unsigned long *pml4, unsigned long phys,
+		unsigned long guest_addr, u64 perm);
+int setup_ept_range(unsigned long *pml4, unsigned long start,
+		unsigned long len, int map_1g, int map_2m, u64 perm);
+unsigned long get_ept_pte(unsigned long *pml4,
+		unsigned long guest_addr, int level);
+int set_ept_pte(unsigned long *pml4, unsigned long guest_addr,
+		int level, u64 pte_val);
 
 #endif
-- 
1.7.9.5