Re: [kvm-unit-tests PATCH] x86: vmx: Add test for MTF on a guest MOV-to-CR0 that enables PAE

> On Oct 10, 2020, at 1:17 AM, Nadav Amit <nadav.amit@xxxxxxxxx> wrote:
> 
>> On Aug 17, 2020, at 5:25 PM, Peter Shier <pshier@xxxxxxxxxx> wrote:
>> 
>> Verify that when an L2 guest enables PAE paging and the L0 intercept
>> of the L2 MOV to CR0 reflects an MTF exit to L1, the subsequent resume
>> to L2 correctly preserves the PDPTE array specified by L2's CR3.
>> 
>> Signed-off-by: Jim Mattson <jmattson@xxxxxxxxxx>
>> Reviewed-by: Peter Shier <pshier@xxxxxxxxxx>
>> Signed-off-by: Peter Shier <pshier@xxxxxxxxxx>
>> ---
>> lib/x86/asm/page.h |   8 +++
>> x86/vmx_tests.c    | 171 +++++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 179 insertions(+)
>> 
>> diff --git a/lib/x86/asm/page.h b/lib/x86/asm/page.h
>> index 7e2a3dd4b90a..1359eb74cde4 100644
>> --- a/lib/x86/asm/page.h
>> +++ b/lib/x86/asm/page.h
>> @@ -36,10 +36,18 @@ typedef unsigned long pgd_t;
>> #define PT64_NX_MASK		(1ull << 63)
>> #define PT_ADDR_MASK		GENMASK_ULL(51, 12)
>> 
>> +#define PDPTE64_PAGE_SIZE_MASK	  (1ull << 7)
>> +#define PDPTE64_RSVD_MASK	  GENMASK_ULL(51, cpuid_maxphyaddr())
>> +
>> #define PT_AD_MASK              (PT_ACCESSED_MASK | PT_DIRTY_MASK)
>> 
>> +#define PAE_PDPTE_RSVD_MASK     (GENMASK_ULL(63, cpuid_maxphyaddr()) |	\
>> +				 GENMASK_ULL(8, 5) | GENMASK_ULL(2, 1))
>> +
>> +
>> #ifdef __x86_64__
>> #define	PAGE_LEVEL	4
>> +#define	PDPT_LEVEL	3
>> #define	PGDIR_WIDTH	9
>> #define	PGDIR_MASK	511
>> #else
>> diff --git a/x86/vmx_tests.c b/x86/vmx_tests.c
>> index 32e3d4f47b33..372e5efb6b5f 100644
>> --- a/x86/vmx_tests.c
>> +++ b/x86/vmx_tests.c
>> @@ -5250,6 +5250,176 @@ static void vmx_mtf_test(void)
>> 	enter_guest();
>> }
>> 
>> +extern char vmx_mtf_pdpte_guest_begin;
>> +extern char vmx_mtf_pdpte_guest_end;
>> +
>> +asm("vmx_mtf_pdpte_guest_begin:\n\t"
>> +    "mov %cr0, %rax\n\t"    /* save CR0 with PG=1                 */
>> +    "vmcall\n\t"            /* on return from this CR0.PG=0       */
>> +    "mov %rax, %cr0\n\t"    /* restore CR0.PG=1 to enter PAE mode */
>> +    "vmcall\n\t"
>> +    "retq\n\t"
>> +    "vmx_mtf_pdpte_guest_end:");
>> +
>> +static void vmx_mtf_pdpte_test(void)
>> +{
>> +	void *test_mtf_pdpte_guest;
>> +	pteval_t *pdpt;
>> +	u32 guest_ar_cs;
>> +	u64 guest_efer;
>> +	pteval_t *pte;
>> +	u64 guest_cr0;
>> +	u64 guest_cr3;
>> +	u64 guest_cr4;
>> +	u64 ent_ctls;
>> +	int i;
>> +
>> +	if (setup_ept(false))
>> +		return;
>> +
>> +	if (!(ctrl_cpu_rev[0].clr & CPU_MTF)) {
>> +		printf("CPU does not support 'monitor trap flag.'\n");
>> +		return;
>> +	}
>> +
>> +	if (!(ctrl_cpu_rev[1].clr & CPU_URG)) {
>> +		printf("CPU does not support 'unrestricted guest.'\n");
>> +		return;
>> +	}
>> +
>> +	vmcs_write(EXC_BITMAP, ~0);
>> +	vmcs_write(CPU_EXEC_CTRL1, vmcs_read(CPU_EXEC_CTRL1) | CPU_URG);
>> +
>> +	/*
>> +	 * Copy the guest code to an identity-mapped page.
>> +	 */
>> +	test_mtf_pdpte_guest = alloc_page();
>> +	memcpy(test_mtf_pdpte_guest, &vmx_mtf_pdpte_guest_begin,
>> +	       &vmx_mtf_pdpte_guest_end - &vmx_mtf_pdpte_guest_begin);
>> +
>> +	test_set_guest(test_mtf_pdpte_guest);
>> +
>> +	enter_guest();
>> +	skip_exit_vmcall();
>> +
>> +	/*
>> +	 * Put the guest in non-paged 32-bit protected mode, ready to enter
>> +	 * PAE mode when CR0.PG is set. CR4.PAE will already have been set
>> +	 * when the guest started out in long mode.
>> +	 */
>> +	ent_ctls = vmcs_read(ENT_CONTROLS);
>> +	vmcs_write(ENT_CONTROLS, ent_ctls & ~ENT_GUEST_64);
>> +
>> +	guest_efer = vmcs_read(GUEST_EFER);
>> +	vmcs_write(GUEST_EFER, guest_efer & ~(EFER_LMA | EFER_LME));
>> +
>> +	/*
>> +	 * Set CS access rights bits for 32-bit protected mode:
>> +	 * 3:0    B execute/read/accessed
>> +	 * 4      1 code or data
>> +	 * 6:5    0 descriptor privilege level
>> +	 * 7      1 present
>> +	 * 11:8   0 reserved
>> +	 * 12     0 available for use by system software
>> +	 * 13     0 64 bit mode not active
>> +	 * 14     1 default operation size 32-bit segment
>> +	 * 15     1 page granularity: segment limit in 4K units
>> +	 * 16     0 segment usable
>> +	 * 31:17  0 reserved
>> +	 */
>> +	guest_ar_cs = vmcs_read(GUEST_AR_CS);
>> +	vmcs_write(GUEST_AR_CS, 0xc09b);
>> +
>> +	guest_cr0 = vmcs_read(GUEST_CR0);
>> +	vmcs_write(GUEST_CR0, guest_cr0 & ~X86_CR0_PG);
>> +
>> +	guest_cr4 = vmcs_read(GUEST_CR4);
>> +	vmcs_write(GUEST_CR4, guest_cr4 & ~X86_CR4_PCIDE);
>> +
>> +	guest_cr3 = vmcs_read(GUEST_CR3);
>> +
>> +	/*
>> +	 * Turn the 4-level page table into a PAE page table by following the 0th
>> +	 * PML4 entry to a PDPT page, and grab the first four PDPTEs from that
>> +	 * page.
>> +	 *
>> +	 * Why does this work?
>> +	 *
>> +	 * PAE uses 32-bit addressing which implies:
>> +	 * Bits 11:0   page offset
>> +	 * Bits 20:12  entry into 512-entry page table
>> +	 * Bits 29:21  entry into a 512-entry directory table
>> +	 * Bits 31:30  entry into the page directory pointer table.
>> +	 * Bits 63:32  zero
>> +	 *
>> +	 * As only 2 bits are needed to select the PDPTEs for the entire
>> +	 * 32-bit address space, take the first 4 PDPTEs in the level 3 page
>> +	 * directory pointer table. It doesn't matter which of these PDPTEs
>> +	 * are present because they must cover the guest code given that it
>> +	 * has already run successfully.
>> +	 *
>> +	 * Get a pointer to the PTE for GVA=0 in the page directory pointer table.
>> +	 */
>> +	pte = get_pte_level((pgd_t *)(guest_cr3 & ~X86_CR3_PCID_MASK), 0, PDPT_LEVEL);
>> +
>> +	/*
>> +	 * Need some memory for the 4-entry PAE page directory pointer
>> +	 * table. Use the end of the identity-mapped page where the guest code
>> +	 * is stored. There is definitely space as the guest code is only a
>> +	 * few bytes.
>> +	 */
>> +	pdpt = test_mtf_pdpte_guest + PAGE_SIZE - 4 * sizeof(pteval_t);
>> +
>> +	/*
>> +	 * Copy the first four PDPTEs into the PAE page table with reserved
>> +	 * bits cleared. Note that permission bits from the PML4E and PDPTE
>> +	 * are not propagated.
>> +	 */
>> +	for (i = 0; i < 4; i++) {
>> +		TEST_ASSERT_EQ_MSG(0, (pte[i] & PDPTE64_RSVD_MASK),
>> +				   "PDPTE has invalid reserved bits");
>> +		TEST_ASSERT_EQ_MSG(0, (pte[i] & PDPTE64_PAGE_SIZE_MASK),
>> +				   "Cannot use 1GB super pages for PAE");
>> +		pdpt[i] = pte[i] & ~(PAE_PDPTE_RSVD_MASK);
>> +	}
>> +	vmcs_write(GUEST_CR3, virt_to_phys(pdpt));
>> +
>> +	enable_mtf();
>> +	enter_guest();
> 
> This entry failed on my bare-metal machine:
> 
> Test suite: vmx_mtf_pdpte_test
> VM-Exit failure on vmresume (reason=0x80000021, qual=0): invalid guest state
> 
> Any idea why?

Exit reason 0x80000021 is a VM-entry failure due to invalid guest state
(basic exit reason 33 with bit 31 set). I guess the test assumes that
there are no physical addresses above 4GB. When I reduce the memory
size, the test passes.
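If that is the cause, a guard along these lines, placed just before the
test writes GUEST_CR3, would turn the failure into a skip. This is an
untested sketch: virt_to_phys() is already used by the test itself, and
report_skip() is the usual kvm-unit-tests reporting helper; since the
guest code lives in the same page as the PDPT, the one check also covers
the 32-bit RIP constraint.

	/*
	 * Sketch only: the PAE PDPT built by the test must be reachable
	 * from a 32-bit GUEST_CR3, so bail out if alloc_page() handed
	 * back a page above 4GB. A real fix would also restore the VMCS
	 * fields (ENT_CONTROLS, GUEST_EFER, GUEST_AR_CS, GUEST_CR0)
	 * modified earlier, or allocate the page below 4GB instead.
	 */
	if (virt_to_phys(pdpt) >= (1ull << 32)) {
		report_skip("PDPT above 4GB; test assumes RAM below 4GB");
		return;
	}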




