[kvm-unit-tests PATCH 13/39] x86/access: Pre-allocate all page tables at (sub)test init

Pre-allocate the page tables for each test instead of allocating page
tables on every. single. iteration.  In addition to being abysmally slow,
constantly allocating new page tables obliterates any hope of providing
meaningful test coverage for shadow paging, as using a new upper level
PTE for every iteration causes KVM to sync children, which prevents
exposing TLB flushing bugs in KVM.
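
The shape of the new allocator, as a minimal standalone sketch (pt_alloc,
MAX_VADDRS and the array-backed pool below are illustrative stand-ins, not
the harness's actual API; the real ac_test_alloc_pt() hands out guest
physical pages from pt_pool_pa, and __ac_test_init() additionally lets
"buddy" sub-tests share upper-level tables that land on the same index):

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define PAGE_SIZE	4096
#define MAX_VADDRS	4	/* no scenario touches more than four VAs */
#define MAX_PT_LEVELS	5	/* worst case: 5-level paging */

/* Illustrative stand-in for the test's pre-sized page table pool. */
static uint8_t pt_pool[MAX_VADDRS * (MAX_PT_LEVELS - 1)][PAGE_SIZE];
static int pt_pool_current;

/*
 * Hand out the next pre-allocated page table, zeroed.  The pool is sized
 * up front and never recycled mid-test; running dry is a test bug.
 */
static void *pt_alloc(void)
{
	assert(pt_pool_current < MAX_VADDRS * (MAX_PT_LEVELS - 1));
	memset(pt_pool[pt_pool_current], 0, PAGE_SIZE);
	return pt_pool[pt_pool_current++];
}

int main(void)
{
	/* Grab all tables once at (sub)test init... */
	void *upper = pt_alloc();
	void *lower = pt_alloc();

	/* ...then every iteration rewrites PTEs in the same pages. */
	return (upper && lower) ? 0 : 1;
}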

Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
 x86/access.c | 169 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 101 insertions(+), 68 deletions(-)

diff --git a/x86/access.c b/x86/access.c
index 6c1e20e..abc6590 100644
--- a/x86/access.c
+++ b/x86/access.c
@@ -176,6 +176,9 @@ typedef struct {
 	int expected_fault;
 	unsigned expected_error;
 	int pt_levels;
+
+	/* 5-level paging, 1-based to avoid math. */
+	pt_element_t page_tables[6];
 } ac_test_t;
 
 typedef struct {
@@ -323,25 +326,25 @@ static pt_element_t ac_test_alloc_pt(ac_pt_env_t *pt_env)
 {
 	pt_element_t pt;
 
+	/*
+	 * Each test needs at most pt_levels-1 structures per virtual address,
+	 * and no existing scenario uses more than four addresses.
+	 */
+	assert(pt_env->pt_pool_current < (4 * (pt_env->pt_levels - 1)));
+
 	pt = pt_env->pt_pool_pa + (pt_env->pt_pool_current * PAGE_SIZE);
 	pt_env->pt_pool_current++;
 	memset(va(pt), 0, PAGE_SIZE);
 	return pt;
 }
 
-static _Bool ac_test_enough_room(ac_pt_env_t *pt_env)
+static void __ac_test_init(ac_test_t *at, unsigned long virt,
+			   ac_pt_env_t *pt_env, ac_test_t *buddy)
 {
-	/* '120' is completely arbitrary. */
-	return (pt_env->pt_pool_current + 5) < 120;
-}
+	unsigned long buddy_virt = buddy ? (unsigned long)buddy->virt : 0;
+	pt_element_t *root_pt = va(shadow_cr3 & PT_BASE_ADDR_MASK);
+	int i;
 
-static void ac_test_reset_pt_pool(ac_pt_env_t *pt_env)
-{
-	pt_env->pt_pool_current = 0;
-}
-
-static void ac_test_init(ac_test_t *at, unsigned long virt, ac_pt_env_t *pt_env)
-{
 	/*
 	 * The KUT infrastructure, e.g. this function, must use a different
 	 * top-level SPTE than the test, otherwise modifying SPTEs can affect
@@ -349,7 +352,7 @@ static void ac_test_init(ac_test_t *at, unsigned long virt, ac_pt_env_t *pt_env)
 	 * USER when CR4.SMEP=1.
 	 */
 	assert(PT_INDEX(virt, pt_env->pt_levels) !=
-	       PT_INDEX((unsigned long)ac_test_init, pt_env->pt_levels));
+	       PT_INDEX((unsigned long)__ac_test_init, pt_env->pt_levels));
 
 	set_efer_nx(1);
 	set_cr0_wp(1);
@@ -357,6 +360,33 @@ static void ac_test_init(ac_test_t *at, unsigned long virt, ac_pt_env_t *pt_env)
 	at->virt = (void *)virt;
 	at->phys = AT_CODE_DATA_PHYS;
 	at->pt_levels = pt_env->pt_levels;
+
+	at->page_tables[0] = -1ull;
+	at->page_tables[1] = -1ull;
+
+	/*
+	 * Zap the existing top-level PTE as it may be reused from a previous
+	 * sub-test.  This allows runtime PTE modification to assert that two
+	 * overlapping walks don't try to install different paging structures.
+	 */
+	root_pt[PT_INDEX(virt, pt_env->pt_levels)] = 0;
+
+	for (i = at->pt_levels; i > 1; i--) {
+		/*
+		 * Buddies can reuse any part of the walk that shares the same
+		 * index.  This is weird, but intentional, as several tests
+		 * want different walks to merge at lower levels.
+		 */
+		if (buddy && PT_INDEX(virt, i) == PT_INDEX(buddy_virt, i))
+			at->page_tables[i] = buddy->page_tables[i];
+		else
+			at->page_tables[i] = ac_test_alloc_pt(pt_env);
+	}
+}
+
+static void ac_test_init(ac_test_t *at, unsigned long virt, ac_pt_env_t *pt_env)
+{
+	__ac_test_init(at, virt, pt_env, NULL);
 }
 
 static int ac_test_bump_one(ac_test_t *at)
@@ -372,6 +402,9 @@ static _Bool ac_test_legal(ac_test_t *at)
 	int flags = at->flags;
 	unsigned reserved;
 
+	if (F(AC_CPU_CR4_SMEP))
+		return false;
+
 	if (F(AC_ACCESS_FETCH) && F(AC_ACCESS_WRITE))
 		return false;
 
@@ -562,59 +595,60 @@ static void ac_set_expected_status(ac_test_t *at)
 	ac_emulate_access(at, at->flags);
 }
 
-static void __ac_setup_specific_pages(ac_test_t *at, ac_pt_env_t *pt_env, bool reuse,
-				      u64 pd_page, u64 pt_page)
+static pt_element_t ac_get_pt(ac_test_t *at, int i, pt_element_t *ptep)
+{
+	pt_element_t pte;
+
+	pte = *ptep;
+	if (pte && !(pte & PT_PAGE_SIZE_MASK) &&
+	    (pte & PT_BASE_ADDR_MASK) != at->page_tables[i]) {
+		printf("\nPT collision.  VA = 0x%lx, level = %d, index = %ld, found PT = 0x%lx, want PT = 0x%lx\n",
+			(unsigned long)at->virt, i,
+			PT_INDEX((unsigned long)at->virt, i),
+			pte, at->page_tables[i]);
+		abort();
+	}
+
+	pte = at->page_tables[i];
+	return pte;
+}
+
+static void __ac_setup_specific_pages(ac_test_t *at, u64 pd_page, u64 pt_page)
 {
 	unsigned long parent_pte = shadow_cr3;
 	int flags = at->flags;
-	bool skip = true;
-
-	if (!ac_test_enough_room(pt_env))
-		ac_test_reset_pt_pool(pt_env);
+	int i;
 
 	at->ptep = 0;
-	for (int i = at->pt_levels; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) {
+	for (i = at->pt_levels; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) {
 		pt_element_t *parent_pt = va(parent_pte & PT_BASE_ADDR_MASK);
 		unsigned index = PT_INDEX((unsigned long)at->virt, i);
 		pt_element_t *ptep = &parent_pt[index];
 		pt_element_t pte;
 
-		/*
-		 * Reuse existing page tables along the highest index, some
-		 * tests rely on sharing upper level paging structures between
-		 * two separate sub-tests.
-		 */
-		if (skip && i >= 2 && index == 511 && (*ptep & PT_PRESENT_MASK))
-			goto next;
-
-		skip = false;
-		if (reuse && *ptep) {
-			switch (i) {
-			case 2:
-				at->pdep = ptep;
-				break;
-			case 1:
-				at->ptep = ptep;
-				break;
-			}
-			goto next;
-		}
-
 		switch (i) {
 		case 5:
 		case 4:
-			pte = ac_test_alloc_pt(pt_env);
+			pte = ac_get_pt(at, i, ptep);
 			pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
 			break;
 		case 3:
-			pte = pd_page ? pd_page : ac_test_alloc_pt(pt_env);
+			if (pd_page)
+				pte = pd_page;
+			else
+				pte = ac_get_pt(at, i, ptep);
+
 			pte |= PT_PRESENT_MASK | PT_USER_MASK;
 			if (!F(AC_PDPTE_NO_WRITABLE))
 				pte |= PT_WRITABLE_MASK;
 			break;
 		case 2:
 			if (!F(AC_PDE_PSE)) {
-				pte = pt_page ? pt_page : ac_test_alloc_pt(pt_env);
+				if (pt_page)
+					pte = pt_page;
+				else
+					pte = ac_get_pt(at, i, ptep);
+
 				/* The protection key is ignored on non-leaf entries.  */
 				if (F(AC_PKU_PKEY))
 					pte |= 2ull << 59;
@@ -671,21 +705,20 @@ static void __ac_setup_specific_pages(ac_test_t *at, ac_pt_env_t *pt_env, bool r
 		}
 
 		*ptep = pte;
- next:
-		parent_pte = *ptep;
+
+		parent_pte = pte;
 	}
 	ac_set_expected_status(at);
 }
 
-static void ac_test_setup_pte(ac_test_t *at, ac_pt_env_t *pt_env)
+static void ac_test_setup_pte(ac_test_t *at)
 {
-	__ac_setup_specific_pages(at, pt_env, false, 0, 0);
+	__ac_setup_specific_pages(at, 0, 0);
 }
 
-static void ac_setup_specific_pages(ac_test_t *at, ac_pt_env_t *pt_env,
-				    u64 pd_page, u64 pt_page)
+static void ac_setup_specific_pages(ac_test_t *at, u64 pd_page, u64 pt_page)
 {
-	return __ac_setup_specific_pages(at, pt_env, false, pd_page, pt_page);
+	return __ac_setup_specific_pages(at, pd_page, pt_page);
 }
 
 static void __dump_pte(pt_element_t *ptep, int level, unsigned long virt)
@@ -874,15 +907,15 @@ static int corrupt_hugepage_triger(ac_pt_env_t *pt_env)
 	ac_test_t at1, at2;
 
 	ac_test_init(&at1, 0xffff923400000000ul, pt_env);
-	ac_test_init(&at2, 0xffffe66600000000ul, pt_env);
+	__ac_test_init(&at2, 0xffffe66600000000ul, pt_env, &at1);
 
 	at2.flags = AC_CPU_CR0_WP_MASK | AC_PDE_PSE_MASK | AC_PDE_PRESENT_MASK;
-	ac_test_setup_pte(&at2, pt_env);
+	ac_test_setup_pte(&at2);
 	if (!ac_test_do_access(&at2))
 		goto err;
 
 	at1.flags = at2.flags | AC_PDE_WRITABLE_MASK;
-	ac_test_setup_pte(&at1, pt_env);
+	ac_test_setup_pte(&at1);
 	if (!ac_test_do_access(&at1))
 		goto err;
 
@@ -912,13 +945,13 @@ static int check_pfec_on_prefetch_pte(ac_pt_env_t *pt_env)
 	ac_test_t at1, at2;
 
 	ac_test_init(&at1, 0xffff923406001000ul, pt_env);
-	ac_test_init(&at2, 0xffff923406003000ul, pt_env);
+	__ac_test_init(&at2, 0xffff923406003000ul, pt_env, &at1);
 
 	at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK;
-	ac_setup_specific_pages(&at1, pt_env, 30 * 1024 * 1024, 30 * 1024 * 1024);
+	ac_setup_specific_pages(&at1, 30 * 1024 * 1024, 30 * 1024 * 1024);
 
 	at2.flags = at1.flags | AC_PTE_NX_MASK;
-	ac_setup_specific_pages(&at2, pt_env, 30 * 1024 * 1024, 30 * 1024 * 1024);
+	ac_setup_specific_pages(&at2, 30 * 1024 * 1024, 30 * 1024 * 1024);
 
 	if (!ac_test_do_access(&at1)) {
 		printf("%s: prepare fail\n", __FUNCTION__);
@@ -957,17 +990,17 @@ static int check_large_pte_dirty_for_nowp(ac_pt_env_t *pt_env)
 	ac_test_t at1, at2;
 
 	ac_test_init(&at1, 0xffff923403000000ul, pt_env);
-	ac_test_init(&at2, 0xffffe66606000000ul, pt_env);
+	__ac_test_init(&at2, 0xffffe66606000000ul, pt_env, &at1);
 
 	at2.flags = AC_PDE_PRESENT_MASK | AC_PDE_PSE_MASK;
-	ac_test_setup_pte(&at2, pt_env);
+	ac_test_setup_pte(&at2);
 	if (!ac_test_do_access(&at2)) {
 		printf("%s: read on the first mapping fail.\n", __FUNCTION__);
 		goto err;
 	}
 
 	at1.flags = at2.flags | AC_ACCESS_WRITE_MASK;
-	ac_test_setup_pte(&at1, pt_env);
+	ac_test_setup_pte(&at1);
 	if (!ac_test_do_access(&at1)) {
 		printf("%s: write on the second mapping fail.\n", __FUNCTION__);
 		goto err;
@@ -1003,7 +1036,7 @@ static int check_smep_andnot_wp(ac_pt_env_t *pt_env)
 		    AC_CPU_CR4_SMEP_MASK |
 		    AC_CPU_CR0_WP_MASK |
 		    AC_ACCESS_WRITE_MASK;
-	ac_test_setup_pte(&at1, pt_env);
+	ac_test_setup_pte(&at1);
 
 	/*
 	 * Here we write the ro user page when
@@ -1062,19 +1095,19 @@ static int check_effective_sp_permissions(ac_pt_env_t *pt_env)
 		    AC_PDE_USER_MASK | AC_PTE_USER_MASK |
 		    AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK |
 		    AC_PTE_WRITABLE_MASK | AC_ACCESS_USER_MASK;
-	__ac_setup_specific_pages(&at1, pt_env, false, pmd, 0);
+	__ac_setup_specific_pages(&at1, pmd, 0);
 
-	ac_test_init(&at2, ptr2, pt_env);
+	__ac_test_init(&at2, ptr2, pt_env, &at1);
 	at2.flags = at1.flags | AC_PDE_WRITABLE_MASK | AC_PTE_DIRTY_MASK | AC_ACCESS_WRITE_MASK;
-	__ac_setup_specific_pages(&at2, pt_env, true, pmd, 0);
+	__ac_setup_specific_pages(&at2, pmd, 0);
 
-	ac_test_init(&at3, ptr3, pt_env);
+	__ac_test_init(&at3, ptr3, pt_env, &at1);
 	at3.flags = AC_PDPTE_NO_WRITABLE_MASK | at1.flags;
-	__ac_setup_specific_pages(&at3, pt_env, true, pmd, 0);
+	__ac_setup_specific_pages(&at3, pmd, 0);
 
-	ac_test_init(&at4, ptr4, pt_env);
+	__ac_test_init(&at4, ptr4, pt_env, &at2);
 	at4.flags = AC_PDPTE_NO_WRITABLE_MASK | at2.flags;
-	__ac_setup_specific_pages(&at4, pt_env, true, pmd, 0);
+	__ac_setup_specific_pages(&at4, pmd, 0);
 
 	err_read_at1 = ac_test_do_access(&at1);
 	if (!err_read_at1) {
@@ -1110,7 +1143,7 @@ static int ac_test_exec(ac_test_t *at, ac_pt_env_t *pt_env)
 	if (verbose) {
 		ac_test_show(at);
 	}
-	ac_test_setup_pte(at, pt_env);
+	ac_test_setup_pte(at);
 	r = ac_test_do_access(at);
 	return r;
 }
-- 
2.34.0.rc2.393.gf8c9666880-goog



