Our QE team reported a test failure on access_tracking_perf_test:

  Testing guest mode: PA-bits:ANY, VA-bits:48, 4K pages
  guest physical test memory offset: 0x3fffbffff000

  Populating memory             : 0.684014577s
  Writing to populated memory   : 0.006230175s
  Reading from populated memory : 0.004557805s
  ==== Test Assertion Failure ====
    lib/kvm_util.c:1411: false
    pid=125806 tid=125809 errno=4 - Interrupted system call
       1  0x0000000000402f7c: addr_gpa2hva at kvm_util.c:1411
       2   (inlined by) addr_gpa2hva at kvm_util.c:1405
       3  0x0000000000401f52: lookup_pfn at access_tracking_perf_test.c:98
       4   (inlined by) mark_vcpu_memory_idle at access_tracking_perf_test.c:152
       5   (inlined by) vcpu_thread_main at access_tracking_perf_test.c:232
       6  0x00007fefe9ff81ce: ?? ??:0
       7  0x00007fefe9c64d82: ?? ??:0
    No vm physical memory at 0xffbffff000

I can easily reproduce it with an Intel(R) Xeon(R) CPU E5-2630 with 46-bit
PA.

It turns out that the address translation used for clearing idle page
tracking returned a wrong result: addr_gva2gpa()'s last step, which
computes upon "pte[index[0]].pfn", did the calculation with only 40 bits
of width, so the overflowed bits got cut off.  In the case above, the GPA
to be returned for GVA 0xc0000000 should be 0x3fffbffff000, but it got cut
off to 0xffbffff000, which then caused the subsequent gpa2hva lookup to
fail.

Fix it by force-casting the ->pfn fetch to uint64_t for all layers of
pgtables.

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2075036
Signed-off-by: Peter Xu <peterx@xxxxxxxxxx>
---
 .../selftests/kvm/lib/x86_64/processor.c     | 26 ++++++++++++++-----
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 9f000dfb5594..32832d1f9acb 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -51,6 +51,18 @@ struct pageTableEntry {
 	uint64_t execute_disable:1;
 };
 
+/*
+ * Always reference the PFN within pgtables using this macro.  It is the
+ * compiler's choice whether further calculation upon the pfn field is
+ * also limited to 40 bits (the bit length defined for .pfn in both
+ * pageUpperEntry and pageTableEntry), in which case the result can
+ * overflow.  For example, gcc version 11.1.1 20210428 does the cut-off,
+ * while clang version 12.0.1 does not.
+ *
+ * To make it always work, force a cast.
+ */
+#define __get_pfn(entry) ((uint64_t)(entry.pfn))
+
 void regs_dump(FILE *stream, struct kvm_regs *regs,
 	       uint8_t indent)
 {
@@ -335,7 +347,7 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
 		    (rsvd_mask | (1ull << 7))) == 0,
 		    "Unexpected reserved bits set.");
 
-	pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
+	pdpe = addr_gpa2hva(vm, __get_pfn(pml4e[index[3]]) * vm->page_size);
 	TEST_ASSERT(pdpe[index[2]].present,
 		    "Expected pdpe to be present for gva: 0x%08lx", vaddr);
 	TEST_ASSERT(pdpe[index[2]].page_size == 0,
@@ -343,7 +355,7 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
 	TEST_ASSERT((*(uint64_t*)(&pdpe[index[2]]) & rsvd_mask) == 0,
 		    "Unexpected reserved bits set.");
 
-	pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
+	pde = addr_gpa2hva(vm, __get_pfn(pdpe[index[2]]) * vm->page_size);
 	TEST_ASSERT(pde[index[1]].present,
 		    "Expected pde to be present for gva: 0x%08lx", vaddr);
 	TEST_ASSERT(pde[index[1]].page_size == 0,
@@ -351,7 +363,7 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
 	TEST_ASSERT((*(uint64_t*)(&pde[index[1]]) & rsvd_mask) == 0,
 		    "Unexpected reserved bits set.");
 
-	pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
+	pte = addr_gpa2hva(vm, __get_pfn(pde[index[1]]) * vm->page_size);
 	TEST_ASSERT(pte[index[0]].present,
 		    "Expected pte to be present for gva: 0x%08lx", vaddr);
 
@@ -575,19 +587,19 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 	if (!pml4e[index[3]].present)
 		goto unmapped_gva;
 
-	pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
+	pdpe = addr_gpa2hva(vm, __get_pfn(pml4e[index[3]]) * vm->page_size);
 	if (!pdpe[index[2]].present)
 		goto unmapped_gva;
 
-	pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
+	pde = addr_gpa2hva(vm, __get_pfn(pdpe[index[2]]) * vm->page_size);
 	if (!pde[index[1]].present)
 		goto unmapped_gva;
 
-	pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
+	pte = addr_gpa2hva(vm, __get_pfn(pde[index[1]]) * vm->page_size);
 	if (!pte[index[0]].present)
 		goto unmapped_gva;
 
-	return (pte[index[0]].pfn * vm->page_size) + (gva & 0xfffu);
+	return (__get_pfn(pte[index[0]]) * vm->page_size) + (gva & 0xfffu);
 
 unmapped_gva:
 	TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
-- 
2.32.0
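
As an aside, the truncation is easy to demonstrate outside the selftest.
Below is a minimal, hypothetical sketch (struct demo_pte and its field
layout are simplified illustrations, not the selftest's pageUpperEntry or
pageTableEntry): it multiplies a 40-bit pfn bit-field by the page size
with and without the uint64_t cast.  Whether the uncast form keeps the
high bits depends on the compiler, which is exactly why the patch forces
the cast; the pfn value below is the one from the bug report
(0x3fffbffff000 / 4096).

  #include <stdint.h>
  #include <stdio.h>

  /* Simplified stand-in for a page table entry with a 40-bit PFN field. */
  struct demo_pte {
  	uint64_t present:1;
  	uint64_t ignored:11;
  	uint64_t pfn:40;	/* same width as .pfn in the selftest structs */
  	uint64_t reserved:12;
  };

  int main(void)
  {
  	struct demo_pte e = { .present = 1, .pfn = 0x3fffbffffULL };
  	uint64_t page_size = 4096;

  	/*
  	 * Arithmetic on the raw bit-field: the effective width of the
  	 * computation is up to the compiler, so the high bits of the
  	 * product may or may not survive (the commit message observed
  	 * gcc 11.1.1 truncating while clang 12.0.1 does not).
  	 */
  	uint64_t maybe_truncated = e.pfn * page_size;

  	/* Force 64-bit arithmetic, as __get_pfn() does in the patch. */
  	uint64_t always_ok = (uint64_t)e.pfn * page_size;

  	printf("raw bit-field: 0x%llx\n", (unsigned long long)maybe_truncated);
  	printf("forced cast  : 0x%llx\n", (unsigned long long)always_ok);
  	return 0;
  }

With the cast, the second value is always 0x3fffbffff000; without it, a
compiler doing the 40-bit cut-off would print 0xffbffff000, matching the
"No vm physical memory at 0xffbffff000" failure above.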