On Wed, Sep 11, 2024 at 1:42 PM Sean Christopherson <seanjc@xxxxxxxxxx> wrote:
>
> Add two phases to mmu_stress_test to verify that KVM correctly handles
> guest memory that was writable, and then made read-only in the primary MMU,
> and then made writable again.
>
> Add bonus coverage for x86 and arm64 to verify that all of guest memory was
> marked read-only. Making forward progress (without making memory writable)
> requires arch specific code to skip over the faulting instruction, but the
> test can at least verify each vCPU's starting page was made read-only for
> other architectures.
>
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
>  tools/testing/selftests/kvm/mmu_stress_test.c | 104 +++++++++++++++++-
>  1 file changed, 101 insertions(+), 3 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c
> index 50c3a17418c4..c07c15d7cc9a 100644
> --- a/tools/testing/selftests/kvm/mmu_stress_test.c
> +++ b/tools/testing/selftests/kvm/mmu_stress_test.c
> @@ -16,6 +16,8 @@
>  #include "guest_modes.h"
>  #include "processor.h"
>
> +static bool mprotect_ro_done;
> +
>  static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
>  {
>          uint64_t gpa;
> @@ -31,6 +33,42 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
>                  *((volatile uint64_t *)gpa);
>          GUEST_SYNC(2);
>
> +        /*
> +         * Write to the region while mprotect(PROT_READ) is underway. Keep
> +         * looping until the memory is guaranteed to be read-only, otherwise
> +         * vCPUs may complete their writes and advance to the next stage
> +         * prematurely.
> +         *
> +         * For architectures that support skipping the faulting instruction,
> +         * generate the store via inline assembly to ensure the exact length
> +         * of the instruction is known and stable (vcpu_arch_put_guest() on
> +         * fixed-length architectures should work, but the cost of paranoia
> +         * is low in this case). For x86, hand-code the exact opcode so that
> +         * there is no room for variability in the generated instruction.
> +         */
> +        do {
> +                for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
> +#ifdef __x86_64__
> +                        asm volatile(".byte 0x48,0x89,0x00" :: "a"(gpa) : "memory"); /* mov %rax, (%rax) */

I'm curious what you think about using labels (in asm, but perhaps also
in C) and *setting* the PC instead of incrementing the PC.  Diff attached
(tested on x86).

It might even be safe/okay to always use vcpu_arch_put_guest(), just set
the PC to a label immediately following it (rough sketch of that idea
below, after the attached diff).

I don't feel strongly, so feel free to ignore.

> +#elif defined(__aarch64__)
> +                        asm volatile("str %0, [%0]" :: "r" (gpa) : "memory");
> +#else
> +                        vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
> +#endif
> +        } while (!READ_ONCE(mprotect_ro_done));
> +
> +        /*
> +         * Only architectures that write the entire range can explicitly sync,
> +         * as other architectures will be stuck on the write fault.
> +         */
> +#if defined(__x86_64__) || defined(__aarch64__)
> +        GUEST_SYNC(3);
> +#endif
diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c
index c07c15d7cc9a..a7cced5c3e67 100644
--- a/tools/testing/selftests/kvm/mmu_stress_test.c
+++ b/tools/testing/selftests/kvm/mmu_stress_test.c
@@ -16,8 +16,13 @@
 #include "guest_modes.h"
 #include "processor.h"
 
+#if defined(__x86_64__) || defined(__aarch64__)
+extern void *skip_page;
+#endif
+
 static bool mprotect_ro_done;
+
 
 static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
 {
         uint64_t gpa;
@@ -40,18 +45,21 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
          * prematurely.
          *
          * For architectures that support skipping the faulting instruction,
-         * generate the store via inline assembly to ensure the exact length
-         * of the instruction is known and stable (vcpu_arch_put_guest() on
-         * fixed-length architectures should work, but the cost of paranoia
-         * is low in this case). For x86, hand-code the exact opcode so that
-         * there is no room for variability in the generated instruction.
+         * generate the store via inline assembly to ensure we can correctly
+         * adjust the PC upon faulting.
          */
         do {
                 for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
 #ifdef __x86_64__
-                        asm volatile(".byte 0x48,0x89,0x00" :: "a"(gpa) : "memory"); /* mov %rax, (%rax) */
+                        asm volatile(".global skip_page;"
+                                     "mov %0, (%0);"
+                                     "skip_page:;"
+                                     :: "r" (gpa) : "memory");
 #elif defined(__aarch64__)
-                        asm volatile("str %0, [%0]" :: "r" (gpa) : "memory");
+                        asm volatile(".global skip_page;"
+                                     "str %0, [%0];"
+                                     "skip_page:;"
+                                     :: "r" (gpa) : "memory");
 #else
                         vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
 #endif
@@ -170,10 +178,10 @@ static void *vcpu_worker(void *data)
                 TEST_ASSERT_EQ(errno, EFAULT);
 #if defined(__x86_64__)
                 WRITE_ONCE(vcpu->run->kvm_dirty_regs, KVM_SYNC_X86_REGS);
-                vcpu->run->s.regs.regs.rip += 3;
+                vcpu->run->s.regs.regs.rip = (vm_vaddr_t)&skip_page;
 #elif defined(__aarch64__)
                 vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc),
-                             vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)) + 4);
+                             (vm_vaddr_t)&skip_page);
 #endif
         }
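
And purely for illustration, here's roughly what I had in mind for the
vcpu_arch_put_guest() variant.  This is an untested sketch, and the
"skip_store" label name is made up (nothing in the selftests defines it
today): the guest drops the per-arch stores and just plants a global
label immediately after the put_guest, and the host sets the PC to that
label exactly like the diff above does with skip_page:

        /* Hypothetical, untested sketch -- guest side. */
        extern void *skip_store;
        ...
        do {
                for (gpa = start_gpa; gpa < end_gpa; gpa += stride) {
                        vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
                        /*
                         * Label immediately after the store; the host sets
                         * the PC here to skip the faulting instruction,
                         * regardless of how vcpu_arch_put_guest() compiles.
                         */
                        asm volatile(".global skip_store\n"
                                     "skip_store:" ::: "memory");
                }
        } while (!READ_ONCE(mprotect_ro_done));

        /* Hypothetical, untested sketch -- host side, mirroring the diff. */
#if defined(__x86_64__)
                vcpu->run->s.regs.regs.rip = (vm_vaddr_t)&skip_store;
#elif defined(__aarch64__)
                vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc),
                             (vm_vaddr_t)&skip_store);
#endif

Same caveat as skip_page in the attached diff: the label has to be unique
in the final object, so this only holds together as long as the compiler
doesn't duplicate the loop body.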