On 22/06/20 18:55, Maxim Levitsky wrote: > +/* > + * Detect nested guest RIP corruption as explained in kernel commit > + * b6162e82aef19fee9c32cb3fe9ac30d9116a8c73 > + * > + * In the assembly loop below, execute 'ins' from a IO port, > + * while not intercepting IO violations, so that this instruction is > + * intercepted and emulated by the L0 qemu. > + * > + * At the same time we are getting interrupts from the local APIC timer, > + * and we do intercept them in L1 > + * > + * If interrupt happens on the insb instruction, L0 will VMexit, emulate > + * the insb instruction and then it will try to inject the interrupt to L1 > + * by doing a nested VMexit (since L1 intercepts interrupts), > + * and due to a bug it will use pre-emulation value of RIP,RAX and RSP. > + * > + * In our intercept handler we check that RIP is of the insb instruction, > + * (corrupted) but its memory operand is already written meaning, > + * that insb was already executed. > + */ Looks good, just some wording improvements diff --git a/x86/svm_tests.c b/x86/svm_tests.c index 30009c4..827ff87 100644 --- a/x86/svm_tests.c +++ b/x86/svm_tests.c @@ -1793,21 +1793,20 @@ static bool virq_inject_check(struct svm_test *test) * Detect nested guest RIP corruption as explained in kernel commit * b6162e82aef19fee9c32cb3fe9ac30d9116a8c73 * - * In the assembly loop below, execute 'ins' from a IO port, - * while not intercepting IO violations, so that this instruction is - * intercepted and emulated by the L0 qemu. + * In the assembly loop below 'ins' is executed while IO instructions + * are not intercepted; the instruction is emulated by L0. * * At the same time we are getting interrupts from the local APIC timer, * and we do intercept them in L1 * - * If interrupt happens on the insb instruction, L0 will VMexit, emulate - * the insb instruction and then it will try to inject the interrupt to L1 - * by doing a nested VMexit (since L1 intercepts interrupts), - * and due to a bug it will use pre-emulation value of RIP,RAX and RSP. + * If the interrupt happens on the insb instruction, L0 will VMexit, emulate + * the insb instruction and then it will inject the interrupt to L1 through + * a nested VMexit. Due to a bug, it would leave pre-emulation values of RIP, + * RAX and RSP in the VMCB. * - * In our intercept handler we check that RIP is of the insb instruction, - * (corrupted) but its memory operand is already written meaning, - * that insb was already executed. + * In our intercept handler we detect the bug by checking that RIP is that of + * the insb instruction, but its memory operand has already been written. + * This means that insb was already executed. */ > +static void reg_corruption_test(struct svm_test *test) > +{ > + /* this is endless loop, which is interrupted by the timer interrupt */ > + asm volatile ( > + "again:\n\t" > + "movw $0x4d0, %%dx\n\t" // IO port > + "lea %[_io_port_var], %%rdi\n\t" > + "movb $0xAA, %[_io_port_var]\n\t" > + "insb_instruction_label:\n\t" > + "insb\n\t" > + "jmp again\n\t" And here you could use "1:" and "jmp 1b" instead of a global label. You said offlist that an "inb" instruction would not work, but I'm not sure I understand why. Wouldn't you see "0xAA" in vmcb->save.rax with the fixed kernel, and something else with the broken one? Paolo > + > + : [_io_port_var] "=m" (io_port_var) > + : /* no inputs*/ > + : "rdx", "rdi" > + ); > +} > + > +static bool reg_corruption_finished(struct svm_test *test) > +{ > + if (isr_cnt == 10000) { > + report(true, > + "No RIP corruption detected after %d timer interrupts", > + isr_cnt); > + set_test_stage(test, 1); > + return true; > + } > + > + if (vmcb->control.exit_code == SVM_EXIT_INTR) { > + > + void* guest_rip = (void*)vmcb->save.rip; > + > + irq_enable(); > + asm volatile ("nop"); > + irq_disable(); > + > + if (guest_rip == insb_instruction_label && io_port_var != 0xAA) { > + report(false, > + "RIP corruption detected after %d timer interrupts", > + isr_cnt); > + return true; > + } > + > + } > + return false; > +} > + > +static bool reg_corruption_check(struct svm_test *test) > +{ > + return get_test_stage(test) == 1; > +} > + > #define TEST(name) { #name, .v2 = name } > > /* > @@ -1950,6 +2050,9 @@ struct svm_test svm_tests[] = { > { "virq_inject", default_supported, virq_inject_prepare, > default_prepare_gif_clear, virq_inject_test, > virq_inject_finished, virq_inject_check }, > + { "reg_corruption", default_supported, reg_corruption_prepare, > + default_prepare_gif_clear, reg_corruption_test, > + reg_corruption_finished, reg_corruption_check }, > TEST(svm_guest_state_test), > { NULL, NULL, NULL, NULL, NULL, NULL, NULL } > }; >