2018-03-23 23:04 GMT+08:00 Paolo Bonzini <pbonzini@xxxxxxxxxx>:
> On 23/03/2018 15:27, Wanpeng Li wrote:
>> 2018-03-22 21:53 GMT+08:00 Andrew Cooper <andrew.cooper3@xxxxxxxxxx>:
>>> On 22/03/18 13:39, Wanpeng Li wrote:
>>>> 2018-03-22 20:38 GMT+08:00 Paolo Bonzini <pbonzini@xxxxxxxxxx>:
>>>>> On 22/03/2018 12:04, Andrew Cooper wrote:
>>>>>> We've got a Force Emulation Prefix (ud2a; .ascii "xen") for doing
>>>>>> magic. Originally, this was used for PV guests to explicitly request an
>>>>>> emulated CPUID, but I extended it to HVM guests for "emulate the next
>>>>>> instruction", after we had some guest user => guest kernel privilege
>>>>>> escalations because of incorrect emulation.
>>>>> Wanpeng, why don't you add it behind a new kvm module parameter? :)
>>>> Great point! I will have a try. Thanks Paolo and Andrew. :)
>>>
>>> Using the force emulation prefix requires intercepting #UD, which is in
>>> general a BadThing(tm) for security. Therefore, we have a build time
>>
>> Yeah, however kvm intercepts and emulates #UD by default, should we
>> add a new kvm module parameter to enable it and disable by default?
>
> No, the module parameter should only be about the force-emulation prefix.

How about something like this?
(Add EmulateOnUD to cpuid, the testcase will use it)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index dd88158..80da5c6 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4772,7 +4772,7 @@ static const struct opcode twobyte_table[256] = {
 	X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
 	/* 0xA0 - 0xA7 */
 	I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
-	II(ImplicitOps, em_cpuid, cpuid),
+	II(EmulateOnUD | ImplicitOps, em_cpuid, cpuid),
 	F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
 	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
 	F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9bc05f5..1825b45 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -108,6 +108,9 @@ module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
 static bool __read_mostly nested = 0;
 module_param(nested, bool, S_IRUGO);
 
+static bool __read_mostly fep = 0;
+module_param(fep, bool, S_IRUGO);
+
 static u64 __read_mostly host_xss;
 
 static bool __read_mostly enable_pml = 1;
@@ -6215,6 +6218,27 @@ static int handle_machine_check(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int handle_ud(struct kvm_vcpu *vcpu)
+{
+	enum emulation_result er;
+
+	if (fep) {
+		char sig[5]; /* ud2; .ascii "kvm" */
+		struct x86_exception e;
+
+		kvm_read_guest_virt(&vcpu->arch.emulate_ctxt,
+				kvm_get_linear_rip(vcpu), sig, sizeof(sig), &e);
+		if (memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0)
+			kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
+	}
+	er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
+	if (er == EMULATE_USER_EXIT)
+		return 0;
+	if (er != EMULATE_DONE)
+		kvm_queue_exception(vcpu, UD_VECTOR);
+	return 1;
+}
+
 static int handle_exception(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6233,14 +6257,8 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 	if (is_nmi(intr_info))
 		return 1; /* already handled by vmx_vcpu_run() */
 
-	if (is_invalid_opcode(intr_info)) {
-		er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
-		if (er == EMULATE_USER_EXIT)
-			return 0;
-		if (er != EMULATE_DONE)
-			kvm_queue_exception(vcpu, UD_VECTOR);
-		return 1;
-	}
+	if (is_invalid_opcode(intr_info))
+		return handle_ud(vcpu);
 
 	error_code = 0;
 	if (intr_info & INTR_INFO_DELIVER_CODE_MASK)

The testcase:

#include <stdio.h>
#include <string.h>

#define HYPERVISOR_INFO 0x40000000

#define CPUID(idx, eax, ebx, ecx, edx)\
    asm volatile (\
    "test %1,%1;jz 1f; ud2a; .ascii \"kvm\"; 1: cpuid" \
    :"=b" (*ebx), "=a" (*eax),"=c" (*ecx), "=d" (*edx)\
    :"0"(idx) );

void main()
{
    unsigned int eax,ebx,ecx,edx;
    char string[13];

    CPUID(HYPERVISOR_INFO, &eax, &ebx, &ecx, &edx);
    *(unsigned int *)(string+0) = ebx;
    *(unsigned int *)(string+4) = ecx;
    *(unsigned int *)(string+8) = edx;

    string[12] = 0;
    if (strncmp(string, "KVMKVMKVM\0\0\0",12) == 0) {
        printf("kvm guest\n");
    } else
        printf("bare hardware\n");
}

Regards,
Wanpeng Li