On 26/03/2018 14:25, Wanpeng Li wrote:
> 2018-03-23 23:04 GMT+08:00 Paolo Bonzini <pbonzini@xxxxxxxxxx>:
>> On 23/03/2018 15:27, Wanpeng Li wrote:
>>> 2018-03-22 21:53 GMT+08:00 Andrew Cooper <andrew.cooper3@xxxxxxxxxx>:
>>>> On 22/03/18 13:39, Wanpeng Li wrote:
>>>>> 2018-03-22 20:38 GMT+08:00 Paolo Bonzini <pbonzini@xxxxxxxxxx>:
>>>>>> On 22/03/2018 12:04, Andrew Cooper wrote:
>>>>>>> We've got a Force Emulation Prefix (ud2a; .ascii "xen") for doing
>>>>>>> magic. Originally, this was used for PV guests to explicitly request an
>>>>>>> emulated CPUID, but I extended it to HVM guests for "emulate the next
>>>>>>> instruction", after we had some guest user => guest kernel privilege
>>>>>>> escalations because of incorrect emulation.
>>>>>> Wanpeng, why don't you add it behind a new kvm module parameter? :)
>>>>> Great point! I will have a try. Thanks Paolo and Andrew. :)
>>>>
>>>> Using the force emulation prefix requires intercepting #UD, which is in
>>>> general a BadThing(tm) for security. Therefore, we have a build time
>>>
>>> Yeah, however kvm intercepts and emulates #UD by default, should we
>>> add a new kvm module parameter to enable it and disable by default?
>>
>> No, the module parameter should only be about the force-emulation prefix.
>
> How about something like this? (Add EmulateOnUD to cpuid, the testcase
> will use it)

I think you don't need either EmulateOnUD or EMULTYPE_TRAP_UD (the latter only when fep=1 of course). Otherwise yes.

Paolo

>
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index dd88158..80da5c6 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -4772,7 +4772,7 @@ static const struct opcode twobyte_table[256] = {
>  	X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
>  	/* 0xA0 - 0xA7 */
>  	I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
> -	II(ImplicitOps, em_cpuid, cpuid),
> +	II(EmulateOnUD | ImplicitOps, em_cpuid, cpuid),
>  	F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
>  	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
>  	F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 9bc05f5..1825b45 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -108,6 +108,9 @@ module_param_named(enable_shadow_vmcs,
> enable_shadow_vmcs, bool, S_IRUGO);
>  static bool __read_mostly nested = 0;
>  module_param(nested, bool, S_IRUGO);
>
> +static bool __read_mostly fep = 0;
> +module_param(fep, bool, S_IRUGO);
> +
>  static u64 __read_mostly host_xss;
>
>  static bool __read_mostly enable_pml = 1;
> @@ -6215,6 +6218,27 @@ static int handle_machine_check(struct kvm_vcpu *vcpu)
>  	return 1;
>  }
>
> +static int handle_ud(struct kvm_vcpu *vcpu)
> +{
> +	enum emulation_result er;
> +
> +	if (fep) {
> +		char sig[5]; /* ud2; .ascii "kvm" */
> +		struct x86_exception e;
> +
> +		kvm_read_guest_virt(&vcpu->arch.emulate_ctxt,
> +				kvm_get_linear_rip(vcpu), sig, sizeof(sig), &e);
> +		if (memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0)
> +			kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
> +	}
> +	er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
> +	if (er == EMULATE_USER_EXIT)
> +		return 0;
> +	if (er != EMULATE_DONE)
> +		kvm_queue_exception(vcpu, UD_VECTOR);
> +	return 1;
> +}
> +
>  static int handle_exception(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -6233,14 +6257,8 @@ static int handle_exception(struct kvm_vcpu *vcpu)
>  	if (is_nmi(intr_info))
>  		return 1; /* already handled by vmx_vcpu_run() */
>
> -	if (is_invalid_opcode(intr_info)) {
> -		er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
> -		if (er == EMULATE_USER_EXIT)
> -			return 0;
> -		if (er != EMULATE_DONE)
> -			kvm_queue_exception(vcpu, UD_VECTOR);
> -		return 1;
> -	}
> +	if (is_invalid_opcode(intr_info))
> +		return handle_ud(vcpu);
>
>  	error_code = 0;
>  	if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
>
>
> The testcase:
>
> #include <stdio.h>
> #include <string.h>
>
> #define HYPERVISOR_INFO 0x40000000
>
> #define CPUID(idx, eax, ebx, ecx, edx)\
>     asm volatile (\
>     "test %1,%1;jz 1f; ud2a; .ascii \"kvm\"; 1: cpuid" \
>     :"=b" (*ebx), "=a" (*eax),"=c" (*ecx), "=d" (*edx)\
>     :"0"(idx) );
>
> void main()
> {
>     unsigned int eax,ebx,ecx,edx;
>     char string[13];
>
>     CPUID(HYPERVISOR_INFO, &eax, &ebx, &ecx, &edx);
>     *(unsigned int *)(string+0) = ebx;
>     *(unsigned int *)(string+4) = ecx;
>     *(unsigned int *)(string+8) = edx;
>
>     string[12] = 0;
>     if (strncmp(string, "KVMKVMKVM\0\0\0",12) == 0) {
>         printf("kvm guest\n");
>     } else
>         printf("bare hardware\n");
>
> }
>
> Regards,
> Wanpeng Li
>