Sean Christopherson <seanjc@xxxxxxxxxx> writes: > On Fri, Aug 13, 2021, David Edmondson wrote: >> -static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu) >> +static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data, >> + u8 ndata, u8 *insn_bytes, u8 insn_size) >> { >> - struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; >> - u32 insn_size = ctxt->fetch.end - ctxt->fetch.data; >> struct kvm_run *run = vcpu->run; >> + u8 ndata_start; >> + u64 info[5]; >> + >> + /* >> + * Zero the whole array used to retrieve the exit info, casting to u32 >> + * for select entries will leave some chunks uninitialized. >> + */ >> + memset(&info, 0, sizeof(info)); >> + >> + static_call(kvm_x86_get_exit_info)(vcpu, (u32 *)&info[0], &info[1], >> + &info[2], (u32 *)&info[3], >> + (u32 *)&info[4]); >> >> run->exit_reason = KVM_EXIT_INTERNAL_ERROR; >> run->emulation_failure.suberror = KVM_INTERNAL_ERROR_EMULATION; >> - run->emulation_failure.ndata = 0; >> + >> + /* >> + * There's currently space for 13 entries, but 5 are used for the exit >> + * reason and info. Restrict to 4 to reduce the maintenance burden >> + * when expanding kvm_run.emulation_failure in the future. >> + */ >> + if (WARN_ON_ONCE(ndata > 4)) >> + ndata = 4; >> + >> + /* Always include the flags as a 'data' entry. */ >> + ndata_start = 1; >> run->emulation_failure.flags = 0; >> >> if (insn_size) { >> - run->emulation_failure.ndata = 3; >> + ndata_start += (sizeof(run->emulation_failure.insn_size) + >> + sizeof(run->emulation_failure.insn_bytes)) / >> + sizeof(u64); > > Hrm, I like the intent, but the end result ends up being rather convoluted and > unnecessarily scary, e.g. this would do the wrong thing if the combined size of > the fields is not a multiple of 8. That's obviously is not true, but relying on > insn_size/insn_bytes being carefully selected while simultaneously obscuring that > dependency is a bit mean. What about a compile-time assertion with a more reader > friendly literal for bumping the count? > > BUILD_BUG_ON((sizeof(run->emulation_failure.insn_size) + > sizeof(run->emulation_failure.insn_bytes) != 16)); > ndata_start += 2; Okay. >> run->emulation_failure.flags |= >> KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES; >> run->emulation_failure.insn_size = insn_size; >> memset(run->emulation_failure.insn_bytes, 0x90, >> sizeof(run->emulation_failure.insn_bytes)); >> - memcpy(run->emulation_failure.insn_bytes, >> - ctxt->fetch.data, insn_size); >> + memcpy(run->emulation_failure.insn_bytes, insn_bytes, insn_size); >> } >> + >> + memcpy(&run->internal.data[ndata_start], info, sizeof(info)); > > Oof, coming back to this code after some time away, "ndata_start" is confusing. > I believe past me thought that it would help convey that "info" is lumped into > the arbitrary data, but for me at least it just ends up making the interaction > with @data and @ndata more confusing. Sorry for the bad suggestion :-/ > > What about info_start? IMO, that makes the memcpy more readable. Another option > would be to have the name describe the number of "ABI enries", but I can't come > up with a variable name that's remotely readable. > > memcpy(&run->internal.data[info_start], info, sizeof(info)); > memcpy(&run->internal.data[info_start + ARRAY_SIZE(info)], data, > ndata * sizeof(data[0])); Okay. >> + memcpy(&run->internal.data[ndata_start + ARRAY_SIZE(info)], data, >> + ndata * sizeof(u64)); > > Not that it really matters, but it's probably better to use sizeof(data[0]) or > sizeof(*data). E.g. if we do screw up the param in the future, we only botch the > output formatting, as opposed to dumping kernel stack data to userspace. Agreed. >> + >> + run->emulation_failure.ndata = ndata_start + ARRAY_SIZE(info) + ndata; >> } >> >> +static void prepare_emulation_ctxt_failure_exit(struct kvm_vcpu *vcpu) >> +{ >> + struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; >> + >> + prepare_emulation_failure_exit(vcpu, NULL, 0, ctxt->fetch.data, >> + ctxt->fetch.end - ctxt->fetch.data); >> +} >> + >> +void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data, >> + u8 ndata) >> +{ >> + prepare_emulation_failure_exit(vcpu, data, ndata, NULL, 0); >> +} >> +EXPORT_SYMBOL_GPL(__kvm_prepare_emulation_failure_exit); >> + >> +void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu) >> +{ >> + __kvm_prepare_emulation_failure_exit(vcpu, NULL, 0); >> +} >> +EXPORT_SYMBOL_GPL(kvm_prepare_emulation_failure_exit); >> + >> static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) >> { >> struct kvm *kvm = vcpu->kvm; >> @@ -7502,16 +7551,14 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) >> >> if (kvm->arch.exit_on_emulation_error || >> (emulation_type & EMULTYPE_SKIP)) { >> - prepare_emulation_failure_exit(vcpu); >> + prepare_emulation_ctxt_failure_exit(vcpu); >> return 0; >> } >> >> kvm_queue_exception(vcpu, UD_VECTOR); >> >> if (!is_guest_mode(vcpu) && static_call(kvm_x86_get_cpl)(vcpu) == 0) { >> - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; >> - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; >> - vcpu->run->internal.ndata = 0; >> + prepare_emulation_ctxt_failure_exit(vcpu); >> return 0; >> } >> >> @@ -12104,9 +12151,7 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, >> * doesn't seem to be a real use-case behind such requests, just return >> * KVM_EXIT_INTERNAL_ERROR for now. >> */ >> - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; >> - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; >> - vcpu->run->internal.ndata = 0; >> + kvm_prepare_emulation_failure_exit(vcpu); >> >> return 0; >> } >> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h >> index 6c79c1ce3703..e86cc2de7b5c 100644 >> --- a/include/uapi/linux/kvm.h >> +++ b/include/uapi/linux/kvm.h >> @@ -397,6 +397,12 @@ struct kvm_run { >> * "ndata" is correct, that new fields are enumerated in "flags", >> * and that each flag enumerates fields that are 64-bit aligned >> * and sized (so that ndata+internal.data[] is valid/accurate). >> + * >> + * Space beyond the defined fields may be used to > > Please run these out to 80 chars. Even 80 is a soft limit, it's ok to run over > a bit if the end result is (subjectively) prettier. > >> + * store arbitrary debug information relating to the >> + * emulation failure. It is accounted for in "ndata" >> + * but otherwise unspecified and is not represented in > > Explicitly state the format is unspecified? > >> + * "flags". > > And also explicitly stating the debug info isn't ABI, e.g. > > * Space beyond the defined fields may be used to store arbitrary > * debug information relating to the emulation failure. It is > * accounted for in "ndata" but the format is unspecified and > * is not represented in "flags". Any such info is _not_ ABI! Okay. >> */ >> struct { >> __u32 suberror; >> @@ -408,6 +414,7 @@ struct kvm_run { >> __u8 insn_bytes[15]; >> }; >> }; >> + /* Arbitrary debug data may follow. */ >> } emulation_failure; >> /* KVM_EXIT_OSI */ >> struct { >> -- >> 2.30.2 >>