On Tue, Sep 10, 2024, Maxim Levitsky wrote: > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 52443ccda320f..8118f75a8a35d 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -1756,13 +1756,16 @@ struct kvm_x86_ops { > void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu); > > /* > - * Retrieve somewhat arbitrary exit information. Intended to > + * Retrieve somewhat arbitrary exit/entry information. Intended to > * be used only from within tracepoints or error paths. > */ > void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason, > u64 *info1, u64 *info2, > u32 *exit_int_info, u32 *exit_int_info_err_code); > > + void (*get_entry_info)(struct kvm_vcpu *vcpu, > + u32 *inj_info, u32 *inj_info_error_code); I vote to use the same names as the kvm_exit tracepoint, i.e. intr_info and error_code throughout. While I agree that capturing the "injection" aspect is nice to have, if a user doesn't know that the fields are related to event/intr injection, I don't think "inj" is going to help them connect the dots. On the other hand, for cases where an event is re-injected, using the same names as kvm_exit provides a direct connection between the event that was being vectored at the time of exit, and the subsequent re-injection of the same event. > int (*check_intercept)(struct kvm_vcpu *vcpu, > struct x86_instruction_info *info, > enum x86_intercept_stage stage, ... > TP_fast_assign( > __entry->vcpu_id = vcpu->vcpu_id; > __entry->rip = kvm_rip_read(vcpu); > - __entry->immediate_exit = force_immediate_exit; > + __entry->immediate_exit = force_immediate_exit; > + __entry->guest_mode = is_guest_mode(vcpu); > + > + static_call(kvm_x86_get_entry_info)(vcpu, > + &__entry->inj_info, > + &__entry->inj_info_err); > ), > > - TP_printk("vcpu %u, rip 0x%lx%s", __entry->vcpu_id, __entry->rip, > - __entry->immediate_exit ? 
"[immediate exit]" : "") > + TP_printk("vcpu %u, rip 0x%lx inj 0x%08x inj_error_code 0x%08x%s%s", > + __entry->vcpu_id, __entry->rip, > + __entry->inj_info, __entry->inj_info_err, > + __entry->immediate_exit ? "[immediate exit]" : "", > + __entry->guest_mode ? "[guest]" : "") I 100% agree kvm_entry should capture L1 vs. L2, but looking more closely, I think we should make the entry and exit tracepoints consistent, and then maybe rename trace_kvm_nested_vmexit_inject() => trace_kvm_nested_vmexit(). Currently, trace_kvm_nested_vmexit() traces all exits from L2=>L0, which is rather silly since it's trivial to capture L1 vs. L2 in kvm_exit. I also find it to be quite annoying since the vast, vast majority of the time I don't want to trace *just* L2=>L0 exits. And it's especially annoying because if I want to see both L1 and L2 exits, the trace contains a double dose of L2 exits. Last thought, what about always capturing where the transition is occurring? E.g. instead of tagging on "[guest]" at the end, something like this: TP_printk("vcpu %u => L%u rip 0x%lx intr_info 0x%08x error_code 0x%08x%s", __entry->vcpu_id, 1 + __entry->guest_mode, ... and then in kvm_exit: TP_printk("vcpu %u <= L%u reason %s%s%s rip 0x%lx info1 0x%016llx " \ "info2 0x%016llx intr_info 0x%08x error_code 0x%08x " \ "requests 0x%016llx", \ __entry->vcpu_id, 1 + __entry->guest_mode, \ Or use "to" and "from" if the "=>" / "<=" is too cute and confusing. For now, I'm going to omit the is_guest_mode() change purely to avoid churn if we end up squashing the current trace_kvm_nested_vmexit() into trace_kvm_exit(). As I'm about to disappear for two weeks, I'm going to speculatively apply the below so I don't delay the meat of this patch any more than I already have. Please holler if you disagree with the intr_info+error_code terminology, I'm definitely open to other names, though I do feel quite strongly that entry and exit need to be consistent. 
These are sitting at the head of "misc", so I can fixup without much fuss. --- From: Maxim Levitsky <mlevitsk@xxxxxxxxxx> Date: Tue, 10 Sep 2024 16:03:48 -0400 Subject: [PATCH 1/2] KVM: x86: Add interrupt injection information to the kvm_entry tracepoint Add VMX/SVM specific interrupt injection info to the kvm_entry tracepoint. As is done with kvm_exit, gather the information via a kvm_x86_ops hook to avoid the moderately costly VMREADs on VMX when the tracepoint isn't enabled. Opportunistically rename the parameters in the get_exit_info() declaration to match the names used by both SVM and VMX. Signed-off-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx> Link: https://lore.kernel.org/r/20240910200350.264245-2-mlevitsk@xxxxxxxxxx [sean: drop is_guest_mode() change, use intr_info/error_code for names] Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx> --- arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 7 +++++-- arch/x86/kvm/svm/svm.c | 16 ++++++++++++++++ arch/x86/kvm/trace.h | 9 ++++++++- arch/x86/kvm/vmx/main.c | 1 + arch/x86/kvm/vmx/vmx.c | 9 +++++++++ arch/x86/kvm/vmx/x86_ops.h | 3 +++ 7 files changed, 43 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 5aff7222e40f..8c04472829a0 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -100,6 +100,7 @@ KVM_X86_OP(get_l2_tsc_multiplier) KVM_X86_OP(write_tsc_offset) KVM_X86_OP(write_tsc_multiplier) KVM_X86_OP(get_exit_info) +KVM_X86_OP(get_entry_info) KVM_X86_OP(check_intercept) KVM_X86_OP(handle_exit_irqoff) KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1a09ac99132c..c07d8318e9d8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1770,12 +1770,15 @@ struct kvm_x86_ops { void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu); /* - * Retrieve somewhat arbitrary exit 
information. Intended to + * Retrieve somewhat arbitrary exit/entry information. Intended to * be used only from within tracepoints or error paths. */ void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason, u64 *info1, u64 *info2, - u32 *exit_int_info, u32 *exit_int_info_err_code); + u32 *intr_info, u32 *error_code); + + void (*get_entry_info)(struct kvm_vcpu *vcpu, + u32 *intr_info, u32 *error_code); int (*check_intercept)(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8fc2f4a97495..d06fe41a2de0 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3542,6 +3542,21 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, *error_code = 0; } +static void svm_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info, + u32 *error_code) +{ + struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; + + *intr_info = control->event_inj; + + if ((*intr_info & SVM_EXITINTINFO_VALID) && + (*intr_info & SVM_EXITINTINFO_VALID_ERR)) + *error_code = control->event_inj_err; + else + *error_code = 0; + +} + static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) { struct vcpu_svm *svm = to_svm(vcpu); @@ -5082,6 +5097,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .required_apicv_inhibits = AVIC_REQUIRED_APICV_INHIBITS, .get_exit_info = svm_get_exit_info, + .get_entry_info = svm_get_entry_info, .vcpu_after_set_cpuid = svm_vcpu_after_set_cpuid, diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index d3aeffd6ae75..c2edf4a36fad 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -22,15 +22,22 @@ TRACE_EVENT(kvm_entry, __field( unsigned int, vcpu_id ) __field( unsigned long, rip ) __field( bool, immediate_exit ) + __field( u32, intr_info ) + __field( u32, error_code ) ), TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; __entry->rip = kvm_rip_read(vcpu); __entry->immediate_exit = force_immediate_exit; + + 
kvm_x86_call(get_entry_info)(vcpu, &__entry->intr_info, + &__entry->error_code); ), - TP_printk("vcpu %u, rip 0x%lx%s", __entry->vcpu_id, __entry->rip, + TP_printk("vcpu %u, rip 0x%lx intr_info 0x%08x error_code 0x%08x%s", + __entry->vcpu_id, __entry->rip, + __entry->intr_info, __entry->error_code, __entry->immediate_exit ? "[immediate exit]" : "") ); diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index 92d35cc6cd15..697e135ba0f3 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -111,6 +111,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .get_mt_mask = vmx_get_mt_mask, .get_exit_info = vmx_get_exit_info, + .get_entry_info = vmx_get_entry_info, .vcpu_after_set_cpuid = vmx_vcpu_after_set_cpuid, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e93c48ff61c5..3fd6df782492 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6194,6 +6194,15 @@ void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, } } +void vmx_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info, u32 *error_code) +{ + *intr_info = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); + if (is_exception_with_error_code(*intr_info)) + *error_code = vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE); + else + *error_code = 0; +} + static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx) { if (vmx->pml_pg) { diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h index a55981c5216e..f7f65e81920b 100644 --- a/arch/x86/kvm/vmx/x86_ops.h +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -104,8 +104,11 @@ void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr); u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); + void vmx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code); +void vmx_get_entry_info(struct kvm_vcpu *vcpu, u32 *intr_info, u32 *error_code); 
+ u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu); u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu); void vmx_write_tsc_offset(struct kvm_vcpu *vcpu); base-commit: 43651b98dd23e3d2d11f14964e98801ba58feccb --