Hi Paolo, so I've been hacking at early code recently and found this to be pretty useful. Thoughts? This is AMD-only now but I'll extend it to Intel too, if there's interest. --- From: Borislav Petkov <bp@xxxxxxx> Date: Tue, 1 Mar 2016 14:34:28 +0100 Subject: [RFC PATCH] kvm: Make KVM DF intercept configurable Sometimes it is useful when testing kernels in guests to know why the guest triple-faults, especially if one is poking at early kernel code where there's no debugging output yet. So add a module parameter which makes the guest exit on a #DF and log error info into a #DF tracepoint. Which turns out to be very useful: 1. Early PF causing a DF: ------------------------- diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 22fbf9df61bb..57d7d7a68c7d 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -64,6 +64,8 @@ startup_64: * tables and then reload them. */ + movq stack_start, %rsp --- We can't read that early from stack_start because we haven't enabled paging yet. KVM logs: qemu-system-x86-9523 [001] .... 1941.666236: kvm_df: Guest DF! rIP: 0x1000000, prev exception: 0xe, error_code: 0x0 and the exception causing the #DF is a #PF (vector 0xe). 2. #GP ------------------------- diff --git a/init/main.c b/init/main.c index 7c27de4577ed..869eb6110765 100644 --- a/init/main.c +++ b/init/main.c @@ -967,6 +969,9 @@ static int __ref kernel_init(void *unused) ramdisk_execute_command, ret); } + load_idt(&no_idt); + __asm__ __volatile__("int3"); --- Loading a NULL IDT and causing a debug interrupt, causes a #GP. KVM logs: qemu-system-x86-12853 [000] .... 2463.500732: kvm_df: Guest DF! rIP: 0xffffffff815ba8ba, prev exception: 0xd, error_code: 0x1a and the previous exception was a #GP (vector 0xd). Signed-off-by: Borislav Petkov <bp@xxxxxxx> --- arch/x86/kvm/svm.c | 15 +++++++++++++++ arch/x86/kvm/trace.h | 20 ++++++++++++++++++++ arch/x86/kvm/x86.c | 1 + 3 files changed, 36 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c13a64b7d789..09f78bbd8b47 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -205,6 +205,9 @@ module_param(npt, int, S_IRUGO); static int nested = true; module_param(nested, int, S_IRUGO); +static int intercept_df = 0; +module_param(intercept_df, int, S_IRUGO); + static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); static void svm_flush_tlb(struct kvm_vcpu *vcpu); static void svm_complete_interrupts(struct vcpu_svm *svm); @@ -1023,6 +1026,8 @@ static void init_vmcb(struct vcpu_svm *svm) set_exception_intercept(svm, MC_VECTOR); set_exception_intercept(svm, AC_VECTOR); set_exception_intercept(svm, DB_VECTOR); + if (intercept_df) + set_exception_intercept(svm, DF_VECTOR); set_intercept(svm, INTERCEPT_INTR); set_intercept(svm, INTERCEPT_NMI); @@ -1728,6 +1733,15 @@ static int nm_interception(struct vcpu_svm *svm) return 1; } +static int df_interception(struct vcpu_svm *svm) +{ + trace_kvm_df(svm->vmcb->save.rip, + svm->vcpu.arch.exception.nr, + svm->vcpu.arch.exception.has_error_code ? + svm->vcpu.arch.exception.error_code : 0); + return 1; +} + static bool is_erratum_383(void) { int err, i; @@ -3308,6 +3322,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, + [SVM_EXIT_EXCP_BASE + DF_VECTOR] = df_interception, [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, [SVM_EXIT_INTR] = intr_interception, diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index ad9f6a23f139..6143c53f7bde 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -89,6 +89,26 @@ TRACE_EVENT(kvm_hv_hypercall, __entry->outgpa) ); +TRACE_EVENT(kvm_df, + TP_PROTO(unsigned long rip, u8 nr, u32 error_code), + TP_ARGS(rip, nr, error_code), + + TP_STRUCT__entry( + __field( unsigned long, rip ) + __field( u8, nr ) + __field( u32, error_code ) + ), + + TP_fast_assign( + __entry->rip = rip; + __entry->nr = nr; + __entry->error_code = error_code; + ), + + TP_printk("Guest DF! rIP: 0x%lx, prev exception: 0x%x, error_code: 0x%x", + __entry->rip, __entry->nr, __entry->error_code) +); + /* * Tracepoint for PIO. */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f18a08281015..49f1bd3279f2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8388,3 +8388,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_df); -- 2.3.5 -- Regards/Gruss, Boris. ECO tip #101: Trim your mails when you reply. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html