From: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx> This patch introduces VMEXIT handlers, avic_incomplete_ipi_interception() and avic_unaccelerated_access_interception() along with two trace points (trace_kvm_avic_incomplete_ipi and trace_kvm_avic_unaccelerated_access). Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx> --- arch/x86/include/uapi/asm/svm.h | 9 +- arch/x86/kvm/lapic.h | 3 + arch/x86/kvm/svm.c | 246 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/trace.h | 57 ++++++++++ arch/x86/kvm/x86.c | 2 + 5 files changed, 316 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 8a4add8..b9e9bb2 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -73,6 +73,8 @@ #define SVM_EXIT_MWAIT_COND 0x08c #define SVM_EXIT_XSETBV 0x08d #define SVM_EXIT_NPF 0x400 +#define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 +#define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 #define SVM_EXIT_ERR -1 @@ -107,8 +109,10 @@ { SVM_EXIT_SMI, "smi" }, \ { SVM_EXIT_INIT, "init" }, \ { SVM_EXIT_VINTR, "vintr" }, \ + { SVM_EXIT_CR0_SEL_WRITE, "cr0_sel_write" }, \ { SVM_EXIT_CPUID, "cpuid" }, \ { SVM_EXIT_INVD, "invd" }, \ + { SVM_EXIT_PAUSE, "pause" }, \ { SVM_EXIT_HLT, "hlt" }, \ { SVM_EXIT_INVLPG, "invlpg" }, \ { SVM_EXIT_INVLPGA, "invlpga" }, \ @@ -127,7 +131,10 @@ { SVM_EXIT_MONITOR, "monitor" }, \ { SVM_EXIT_MWAIT, "mwait" }, \ { SVM_EXIT_XSETBV, "xsetbv" }, \ - { SVM_EXIT_NPF, "npf" } + { SVM_EXIT_NPF, "npf" }, \ + { SVM_EXIT_RSM, "rsm" }, \ + { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ + { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" } #endif /* _UAPI__SVM_H */ diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index a70cb62..2fc86b7 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -9,6 +9,9 @@ #define KVM_APIC_SIPI 1 #define KVM_APIC_LVT_NUM 6 +#define KVM_APIC_SHORT_MASK 0xc0000 +#define KVM_APIC_DEST_MASK 0x800 + struct kvm_timer { struct hrtimer timer; s64 period; /* unit: ns */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f9547bc..13fba3b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3515,6 +3515,250 @@ static int mwait_interception(struct vcpu_svm *svm) return nop_interception(svm); } +enum avic_ipi_failure_cause { + AVIC_IPI_FAILURE_INVALID_INT_TYPE, + AVIC_IPI_FAILURE_TARGET_NOT_RUNNING, + AVIC_IPI_FAILURE_INVALID_TARGET, + AVIC_IPI_FAILURE_INVALID_BACKING_PAGE, +}; + +static int avic_incomplete_ipi_interception(struct vcpu_svm *svm) +{ + u32 icrh = svm->vmcb->control.exit_info_1 >> 32; + u32 icrl = svm->vmcb->control.exit_info_1; + u32 id = svm->vmcb->control.exit_info_2 >> 32; + u32 index = svm->vmcb->control.exit_info_2 && 0xFF; + struct kvm_lapic *apic = svm->vcpu.arch.apic; + + trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index); + + switch (id) { + case AVIC_IPI_FAILURE_INVALID_INT_TYPE: + /* + * AVIC hardware handles the generation of + * IPIs when the specified Message Type is Fixed + * (also known as fixed delivery mode) and + * the Trigger Mode is edge-triggered. The hardware + * also supports self and broadcast delivery modes + * specified via the Destination Shorthand(DSH) + * field of the ICRL. Logical and physical APIC ID + * formats are supported. All other IPI types cause + * a #VMEXIT, which needs to emulated. + */ + kvm_lapic_reg_write(apic, APIC_ICR2, icrh); + kvm_lapic_reg_write(apic, APIC_ICR, icrl); + break; + case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: { + int i; + struct kvm_vcpu *vcpu; + struct kvm *kvm = svm->vcpu.kvm; + struct kvm_lapic *apic = svm->vcpu.arch.apic; + + /* + * At this point, we expect that the AVIC HW has already + * set the appropriate IRR bits on the valid target + * vcpus. So, we just need to kick the appropriate vcpu. + */ + kvm_for_each_vcpu(i, vcpu, kvm) { + bool m = kvm_apic_match_dest(vcpu, apic, + icrl & KVM_APIC_SHORT_MASK, + GET_APIC_DEST_FIELD(icrh), + icrl & KVM_APIC_DEST_MASK); + + if (m && !avic_vcpu_is_running(vcpu)) + kvm_vcpu_wake_up(vcpu); + } + break; + } + case AVIC_IPI_FAILURE_INVALID_TARGET: + break; + case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE: + WARN_ONCE(1, "Invalid backing page\n"); + break; + default: + pr_err("Unknown IPI interception\n"); + } + + return 1; +} + +static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u8 mda, bool flat) +{ + struct kvm_arch *vm_data = &vcpu->kvm->arch; + int index; + u32 *logical_apic_id_table; + + if (flat) { /* flat */ + if (mda > 7) + return NULL; + index = mda; + } else { /* cluster */ + int apic_id = mda & 0xf; + int cluster_id = (mda & 0xf0) >> 8; + + if (apic_id > 4 || cluster_id >= 0xf) + return NULL; + index = (cluster_id << 2) + apic_id; + } + logical_apic_id_table = (u32 *) page_address(vm_data->avic_logical_id_table_page); + + return &logical_apic_id_table[index]; +} + +static int avic_handle_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, + u8 logical_id) +{ + u32 mod; + u32 *entry, new_entry; + struct vcpu_svm *svm = to_svm(vcpu); + + if (!svm) + return -EINVAL; + + mod = (kvm_apic_get_reg(svm->vcpu.arch.apic, APIC_DFR) >> 28) & 0xf; + entry = avic_get_logical_id_entry(vcpu, logical_id, (mod == 0xf)); + if (!entry) + return -EINVAL; + + new_entry = READ_ONCE(*entry); + new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK; + new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK); + new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK; + WRITE_ONCE(*entry, new_entry); + + return 0; +} + +static int avic_unaccel_trap_write(struct vcpu_svm *svm) +{ + u32 offset = svm->vmcb->control.exit_info_1 & 0xFF0; + struct kvm_lapic *apic = svm->vcpu.arch.apic; + u32 reg = kvm_apic_get_reg(apic, offset); + + switch (offset) { + case APIC_ID: { + u32 aid = (reg >> 24) & 0xff; + u64 *o_ent = avic_get_physical_id_entry(&svm->vcpu, + svm->vcpu.vcpu_id); + u64 *n_ent = avic_get_physical_id_entry(&svm->vcpu, aid); + + if (!n_ent || !o_ent) + return 0; + + /* We need to move physical_id_entry to new offset */ + *n_ent = *o_ent; + *o_ent = 0ULL; + svm->avic_physical_id_cache = n_ent; + break; + } + case APIC_LDR: { + int ret, lid; + int dlid = (reg >> 24) & 0xff; + + if (!dlid) + return 0; + + lid = ffs(dlid) - 1; + ret = avic_handle_ldr_write(&svm->vcpu, svm->vcpu.vcpu_id, lid); + if (ret) + return 0; + + break; + } + case APIC_DFR: { + struct kvm_arch *vm_data = &svm->vcpu.kvm->arch; + u32 mod = (reg >> 28) & 0xf; + + /* + * We assume that all local APICs are using the same type. + * If this changes, we need to rebuild the AVIC logical + * APID id table with subsequent write to APIC_LDR. + */ + if (vm_data->ldr_mode != mod) { + clear_page(page_address(vm_data->avic_logical_id_table_page)); + vm_data->ldr_mode = mod; + } + break; + } + default: + break; + } + + kvm_lapic_reg_write(apic, offset, reg); + + return 1; +} + +static bool is_avic_unaccelerated_access_trap(u32 offset) +{ + bool ret = false; + + switch (offset) { + case APIC_ID: + case APIC_EOI: + case APIC_RRR: + case APIC_LDR: + case APIC_DFR: + case APIC_SPIV: + case APIC_ESR: + case APIC_ICR: + case APIC_LVTT: + case APIC_LVTTHMR: + case APIC_LVTPC: + case APIC_LVT0: + case APIC_LVT1: + case APIC_LVTERR: + case APIC_TMICT: + case APIC_TDCR: + ret = true; + break; + default: + break; + } + return ret; +} + +#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1 +#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0 +#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF + +static int avic_unaccelerated_access_interception(struct vcpu_svm *svm) +{ + int ret = 0; + u32 offset = svm->vmcb->control.exit_info_1 & + AVIC_UNACCEL_ACCESS_OFFSET_MASK; + u32 vector = svm->vmcb->control.exit_info_2 & + AVIC_UNACCEL_ACCESS_VECTOR_MASK; + bool write = (svm->vmcb->control.exit_info_1 >> 32) & + AVIC_UNACCEL_ACCESS_WRITE_MASK; + bool trap = is_avic_unaccelerated_access_trap(offset); + + trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset, + trap, write, vector); + + /** + * AVIC does not support x2APIC registers, and we only advertise + * xAPIC when enable AVIC. Therefore, access to these registers + * will not be supported. + */ + if (offset >= 0x400) { + WARN(1, "Unsupported APIC offset %#x\n", offset); + return ret; + } + + if (trap) { + /* Handling Trap */ + if (!write) /* Trap read should never happens */ + BUG(); + ret = avic_unaccel_trap_write(svm); + } else { + /* Handling Fault */ + ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE); + } + + return ret; +} + static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR0] = cr_interception, [SVM_EXIT_READ_CR3] = cr_interception, @@ -3578,6 +3822,8 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_XSETBV] = xsetbv_interception, [SVM_EXIT_NPF] = pf_interception, [SVM_EXIT_RSM] = emulate_on_interception, + [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, + [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception, }; static void dump_vmcb(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 2f1ea2f..39f264c 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -1292,6 +1292,63 @@ TRACE_EVENT(kvm_hv_stimer_cleanup, __entry->vcpu_id, __entry->timer_index) ); +/* + * Tracepoint for AMD AVIC + */ +TRACE_EVENT(kvm_avic_incomplete_ipi, + TP_PROTO(u32 vcpu, u32 icrh, u32 icrl, u32 id, u32 index), + TP_ARGS(vcpu, icrh, icrl, id, index), + + TP_STRUCT__entry( + __field(u32, vcpu) + __field(u32, icrh) + __field(u32, icrl) + __field(u32, id) + __field(u32, index) + ), + + TP_fast_assign( + __entry->vcpu = vcpu; + __entry->icrh = icrh; + __entry->icrl = icrl; + __entry->id = id; + __entry->index = index; + ), + + TP_printk("vcpu=%u, icrh:icrl=%#010x:%08x, id=%u, index=%u\n", + __entry->vcpu, __entry->icrh, __entry->icrl, + __entry->id, __entry->index) +); + +TRACE_EVENT(kvm_avic_unaccelerated_access, + TP_PROTO(u32 vcpu, u32 offset, bool ft, bool rw, u32 vec), + TP_ARGS(vcpu, offset, ft, rw, vec), + + TP_STRUCT__entry( + __field(u32, vcpu) + __field(u32, offset) + __field(bool, ft) + __field(bool, rw) + __field(u32, vec) + ), + + TP_fast_assign( + __entry->vcpu = vcpu; + __entry->offset = offset; + __entry->ft = ft; + __entry->rw = rw; + __entry->vec = vec; + ), + + TP_printk("vcpu=%u, offset=%#x(%s), %s, %s, vec=%#x\n", + __entry->vcpu, + __entry->offset, + __print_symbolic(__entry->offset, kvm_trace_symbol_apic), + __entry->ft ? "trap" : "fault", + __entry->rw ? "write" : "read", + __entry->vec) +); + #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d12583e..b0f211c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8437,3 +8437,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi); -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html