From: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx>

In PVM, events are injected and consumed directly. The PVM hypervisor
does not follow the IDT-based event delivery mechanism but instead
utilizes a new PVM-specific event delivery ABI, which is similar to
FRED event delivery.

Signed-off-by: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx>
Signed-off-by: Hou Wenlong <houwenlong.hwl@xxxxxxxxxxxx>
---
 arch/x86/kvm/pvm/pvm.c | 193 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/pvm/pvm.h |   1 +
 2 files changed, 194 insertions(+)
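
A quick note for reviewers: the entry point used for a delivered event
follows do_pvm_event() in the diff below. The standalone helper sketched
here is only illustrative (pvm_event_entry_for() is not part of the
patch); the user/supervisor split, the PVM_SYSCALL_VECTOR special case
and the +256/+512 offsets are taken from the added code.

static unsigned long pvm_event_entry_for(struct vcpu_pvm *pvm, int vector)
{
	if (!is_smod(pvm))	/* user mode: deliver through the PVCS */
		return vector == PVM_SYSCALL_VECTOR ? pvm->msr_lstar :
						      pvm->msr_event_entry;
	if (vector < 32)	/* supervisor exception: frame pushed on the stack */
		return pvm->msr_event_entry + 256;
	/* supervisor interrupt: delivered through the PVCS */
	return pvm->msr_event_entry + 512;
}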

diff --git a/arch/x86/kvm/pvm/pvm.c b/arch/x86/kvm/pvm/pvm.c
index 3d2a3c472664..57d987903791 100644
--- a/arch/x86/kvm/pvm/pvm.c
+++ b/arch/x86/kvm/pvm/pvm.c
@@ -648,6 +648,150 @@ static void pvm_event_flags_update(struct kvm_vcpu *vcpu, unsigned long set,
 	pvm_put_vcpu_struct(pvm, new_flags != old_flags);
 }
 
+static void pvm_standard_event_entry(struct kvm_vcpu *vcpu, unsigned long entry)
+{
+	// Change rip, rflags, rcx and r11 per the PVM event delivery
+	// specification; this allows sysret to be used for VM enter.
+	kvm_rip_write(vcpu, entry);
+	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
+	kvm_rcx_write(vcpu, entry);
+	kvm_r11_write(vcpu, X86_EFLAGS_IF | X86_EFLAGS_FIXED);
+}
+
+/* Handle a PVM user event per the PVM specification. */
+static int do_pvm_user_event(struct kvm_vcpu *vcpu, int vector,
+			     bool has_err_code, u64 err_code)
+{
+	struct vcpu_pvm *pvm = to_pvm(vcpu);
+	unsigned long entry = vector == PVM_SYSCALL_VECTOR ?
+			      pvm->msr_lstar : pvm->msr_event_entry;
+	struct pvm_vcpu_struct *pvcs;
+
+	pvcs = pvm_get_vcpu_struct(pvm);
+	if (!pvcs) {
+		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+		return 1;
+	}
+
+	pvcs->user_cs = pvm->hw_cs;
+	pvcs->user_ss = pvm->hw_ss;
+	pvcs->eflags = kvm_get_rflags(vcpu);
+	pvcs->pkru = 0;
+	pvcs->user_gsbase = pvm_read_guest_gs_base(pvm);
+	pvcs->rip = kvm_rip_read(vcpu);
+	pvcs->rsp = kvm_rsp_read(vcpu);
+	pvcs->rcx = kvm_rcx_read(vcpu);
+	pvcs->r11 = kvm_r11_read(vcpu);
+
+	if (has_err_code)
+		pvcs->event_errcode = err_code;
+	if (vector != PVM_SYSCALL_VECTOR)
+		pvcs->event_vector = vector;
+
+	if (vector == PF_VECTOR)
+		pvcs->cr2 = vcpu->arch.cr2;
+
+	pvm_put_vcpu_struct(pvm, true);
+
+	switch_to_smod(vcpu);
+
+	pvm_standard_event_entry(vcpu, entry);
+
+	return 1;
+}
+
+static int do_pvm_supervisor_exception(struct kvm_vcpu *vcpu, int vector,
+				       bool has_error_code, u64 error_code)
+{
+	struct vcpu_pvm *pvm = to_pvm(vcpu);
+	unsigned long stack;
+	struct pvm_supervisor_event frame;
+	struct x86_exception e;
+	int ret;
+
+	memset(&frame, 0, sizeof(frame));
+	frame.cs = kernel_cs_by_msr(pvm->msr_star);
+	frame.ss = kernel_ds_by_msr(pvm->msr_star);
+	frame.rip = kvm_rip_read(vcpu);
+	frame.rflags = kvm_get_rflags(vcpu);
+	frame.rsp = kvm_rsp_read(vcpu);
+	frame.errcode = ((unsigned long)vector << 32) | error_code;
+	frame.r11 = kvm_r11_read(vcpu);
+	frame.rcx = kvm_rcx_read(vcpu);
+
+	stack = ((frame.rsp - pvm->msr_supervisor_redzone) & ~15UL) - sizeof(frame);
+
+	ret = kvm_write_guest_virt_system(vcpu, stack, &frame, sizeof(frame), &e);
+	if (ret) {
+		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+		return 1;
+	}
+
+	if (vector == PF_VECTOR) {
+		struct pvm_vcpu_struct *pvcs;
+
+		pvcs = pvm_get_vcpu_struct(pvm);
+		if (!pvcs) {
+			kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+			return 1;
+		}
+
+		pvcs->cr2 = vcpu->arch.cr2;
+		pvm_put_vcpu_struct(pvm, true);
+	}
+
+	kvm_rsp_write(vcpu, stack);
+
+	pvm_standard_event_entry(vcpu, pvm->msr_event_entry + 256);
+
+	return 1;
+}
+
+static int do_pvm_supervisor_interrupt(struct kvm_vcpu *vcpu, int vector,
+				       bool has_error_code, u64 error_code)
+{
+	struct vcpu_pvm *pvm = to_pvm(vcpu);
+	unsigned long stack = kvm_rsp_read(vcpu);
+	struct pvm_vcpu_struct *pvcs;
+
+	pvcs = pvm_get_vcpu_struct(pvm);
+	if (!pvcs) {
+		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+		return 1;
+	}
+
+	pvcs->eflags = kvm_get_rflags(vcpu);
+	pvcs->rip = kvm_rip_read(vcpu);
+	pvcs->rsp = stack;
+	pvcs->rcx = kvm_rcx_read(vcpu);
+	pvcs->r11 = kvm_r11_read(vcpu);
+
+	pvcs->event_vector = vector;
+	if (has_error_code)
+		pvcs->event_errcode = error_code;
+
+	pvm_put_vcpu_struct(pvm, true);
+
+	stack = (stack - pvm->msr_supervisor_redzone) & ~15UL;
+	kvm_rsp_write(vcpu, stack);
+
+	pvm_standard_event_entry(vcpu, pvm->msr_event_entry + 512);
+
+	return 1;
+}
+
+static int do_pvm_event(struct kvm_vcpu *vcpu, int vector,
+			bool has_error_code, u64 error_code)
+{
+	if (!is_smod(to_pvm(vcpu)))
+		return do_pvm_user_event(vcpu, vector, has_error_code, error_code);
+
+	if (vector < 32)
+		return do_pvm_supervisor_exception(vcpu, vector,
+						   has_error_code, error_code);
+
+	return do_pvm_supervisor_interrupt(vcpu, vector, has_error_code, error_code);
+}
+
 static unsigned long pvm_get_rflags(struct kvm_vcpu *vcpu)
 {
 	return to_pvm(vcpu)->rflags;
@@ -722,6 +866,51 @@ static int pvm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
 	return !pvm->nmi_mask && !pvm->int_shadow;
 }
 
+/* Always inject the exception directly and consume the event. */
+static void pvm_inject_exception(struct kvm_vcpu *vcpu)
+{
+	unsigned int vector = vcpu->arch.exception.vector;
+	bool has_error_code = vcpu->arch.exception.has_error_code;
+	u32 error_code = vcpu->arch.exception.error_code;
+
+	kvm_deliver_exception_payload(vcpu, &vcpu->arch.exception);
+
+	if (do_pvm_event(vcpu, vector, has_error_code, error_code))
+		kvm_clear_exception_queue(vcpu);
+}
+
+/* Always inject the interrupt directly and consume the event. */
+static void pvm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
+{
+	int irq = vcpu->arch.interrupt.nr;
+
+	trace_kvm_inj_virq(irq, vcpu->arch.interrupt.soft, false);
+
+	if (do_pvm_event(vcpu, irq, false, 0))
+		kvm_clear_interrupt_queue(vcpu);
+
+	++vcpu->stat.irq_injections;
+}
+
+/* Always inject the NMI directly and consume the event. */
+static void pvm_inject_nmi(struct kvm_vcpu *vcpu)
+{
+	if (do_pvm_event(vcpu, NMI_VECTOR, false, 0)) {
+		vcpu->arch.nmi_injected = false;
+		pvm_set_nmi_mask(vcpu, true);
+	}
+
+	++vcpu->stat.nmi_injections;
+}
+
+static void pvm_cancel_injection(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * Nothing to do. Exceptions and interrupts are delivered immediately
+	 * during event injection, so they cannot be cancelled and reinjected.
+	 */
+}
+
 static void pvm_setup_mce(struct kvm_vcpu *vcpu)
 {
 }
@@ -1282,6 +1471,10 @@ static struct kvm_x86_ops pvm_x86_ops __initdata = {
 	.handle_exit = pvm_handle_exit,
 	.set_interrupt_shadow = pvm_set_interrupt_shadow,
 	.get_interrupt_shadow = pvm_get_interrupt_shadow,
+	.inject_irq = pvm_inject_irq,
+	.inject_nmi = pvm_inject_nmi,
+	.inject_exception = pvm_inject_exception,
+	.cancel_injection = pvm_cancel_injection,
 	.interrupt_allowed = pvm_interrupt_allowed,
 	.nmi_allowed = pvm_nmi_allowed,
 	.get_nmi_mask = pvm_get_nmi_mask,
diff --git a/arch/x86/kvm/pvm/pvm.h b/arch/x86/kvm/pvm/pvm.h
index b0c633ce2987..39506ddbe5c5 100644
--- a/arch/x86/kvm/pvm/pvm.h
+++ b/arch/x86/kvm/pvm/pvm.h
@@ -7,6 +7,7 @@
 
 #define SWITCH_FLAGS_INIT	(SWITCH_FLAGS_SMOD)
 
+#define PVM_SYSCALL_VECTOR		SWITCH_EXIT_REASONS_SYSCALL
 #define PVM_FAILED_VMENTRY_VECTOR	SWITCH_EXIT_REASONS_FAILED_VMETNRY
 
 #define PT_L4_SHIFT		39
-- 
2.19.1.6.gb485710b