Hi Gavin, On 1/12/22 7:43 AM, Gavin Shan wrote: > Hi Eric, > > On 11/10/21 6:58 PM, Eric Auger wrote: >> On 8/15/21 2:13 AM, Gavin Shan wrote: >>> This supports SDEI_EVENT_{COMPLETE, COMPLETE_AND_RESUME} hypercall. >>> They are used by the guest to notify the completion of the SDEI >>> event in the handler. The registers are changed according to the >>> SDEI specification as below: >>> >>> * x0 - x17, PC and PState are restored to what values we had in >>> the interrupted context. >>> >>> * If it's SDEI_EVENT_COMPLETE_AND_RESUME hypercall, IRQ exception >>> is injected. >>> >>> Signed-off-by: Gavin Shan <gshan@xxxxxxxxxx> >>> --- >>> arch/arm64/include/asm/kvm_emulate.h | 1 + >>> arch/arm64/include/asm/kvm_host.h | 1 + >>> arch/arm64/kvm/hyp/exception.c | 7 +++ >>> arch/arm64/kvm/inject_fault.c | 27 ++++++++++ >>> arch/arm64/kvm/sdei.c | 75 ++++++++++++++++++++++++++++ >>> 5 files changed, 111 insertions(+) >>> >>> diff --git a/arch/arm64/include/asm/kvm_emulate.h >>> b/arch/arm64/include/asm/kvm_emulate.h >>> index fd418955e31e..923b4d08ea9a 100644 >>> --- a/arch/arm64/include/asm/kvm_emulate.h >>> +++ b/arch/arm64/include/asm/kvm_emulate.h >>> @@ -37,6 +37,7 @@ bool kvm_condition_valid32(const struct kvm_vcpu >>> *vcpu); >>> void kvm_skip_instr32(struct kvm_vcpu *vcpu); >>> void kvm_inject_undefined(struct kvm_vcpu *vcpu); >>> +void kvm_inject_irq(struct kvm_vcpu *vcpu); >>> void kvm_inject_vabt(struct kvm_vcpu *vcpu); >>> void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); >>> void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); >>> diff --git a/arch/arm64/include/asm/kvm_host.h >>> b/arch/arm64/include/asm/kvm_host.h >>> index 46f363aa6524..1824f7e1f9ab 100644 >>> --- a/arch/arm64/include/asm/kvm_host.h >>> +++ b/arch/arm64/include/asm/kvm_host.h >>> @@ -437,6 +437,7 @@ struct kvm_vcpu_arch { >>> #define KVM_ARM64_EXCEPT_AA32_UND (0 << 9) >>> #define KVM_ARM64_EXCEPT_AA32_IABT (1 << 9) >>> #define KVM_ARM64_EXCEPT_AA32_DABT (2 << 9) >>> 
+#define KVM_ARM64_EXCEPT_AA32_IRQ (3 << 9) >>> /* For AArch64: */ >>> #define KVM_ARM64_EXCEPT_AA64_ELx_SYNC (0 << 9) >>> #define KVM_ARM64_EXCEPT_AA64_ELx_IRQ (1 << 9) >>> diff --git a/arch/arm64/kvm/hyp/exception.c >>> b/arch/arm64/kvm/hyp/exception.c >>> index 0418399e0a20..ef458207d152 100644 >>> --- a/arch/arm64/kvm/hyp/exception.c >>> +++ b/arch/arm64/kvm/hyp/exception.c >>> @@ -310,6 +310,9 @@ static void kvm_inject_exception(struct kvm_vcpu >>> *vcpu) >>> case KVM_ARM64_EXCEPT_AA32_DABT: >>> enter_exception32(vcpu, PSR_AA32_MODE_ABT, 16); >>> break; >>> + case KVM_ARM64_EXCEPT_AA32_IRQ: >>> + enter_exception32(vcpu, PSR_AA32_MODE_IRQ, 4); >>> + break; >>> default: >>> /* Err... */ >>> break; >>> @@ -320,6 +323,10 @@ static void kvm_inject_exception(struct kvm_vcpu >>> *vcpu) >>> KVM_ARM64_EXCEPT_AA64_EL1): >>> enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); >>> break; >>> + case (KVM_ARM64_EXCEPT_AA64_ELx_IRQ | >>> + KVM_ARM64_EXCEPT_AA64_EL1): >>> + enter_exception64(vcpu, PSR_MODE_EL1h, except_type_irq); >>> + break; >>> default: >>> /* >>> * Only EL1_SYNC makes sense so far, EL2_{SYNC,IRQ} >>> diff --git a/arch/arm64/kvm/inject_fault.c >>> b/arch/arm64/kvm/inject_fault.c >>> index b47df73e98d7..3a8c55867d2f 100644 >>> --- a/arch/arm64/kvm/inject_fault.c >>> +++ b/arch/arm64/kvm/inject_fault.c >>> @@ -66,6 +66,13 @@ static void inject_undef64(struct kvm_vcpu *vcpu) >>> vcpu_write_sys_reg(vcpu, esr, ESR_EL1); >>> } >>> +static void inject_irq64(struct kvm_vcpu *vcpu) >>> +{ >>> + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 | >>> + KVM_ARM64_EXCEPT_AA64_ELx_IRQ | >>> + KVM_ARM64_PENDING_EXCEPTION); >>> +} >>> + >>> #define DFSR_FSC_EXTABT_LPAE 0x10 >>> #define DFSR_FSC_EXTABT_nLPAE 0x08 >>> #define DFSR_LPAE BIT(9) >>> @@ -77,6 +84,12 @@ static void inject_undef32(struct kvm_vcpu *vcpu) >>> KVM_ARM64_PENDING_EXCEPTION); >>> } >>> +static void inject_irq32(struct kvm_vcpu *vcpu) >>> +{ >>> + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_IRQ | >>> 
+ KVM_ARM64_PENDING_EXCEPTION); >>> +} >>> + >>> /* >>> * Modelled after TakeDataAbortException() and >>> TakePrefetchAbortException >>> * pseudocode. >>> @@ -160,6 +173,20 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) >>> inject_undef64(vcpu); >>> } >>> +/** >>> + * kvm_inject_irq - inject an IRQ into the guest >>> + * >>> + * It is assumed that this code is called from the VCPU thread and >>> that the >>> + * VCPU therefore is not currently executing guest code. >>> + */ >>> +void kvm_inject_irq(struct kvm_vcpu *vcpu) >>> +{ >>> + if (vcpu_el1_is_32bit(vcpu)) >>> + inject_irq32(vcpu); >>> + else >>> + inject_irq64(vcpu); >>> +} >>> + >>> void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 esr) >>> { >>> vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK); >>> diff --git a/arch/arm64/kvm/sdei.c b/arch/arm64/kvm/sdei.c >>> index b5d6d1ed3858..1e8e213c9d70 100644 >>> --- a/arch/arm64/kvm/sdei.c >>> +++ b/arch/arm64/kvm/sdei.c >>> @@ -308,6 +308,75 @@ static unsigned long >>> kvm_sdei_hypercall_context(struct kvm_vcpu *vcpu) >>> return ret; >>> } >>> +static unsigned long kvm_sdei_hypercall_complete(struct kvm_vcpu >>> *vcpu, >>> + bool resume) >>> +{ >>> + struct kvm *kvm = vcpu->kvm; >>> + struct kvm_sdei_kvm *ksdei = kvm->arch.sdei; >>> + struct kvm_sdei_vcpu *vsdei = vcpu->arch.sdei; >>> + struct kvm_sdei_kvm_event *kske = NULL; >>> + struct kvm_sdei_vcpu_event *ksve = NULL; >>> + struct kvm_sdei_vcpu_regs *regs; >>> + unsigned long ret = SDEI_SUCCESS; >> for the RESUME you never seem to read resume_addr arg? How does it work? >> I don't get the irq injection path. Please could you explain? > > The guest kernel uses COMPLETE and COMPLETE_AND_RESUME hypercalls to > notify that the > SDEI event has been acknowledged by it. The difference between them is > COMPLETE_AND_RESUME > fires the pending interrupts, but COMPLETE doesn't.
> >>> + int index; >>> + >>> + /* Sanity check */ >>> + if (!(ksdei && vsdei)) { >>> + ret = SDEI_NOT_SUPPORTED; >>> + goto out; >>> + } >>> + >>> + spin_lock(&vsdei->lock); >>> + if (vsdei->critical_event) { >>> + ksve = vsdei->critical_event; >>> + regs = &vsdei->state.critical_regs; >>> + vsdei->critical_event = NULL; >>> + vsdei->state.critical_num = KVM_SDEI_INVALID_NUM; >>> + } else if (vsdei->normal_event) { >>> + ksve = vsdei->normal_event; >>> + regs = &vsdei->state.normal_regs; >>> + vsdei->normal_event = NULL; >>> + vsdei->state.normal_num = KVM_SDEI_INVALID_NUM; >>> + } else { >>> + ret = SDEI_DENIED; >>> + goto unlock; >>> + } >>> + >>> + /* Restore registers: x0 -> x17, PC, PState */ >>> + for (index = 0; index < ARRAY_SIZE(regs->regs); index++) >>> + vcpu_set_reg(vcpu, index, regs->regs[index]); >>> + >>> + *vcpu_cpsr(vcpu) = regs->pstate; >>> + *vcpu_pc(vcpu) = regs->pc; >>> + >>> + /* Inject interrupt if needed */ >>> + if (resume) >>> + kvm_inject_irq(vcpu); >>> + >>> + /* >>> + * Update state. We needn't take lock in order to update the KVM >>> + * event state as it's not destroyed because of the reference >>> + * count. >>> + */ >>> + kske = ksve->kske; >>> + ksve->state.refcount--; >>> + kske->state.refcount--; >> why double --? > > Each time an SDEI event is queued for delivery, both reference counts are > increased. I guess > it's a bit confusing. I will change in next revision: > > ksve->state.refcount: Increased each time an SDEI event is queued for delivery > kske->state.refcount: Increased each time a @ksve is created Well, generally this kind of stuff is frowned upon. > > >>> + if (!ksve->state.refcount) { >> why not using a struct kref directly? > > The reason is kref isn't friendly to userspace. This field (@refcount) > needs to be > migrated :) waiting for the migration documentation to further comment.
Thanks Eric > >>> + list_del(&ksve->link); >>> + kfree(ksve); >>> + } >>> + >>> + /* Make another request if there is pending event */ >>> + if (!(list_empty(&vsdei->critical_events) && >>> + list_empty(&vsdei->normal_events))) >>> + kvm_make_request(KVM_REQ_SDEI, vcpu); >>> + >>> +unlock: >>> + spin_unlock(&vsdei->lock); >>> +out: >>> + return ret; >>> +} >>> + >>> static unsigned long kvm_sdei_hypercall_unregister(struct kvm_vcpu >>> *vcpu) >>> { >>> struct kvm *kvm = vcpu->kvm; >>> @@ -628,7 +697,13 @@ int kvm_sdei_hypercall(struct kvm_vcpu *vcpu) >>> ret = kvm_sdei_hypercall_context(vcpu); >>> break; >>> case SDEI_1_0_FN_SDEI_EVENT_COMPLETE: >>> + has_result = false; >>> + ret = kvm_sdei_hypercall_complete(vcpu, false); >>> + break; >>> case SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME: >>> + has_result = false; >>> + ret = kvm_sdei_hypercall_complete(vcpu, true); >>> + break; >>> case SDEI_1_0_FN_SDEI_EVENT_UNREGISTER: >>> ret = kvm_sdei_hypercall_unregister(vcpu); >>> break; >>> > > Thanks, > Gavin > _______________________________________________ kvmarm mailing list kvmarm@xxxxxxxxxxxxxxxxxxxxx https://lists.cs.columbia.edu/mailman/listinfo/kvmarm