From: Julien Thierry <julien.thierry@xxxxxxx>

kvm_vcpu_kick() is not NMI safe. When the overflow handler is called from
NMI context, defer waking the vcpu to an irq_work queue.

A vcpu can be freed by kvm_destroy_vm() while it is not running. Prevent
the irq_work from running for a vcpu that no longer exists by calling
irq_work_sync() on the PMU destroy path.

Cc: Julien Thierry <julien.thierry.kdev@xxxxxxxxx>
Cc: Marc Zyngier <marc.zyngier@xxxxxxx>
Cc: Will Deacon <will.deacon@xxxxxxx>
Cc: Mark Rutland <mark.rutland@xxxxxxx>
Cc: Catalin Marinas <catalin.marinas@xxxxxxx>
Cc: James Morse <james.morse@xxxxxxx>
Cc: Suzuki K Poulose <suzuki.poulose@xxxxxxx>
Cc: kvm@xxxxxxxxxxxxxxx
Cc: kvmarm@xxxxxxxxxxxxxxxxxxxxx
Signed-off-by: Julien Thierry <julien.thierry@xxxxxxx>
Tested-by: Sumit Garg <sumit.garg@xxxxxxxxxx> (Developerbox)
[Alexandru E.: Added irq_work_sync()]
Signed-off-by: Alexandru Elisei <alexandru.elisei@xxxxxxx>
---
In v6 I suggested adding an irq_work_sync() to kvm_pmu_vcpu_reset(). It
turns out that is not necessary: a vcpu reset is done by the vcpu being
reset, with interrupts enabled, so any queued work has had a chance to
run before the reset takes place.

 arch/arm64/kvm/pmu-emul.c | 26 +++++++++++++++++++++++++-
 include/kvm/arm_pmu.h     |  1 +
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index f0d0312c0a55..81916e360b1e 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -269,6 +269,7 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
 		kvm_pmu_release_perf_event(&pmu->pmc[i]);
+	irq_work_sync(&vcpu->arch.pmu.overflow_work);
 }
 
 u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
@@ -433,6 +434,22 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
 	kvm_pmu_update_state(vcpu);
 }
 
+/**
+ * When the perf interrupt is an NMI, we cannot safely notify the vcpu
+ * corresponding to the event.
+ * This is why we need a callback to do it once outside of the NMI context.
+ */
+static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_pmu *pmu;
+
+	pmu = container_of(work, struct kvm_pmu, overflow_work);
+	vcpu = kvm_pmc_to_vcpu(pmu->pmc);
+
+	kvm_vcpu_kick(vcpu);
+}
+
 /**
  * When the perf event overflows, set the overflow status and inform the vcpu.
  */
@@ -465,7 +482,11 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
 
 	if (kvm_pmu_overflow_status(vcpu)) {
 		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
-		kvm_vcpu_kick(vcpu);
+
+		if (!in_nmi())
+			kvm_vcpu_kick(vcpu);
+		else
+			irq_work_queue(&vcpu->arch.pmu.overflow_work);
 	}
 
 	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
@@ -764,6 +785,9 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
 		return ret;
 	}
 
+	init_irq_work(&vcpu->arch.pmu.overflow_work,
+		      kvm_pmu_perf_overflow_notify_vcpu);
+
 	vcpu->arch.pmu.created = true;
 	return 0;
 }
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 6db030439e29..dbf4f08d42e5 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -27,6 +27,7 @@ struct kvm_pmu {
 	bool ready;
 	bool created;
 	bool irq_level;
+	struct irq_work overflow_work;
 };
 
 #define kvm_arm_pmu_v3_ready(v)	((v)->arch.pmu.ready)
-- 
2.28.0
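
[Editor's note: for readers who haven't used irq_work before, here is a
minimal, self-contained sketch of the same NMI-safe wakeup pattern the
patch applies, outside of KVM. The my_counter structure and functions are
hypothetical names invented for illustration; init_irq_work(),
irq_work_queue(), irq_work_sync() and in_nmi() are the kernel APIs the
patch relies on.]

#include <linux/hardirq.h>	/* in_nmi() */
#include <linux/irq_work.h>
#include <linux/kernel.h>	/* container_of() */
#include <linux/sched.h>	/* wake_up_process() */

struct my_counter {
	struct task_struct *waiter;
	struct irq_work overflow_work;
};

/*
 * irq_work callbacks run in normal interrupt context, where it is safe
 * to take the locks that wake_up_process() (like kvm_vcpu_kick()) needs.
 */
static void my_counter_wake(struct irq_work *work)
{
	struct my_counter *c = container_of(work, struct my_counter,
					    overflow_work);

	wake_up_process(c->waiter);
}

/*
 * Overflow handler; may be invoked as an NMI when the PMU interrupt is
 * a pseudo-NMI. Waking a task takes scheduler locks, so it must be
 * deferred out of NMI context.
 */
static void my_counter_overflow(struct my_counter *c)
{
	if (!in_nmi())
		wake_up_process(c->waiter);
	else
		irq_work_queue(&c->overflow_work);
}

static void my_counter_init(struct my_counter *c, struct task_struct *waiter)
{
	c->waiter = waiter;
	init_irq_work(&c->overflow_work, my_counter_wake);
}

/*
 * Mirrors the kvm_pmu_vcpu_destroy() hunk: wait for any queued work to
 * finish so it cannot run after *c has been freed.
 */
static void my_counter_destroy(struct my_counter *c)
{
	irq_work_sync(&c->overflow_work);
}

[irq_work_queue() is designed to be NMI safe: it claims the work with an
atomic operation, pushes it onto a per-cpu lockless list and raises a
self-IPI, which is what makes it the right deferral mechanism here.]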