On 2/25/19 5:18 AM, David Gibson wrote: > On Fri, Feb 22, 2019 at 12:28:40PM +0100, Cédric Le Goater wrote: >> When the VM boots, the CAS negotiation process determines which >> interrupt mode to use and invokes a machine reset. At that time, the >> previous KVM interrupt device is 'destroyed' before the chosen one is >> created. Upon destruction, the vCPU interrupt presenters using the KVM >> device should be cleared first, the machine will reconnect them later >> to the new device after it is created. >> >> When using the KVM device, there is still a race window with the early >> checks in kvmppc_native_connect_vcpu(). Yet to be fixed. >> >> Signed-off-by: Cédric Le Goater <clg@xxxxxxxx> >> --- >> arch/powerpc/kvm/book3s_xics.c | 19 +++++++++++++ >> arch/powerpc/kvm/book3s_xive.c | 39 +++++++++++++++++++++++++-- >> arch/powerpc/kvm/book3s_xive_native.c | 16 +++++++++++ >> 3 files changed, 72 insertions(+), 2 deletions(-) >> >> diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c >> index f27ee57ab46e..81cdabf4295f 100644 >> --- a/arch/powerpc/kvm/book3s_xics.c >> +++ b/arch/powerpc/kvm/book3s_xics.c >> @@ -1342,6 +1342,25 @@ static void kvmppc_xics_free(struct kvm_device *dev) >> struct kvmppc_xics *xics = dev->private; >> int i; >> struct kvm *kvm = xics->kvm; >> + struct kvm_vcpu *vcpu; >> + >> + /* >> + * When destroying the VM, the vCPUs are destroyed first and >> + * the vCPU list should be empty. If this is not the case, >> + * then we are simply destroying the device and we should >> + * clean up the vCPU interrupt presenters first. 
>> + */ >> + if (atomic_read(&kvm->online_vcpus) != 0) { >> + /* >> + * call kick_all_cpus_sync() to ensure that all CPUs >> + * have executed any pending interrupts >> + */ >> + if (is_kvmppc_hv_enabled(kvm)) >> + kick_all_cpus_sync(); >> + >> + kvm_for_each_vcpu(i, vcpu, kvm) >> + kvmppc_xics_free_icp(vcpu); >> + } >> >> debugfs_remove(xics->dentry); >> >> diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c >> index 7a14512b8944..0a1c11d6881c 100644 >> --- a/arch/powerpc/kvm/book3s_xive.c >> +++ b/arch/powerpc/kvm/book3s_xive.c >> @@ -1105,11 +1105,19 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) >> void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) >> { >> struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; >> - struct kvmppc_xive *xive = xc->xive; >> + struct kvmppc_xive *xive; >> int i; >> >> + if (!kvmppc_xics_enabled(vcpu)) > > This should be kvmppc_xive_enabled(), no? This is the KVM XICS-on-XIVE device and its IRQ type is KVMPPC_IRQ_XICS. So this is correct :/ Maybe we should introduce a KVMPPC_IRQ_XICS_ON_XIVE macro to clarify. 
> >> + return; >> + >> + if (!xc) >> + return; >> + >> pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num); >> >> + xive = xc->xive; >> + >> /* Ensure no interrupt is still routed to that VP */ >> xc->valid = false; >> kvmppc_xive_disable_vcpu_interrupts(vcpu); >> @@ -1146,6 +1154,10 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) >> } >> /* Free the VP */ >> kfree(xc); >> + >> + /* Cleanup the vcpu */ >> + vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; >> + vcpu->arch.xive_vcpu = NULL; >> } >> >> int kvmppc_xive_connect_vcpu(struct kvm_device *dev, >> @@ -1163,7 +1175,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev, >> } >> if (xive->kvm != vcpu->kvm) >> return -EPERM; >> - if (vcpu->arch.irq_type) >> + if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) >> return -EBUSY; >> if (kvmppc_xive_find_server(vcpu->kvm, cpu)) { >> pr_devel("Duplicate !\n"); >> @@ -1833,8 +1845,31 @@ static void kvmppc_xive_free(struct kvm_device *dev) >> { >> struct kvmppc_xive *xive = dev->private; >> struct kvm *kvm = xive->kvm; >> + struct kvm_vcpu *vcpu; >> int i; >> >> + /* >> + * When destroying the VM, the vCPUs are destroyed first and >> + * the vCPU list should be empty. If this is not the case, >> + * then we are simply destroying the device and we should >> + * clean up the vCPU interrupt presenters first. 
>> + */ >> + if (atomic_read(&kvm->online_vcpus) != 0) { >> + /* >> + * call kick_all_cpus_sync() to ensure that all CPUs >> + * have executed any pending interrupts >> + */ >> + if (is_kvmppc_hv_enabled(kvm)) >> + kick_all_cpus_sync(); >> + >> + /* >> + * TODO: There is still a race window with the early >> + * checks in kvmppc_native_connect_vcpu() >> + */ >> + kvm_for_each_vcpu(i, vcpu, kvm) >> + kvmppc_xive_cleanup_vcpu(vcpu); >> + } >> + >> debugfs_remove(xive->dentry); >> >> if (kvm) >> diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c >> index bf60870144f1..c0655164d9af 100644 >> --- a/arch/powerpc/kvm/book3s_xive_native.c >> +++ b/arch/powerpc/kvm/book3s_xive_native.c >> @@ -909,8 +909,24 @@ static void kvmppc_xive_native_free(struct kvm_device *dev) >> { >> struct kvmppc_xive *xive = dev->private; >> struct kvm *kvm = xive->kvm; >> + struct kvm_vcpu *vcpu; >> int i; >> >> + /* >> + * When destroying the VM, the vCPUs are destroyed first and >> + * the vCPU list should be empty. If this is not the case, >> + * then we are simply destroying the device and we should >> + * clean up the vCPU interrupt presenters first. >> + */ >> + if (atomic_read(&kvm->online_vcpus) != 0) { >> + /* >> + * TODO: There is still a race window with the early >> + * checks in kvmppc_xive_native_connect_vcpu() >> + */ >> + kvm_for_each_vcpu(i, vcpu, kvm) >> + kvmppc_xive_native_cleanup_vcpu(vcpu); >> + } >> + >> debugfs_remove(xive->dentry); >> >> pr_devel("Destroying xive native device\n"); >