Re: [STRAWMAN PATCH] KVM: PPC: Add ioctl to specify interrupt controller architecture to emulate

Alexander Graf <agraf@xxxxxxx> · Thu, 7 Mar 2013 15:00:52 +0100

On 07.03.2013, at 04:29, Paul Mackerras wrote:

> This adds a new ioctl, KVM_SET_IRQ_ARCHITECTURE, which is intended to
> be called by userspace to specify that it wishes the kernel to emulate
> a specific interrupt controller architecture.  This doesn't imply the
> creation of any specific device, but does indicate that when vcpus are
> created, space for per-vcpu interrupt controller state should be
> allocated.  Having this ioctl enables userspace to defer creation of
> the actual interrupt controller device(s) until after the vcpus are
> created.
> 
> The KVM_SET_IRQ_ARCHITECTURE ioctl takes a single 32-bit unsigned int
> as its parameter.  Values for this parameter will be defined in
> subsequent patches.  The value of 0 means no in-kernel interrupt
> controller emulation.
> 
> In order to ensure that this ioctl will fail once any vcpu has been
> created, we use an arch.vcpus_created field to indicate that vcpu
> creation has commenced.  We don't use the online_vcpus field because
> that is only incremented after vcpu creation; if we used that there
> would be a window during the first KVM_CREATE_VCPU ioctl where the
> first vcpu had been created but the interrupt architecture could still
> be changed.
> 
> Signed-off-by: Paul Mackerras <paulus@xxxxxxxxx>

Could you please (in a quick and drafty way) try and see if setting the IRQ arch (using enable_cap) after the vcpu got created would work for you?

That enable_cap would then have to loop through all devices and notify irq controllers that a new cpu got spawned.
All vcpu local payloads would have to get allocated and initialized outside of vcpu_create too then.

I don't have a good feeling for how hard this would be and whether locking would become overly difficult. I think it's fair to restrict the enable_cap to only work when no other vcpu is running. Of course, not requiring a stopped machine would make hotplug easier for user space :).

If it turns out to be hard and / or so complex that it's likely to make things more buggy than is worth, this patch is the way to go IMHO.

Alex

> ---
> So, should this all be done in generic code?
> 
> Documentation/virtual/kvm/api.txt   |   22 ++++++++++++
> arch/powerpc/include/asm/kvm_host.h |    2 ++
> arch/powerpc/kvm/powerpc.c          |   66 +++++++++++++++++++++++++++++++++--
> include/uapi/linux/kvm.h            |    3 ++
> 4 files changed, 90 insertions(+), 3 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index cce500a..d550313 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -2126,6 +2126,28 @@ header; first `n_valid' valid entries with contents from the data
> written, then `n_invalid' invalid entries, invalidating any previously
> valid entries found.
> 
> +4.79 KVM_SET_IRQ_ARCHITECTURE
> +
> +Capability: KVM_CAP_IRQ_ARCH
> +Architecture: powerpc
> +Type: vm ioctl
> +Parameters: Pointer to u32 (in)
> +Returns: 0 on success, -1 on error
> +
> +This is called before any vcpus are created, if in-kernel interrupt
> +controller emulation is desired, to specify what overall interrupt
> +controller architecture should be emulated.  Having this called before
> +any vcpus are created allows per-vcpu interrupt controller state to be
> +allocated at vcpu creation time, and allows the creation of the actual
> +interrupt controller device to be deferred until after the vcpus are
> +created.
> +
> +The parameter is a 32-bit unsigned integer specifying the
> +architecture, or the value 0 to specify that no emulation should be
> +done.  If the requested architecture is not supported by the kernel,
> +this ioctl returns an EINVAL error.  Otherwise, if this ioctl is
> +called after any vcpus have been created, it returns an EBUSY error.
> +
> 
> 5. The kvm_run structure
> ------------------------
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 8a72d59..e21ea1f 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -227,6 +227,8 @@ struct kvm_arch_memory_slot {
> };
> 
> struct kvm_arch {
> +	unsigned int irq_arch;
> +	int vcpus_created;
> 	unsigned int lpid;
> #ifdef CONFIG_KVM_BOOK3S_64_HV
> 	unsigned long hpt_virt;
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index 934413c..b681746 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -441,10 +441,21 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
> {
> 	struct kvm_vcpu *vcpu;
> 	vcpu = kvmppc_core_vcpu_create(kvm, id);
> -	if (!IS_ERR(vcpu)) {
> -		vcpu->arch.wqp = &vcpu->wq;
> -		kvmppc_create_vcpu_debugfs(vcpu, id);
> +	if (IS_ERR(vcpu))
> +		goto out;
> +
> +	/* Create per-vcpu irq controller state if needed */
> +	mutex_lock(&kvm->lock);
> +	kvm->arch.vcpus_created = 1;
> +	switch (kvm->arch.irq_arch) {
> +	default:
> +		break;
> 	}
> +	mutex_unlock(&kvm->lock);
> +
> +	vcpu->arch.wqp = &vcpu->wq;
> +	kvmppc_create_vcpu_debugfs(vcpu, id);
> + out:
> 	return vcpu;
> }
> 
> @@ -459,6 +470,12 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
> 	hrtimer_cancel(&vcpu->arch.dec_timer);
> 	tasklet_kill(&vcpu->arch.tasklet);
> 
> +	/* Destroy per-vcpu irq controller state */
> +	switch (vcpu->kvm->arch.irq_arch) {
> +	default:
> +		break;
> +	}
> +
> 	kvmppc_remove_vcpu_debugfs(vcpu);
> 	kvmppc_core_vcpu_free(vcpu);
> }
> @@ -996,6 +1013,49 @@ long kvm_arch_vm_ioctl(struct file *filp,
> 		break;
> 	}
> #endif /* CONFIG_PPC_BOOK3S_64 */
> +
> +	case KVM_IRQ_LINE: {
> +		struct kvm *kvm = filp->private_data;
> +		struct kvm_irq_level args;
> +
> +		r = -EFAULT;
> +		if (copy_from_user(&args, argp, sizeof(args)))
> +			break;
> +		switch (kvm->arch.irq_arch) {
> +		default:
> +			r = -EINVAL;
> +		}
> +		break;
> +	}
> +
> +	case KVM_SET_IRQ_ARCHITECTURE: {
> +		struct kvm *kvm = filp->private_data;
> +		u32 arch_id;
> +
> +		r = -EFAULT;
> +		if (get_user(arch_id, (u32 __user *)argp))
> +			break;
> +		r = 0;
> +		switch (arch_id) {
> +		case 0:		/* no emulation */
> +			break;
> +		default:
> +			r = -EINVAL;
> +			break;
> +		}
> +		if (r)
> +			break;
> +
> +		/* mutex ensures we don't race with vcpu creation */
> +		mutex_lock(&kvm->lock);
> +		if (!kvm->arch.vcpus_created)
> +			kvm->arch.irq_arch = arch_id;
> +		else
> +			r = -EBUSY;
> +		mutex_unlock(&kvm->lock);
> +		break;
> +	}
> +
> 	default:
> 		r = -ENOTTY;
> 	}
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 9a2db57..2c0ea1c 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -662,6 +662,7 @@ struct kvm_ppc_smmu_info {
> #define KVM_CAP_PPC_HTAB_FD 84
> #define KVM_CAP_S390_CSS_SUPPORT 85
> #define KVM_CAP_PPC_EPR 86
> +#define KVM_CAP_IRQ_ARCH 87
> 
> #ifdef KVM_CAP_IRQ_ROUTING
> 
> @@ -889,6 +890,8 @@ struct kvm_s390_ucas_mapping {
> #define KVM_ALLOCATE_RMA	  _IOR(KVMIO,  0xa9, struct kvm_allocate_rma)
> /* Available with KVM_CAP_PPC_HTAB_FD */
> #define KVM_PPC_GET_HTAB_FD	  _IOW(KVMIO,  0xaa, struct kvm_get_htab_fd)
> +/* Available with KVM_CAP_IRQ_ARCH */
> +#define KVM_SET_IRQ_ARCHITECTURE  _IOW(KVMIO,  0xab, __u32)
> 
> /*
>  * ioctls for vcpu fds
> -- 
> 1.7.10.4
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html