Re: [PATCH v2 2/2] KVM: s390: add floating irq controller

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sat, Oct 05, 2013 at 01:53:33AM +0200, Alexander Graf wrote:
> 
> On 06.09.2013, at 14:19, Jens Freimann wrote:
> 
[snip] 
> > -int kvm_s390_inject_vm(struct kvm *kvm,
> > -		       struct kvm_s390_interrupt *s390int)
> > +static void __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
> 
> This really doesn't only inject, it enqueues an interrupt and also injects them then, right?

hmm, maybe the term injecting is misleading. What is meant by injecting here is simply adding
it to the list. The actual "injection" into the guest lowcore is done by the do_deliver_interrupt
function.

> > {
> > 	struct kvm_s390_local_interrupt *li;
> > 	struct kvm_s390_float_interrupt *fi;
> > -	struct kvm_s390_interrupt_info *inti, *iter;
> > +	struct kvm_s390_interrupt_info *iter;
> > 	int sigcpu;
> > 
> > +	mutex_lock(&kvm->lock);
> > +	fi = &kvm->arch.float_int;
> 
> You probably want to move this into your device structure eventually.

Yes, good point.
 
> > +	spin_lock(&fi->lock);
> > +	if (!is_ioint(inti->type)) {
> > +		list_add_tail(&inti->list, &fi->list);
> > +	} else {
> > +		u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);
> > +
> > +		/* Keep I/O interrupts sorted in isc order. */
> > +		list_for_each_entry(iter, &fi->list, list) {
> > +			if (!is_ioint(iter->type))
> > +				continue;
> > +			if (int_word_to_isc_bits(iter->io.io_int_word)
> > +					<= isc_bits)
> > +				continue;
> > +			break;
> > +		}
> > +		list_add_tail(&inti->list, &iter->list);
> > +	}
> > +	atomic_set(&fi->active, 1);
> > +	sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
> > +	if (sigcpu == KVM_MAX_VCPUS) {
> > +		do {
> > +			sigcpu = fi->next_rr_cpu++;
> > +			if (sigcpu == KVM_MAX_VCPUS)
> > +				sigcpu = fi->next_rr_cpu = 0;
> > +		} while (fi->local_int[sigcpu] == NULL);
> > +	}
> > +	li = fi->local_int[sigcpu];
> > +	spin_lock_bh(&li->lock);
> > +	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
> > +	if (waitqueue_active(li->wq))
> > +		wake_up_interruptible(li->wq);
> 
> Does this kick the other vcpu to notify it that an irq is available?
> 
> The code is very spaghetti style. There's certainly a good opportunity to split things up and make it more readable here ;). I think splitting it into "enqueue" and "target cpu deliver" parts makes a lot of sense for starters.

I'm working on a larger patchset that changes interrupt injection and delivery.
The rationale is to make it more readable, use less locking (get rid of lists 
use bitmaps) and become more PoP-compliant. I'll try to make it more obvious.
 
> Btw, usually the way these interrupt controllers work in KVM is that the CPU local interrupt controller part "fetches" interrupts from the floating interrupt controller. So in here you would only enqueue and maybe kick a potential receiver of the interrupt. Before a vcpu goes back into guest state, it asks the floating interrupt controller whether there's anything pending for it. If so, it delivers it.

I think that's kind of what we're doing here. We enqueue by adding it to our list
of pending interrupts, look for an idle cpu, set the external interrupt flag 
and wake it up. 

> 
> That way even if the vcpu you chose here happens to get preempted, you still deliver the interrupt quickly through another vcpu.
> 
> > +	spin_unlock_bh(&li->lock);
> > +	spin_unlock(&fi->lock);
> > +	mutex_unlock(&kvm->lock);
> > +}
> > +
> > +int kvm_s390_inject_vm(struct kvm *kvm,
> > +		       struct kvm_s390_interrupt *s390int)
> > +{
> > +	struct kvm_s390_interrupt_info *inti;
> > +
> > 	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
> > 	if (!inti)
> > 		return -ENOMEM;
> > 
> > -	switch (s390int->type) {
> > +	inti->type = s390int->type;
> > +	switch (inti->type) {
> > 	case KVM_S390_INT_VIRTIO:
> > 		VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx",
> > 			 s390int->parm, s390int->parm64);
> > -		inti->type = s390int->type;
> > 		inti->ext.ext_params = s390int->parm;
> > 		inti->ext.ext_params2 = s390int->parm64;
> > 		break;
> > 	case KVM_S390_INT_SERVICE:
> > 		VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
> > -		inti->type = s390int->type;
> > 		inti->ext.ext_params = s390int->parm;
> > 		break;
> > -	case KVM_S390_PROGRAM_INT:
> > -	case KVM_S390_SIGP_STOP:
> > -	case KVM_S390_INT_EXTERNAL_CALL:
> > -	case KVM_S390_INT_EMERGENCY:
> > -		kfree(inti);
> > -		return -EINVAL;
> > 	case KVM_S390_MCHK:
> > 		VM_EVENT(kvm, 5, "inject: machine check parm64:%llx",
> > 			 s390int->parm64);
> > -		inti->type = s390int->type;
> > 		inti->mchk.cr14 = s390int->parm; /* upper bits are not used */
> > 		inti->mchk.mcic = s390int->parm64;
> > 		break;
> > 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
> > -		if (s390int->type & IOINT_AI_MASK)
> > +		if (inti->type & IOINT_AI_MASK)
> > 			VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
> > 		else
> > 			VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x",
> > 				 s390int->type & IOINT_CSSID_MASK,
> > 				 s390int->type & IOINT_SSID_MASK,
> > 				 s390int->type & IOINT_SCHID_MASK);
> > -		inti->type = s390int->type;
> > 		inti->io.subchannel_id = s390int->parm >> 16;
> > 		inti->io.subchannel_nr = s390int->parm & 0x0000ffffu;
> > 		inti->io.io_int_parm = s390int->parm64 >> 32;
> > @@ -715,42 +748,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
> > 	trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64,
> > 				 2);
> > 
> > -	mutex_lock(&kvm->lock);
> > -	fi = &kvm->arch.float_int;
> > -	spin_lock(&fi->lock);
> > -	if (!is_ioint(inti->type))
> > -		list_add_tail(&inti->list, &fi->list);
> > -	else {
> > -		u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);
> > -
> > -		/* Keep I/O interrupts sorted in isc order. */
> > -		list_for_each_entry(iter, &fi->list, list) {
> > -			if (!is_ioint(iter->type))
> > -				continue;
> > -			if (int_word_to_isc_bits(iter->io.io_int_word)
> > -			    <= isc_bits)
> > -				continue;
> > -			break;
> > -		}
> > -		list_add_tail(&inti->list, &iter->list);
> > -	}
> > -	atomic_set(&fi->active, 1);
> > -	sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
> > -	if (sigcpu == KVM_MAX_VCPUS) {
> > -		do {
> > -			sigcpu = fi->next_rr_cpu++;
> > -			if (sigcpu == KVM_MAX_VCPUS)
> > -				sigcpu = fi->next_rr_cpu = 0;
> > -		} while (fi->local_int[sigcpu] == NULL);
> > -	}
> > -	li = fi->local_int[sigcpu];
> > -	spin_lock_bh(&li->lock);
> > -	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
> > -	if (waitqueue_active(li->wq))
> > -		wake_up_interruptible(li->wq);
> > -	spin_unlock_bh(&li->lock);
> > -	spin_unlock(&fi->lock);
> > -	mutex_unlock(&kvm->lock);
> 
> ... but as it's mostly copy&paste from here I wouldn't mind if you do the cleanup in a follow-up patch.
> 
> > +	__inject_vm(kvm, inti);
> > 	return 0;
> > }
> > 
> > @@ -838,3 +836,207 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
> > 	mutex_unlock(&vcpu->kvm->lock);
> > 	return 0;
> > }
> > +
> > +static void clear_floating_interrupts(struct kvm *kvm)
> 
> This should eventually only require device scope, no?

Yes, will change that. 
> > +{
> > +	struct kvm_s390_float_interrupt *fi;
> > +	struct kvm_s390_interrupt_info	*n, *inti = NULL;
> > +
> > +	mutex_lock(&kvm->lock);
> > +	fi = &kvm->arch.float_int;
> > +	spin_lock(&fi->lock);
> > +	list_for_each_entry_safe(inti, n, &fi->list, list) {
> > +		list_del(&inti->list);
> > +		kfree(inti);
> > +	}
> > +	atomic_set(&fi->active, 0);
> > +	spin_unlock(&fi->lock);
> > +	mutex_unlock(&kvm->lock);
> > +}
> > +
> > +static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti,
> > +				   u64 addr)
> > +{
> > +	struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
> > +	void __user *target;
> > +	void *source;
> > +	u64 size;
> > +	int r = 0;
> > +
> > +	switch (inti->type) {
> > +	case KVM_S390_INT_VIRTIO:
> > +	case KVM_S390_INT_SERVICE:
> > +		source = &inti->ext;
> > +		target = &uptr->ext;
> > +		size = sizeof(inti->ext);
> > +		break;
> > +	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
> > +		source = &inti->io;
> > +		target = &uptr->io;
> > +		size = sizeof(inti->io);
> > +		break;
> > +	case KVM_S390_MCHK:
> > +		source = &inti->mchk;
> > +		target = &uptr->mchk;
> > +		size = sizeof(inti->mchk);
> > +		break;
> > +	case KVM_S390_INT_MAX:
> > +		goto out;
> > +	default:
> > +		return -EINVAL;
> > +	}
> > +
> > +	r = put_user(inti->type, (u64 __user *) &uptr->type);
> > +	if (copy_to_user(target, source, size))
> > +		r = -EFAULT;
> > +
> > +out:
> > +	return r;
> > +}
> > +
> > +static int dequeue_floating_irq(struct kvm *kvm, __u64 addr)
> 
> I don't understand the purpose of this function. Why would you want to dequeue (and obtain all information about) a floating interrupt without checking for its type? 

Again, I guess the name "dequeue" is misleading. It simply means to take an 
interrupt off the list. This function is currently used for migration only. 

> In a normal interrupt controller world you would say "lower the line for interrupt line X now".  I'm missing that "interrupt line X" parameter.

No "interrupt lines" in s390. We "disable" interrupts by turning off bits
in the PSW or control registers. But this function has nothing to do with that. 

I'll try to make the naming more obvious with my upcoming patchset.


Thanks for the review!

regards
Jens


> 
> 
> Alex
> 
> > +{
> > +	struct kvm_s390_interrupt_info *inti;
> > +	struct kvm_s390_float_interrupt *fi;
> > +	int r = 0;
> > +
> > +
> > +	mutex_lock(&kvm->lock);
> > +	fi = &kvm->arch.float_int;
> > +	spin_lock(&fi->lock);
> > +	if (list_empty(&fi->list)) {
> > +		mutex_unlock(&kvm->lock);
> > +		spin_unlock(&fi->lock);
> > +		return -ENODATA;
> > +	}
> > +	inti = list_first_entry(&fi->list,
> > +			struct kvm_s390_interrupt_info, list);
> > +	list_del(&inti->list);
> > +	spin_unlock(&fi->lock);
> > +	mutex_unlock(&kvm->lock);
> > +
> > +	r = copy_irq_to_user(inti, addr);
> > +
> > +	kfree(inti);
> > +	return r;
> > +}
> > +
> > +static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
> > +{
> > +	int r;
> > +
> > +	switch (attr->group) {
> > +	case KVM_DEV_FLIC_DEQUEUE:
> > +		r = dequeue_floating_irq(dev->kvm, attr->addr);
> > +		break;
> > +	default:
> > +		r = -EINVAL;
> > +	}
> > +
> > +	return r;
> > +}
> > +
> > +static inline int copy_irq_from_user(struct kvm_s390_interrupt_info *inti,
> > +				     u64 addr)
> > +{
> > +	struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
> > +	void *target = NULL;
> > +	void *source;
> > +	u64 size;
> > +	int r = 0;
> > +
> > +	if (get_user(inti->type, (u64 __user *)addr))
> > +		return -EFAULT;
> > +	switch (inti->type) {
> > +	case KVM_S390_INT_VIRTIO:
> > +	case KVM_S390_INT_SERVICE:
> > +		target = (void *) &inti->ext;
> > +		source = &uptr->ext;
> > +		size = sizeof(inti->ext);
> > +		break;
> > +	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
> > +		target = (void *) &inti->io;
> > +		source = &uptr->io;
> > +		size = sizeof(inti->io);
> > +		break;
> > +	case KVM_S390_MCHK:
> > +		target = (void *) &inti->mchk;
> > +		source = &uptr->mchk;
> > +		size = sizeof(inti->mchk);
> > +		break;
> > +	case KVM_S390_INT_MAX:
> > +		goto out;
> > +	default:
> > +		r = -EINVAL;
> > +		return r;
> > +	}
> > +
> > +	if (copy_from_user(target, source, size))
> > +		r = -EFAULT;
> > +
> > +out:
> > +	return r;
> > +}
> > +
> > +static int enqueue_floating_irq(struct kvm_device *dev,
> > +				 struct kvm_device_attr *attr)
> > +{
> > +	struct kvm_s390_interrupt_info *inti = NULL;
> > +	int r = 0;
> > +
> > +	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
> > +	if (!inti)
> > +		return -ENOMEM;
> > +
> > +	r = copy_irq_from_user(inti, attr->addr);
> > +	if (r) {
> > +		kfree(inti);
> > +		return r;
> > +	}
> > +	__inject_vm(dev->kvm, inti);
> > +
> > +	return r;
> > +}
> > +
> > +static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
> > +{
> > +	int r = 0;
> > +
> > +	switch (attr->group) {
> > +	case KVM_DEV_FLIC_ENQUEUE:
> > +		r = enqueue_floating_irq(dev, attr);
> > +		break;
> > +	case KVM_DEV_FLIC_CLEAR_IRQS:
> > +		r = 0;
> > +		clear_floating_interrupts(dev->kvm);
> > +		break;
> > +	default:
> > +		r = -EINVAL;
> > +	}
> > +
> > +	return r;
> > +}
> > +
> > +static int flic_create(struct kvm_device *dev, u32 type)
> > +{
> > +	if (!dev)
> > +		return -EINVAL;
> > +	if (dev->kvm->arch.flic)
> > +		return -EINVAL;
> > +	dev->kvm->arch.flic = dev;
> > +	return 0;
> > +}
> > +
> > +static void flic_destroy(struct kvm_device *dev)
> > +{
> > +	dev->kvm->arch.flic = NULL;
> > +}
> > +
> > +/* s390 floating irq controller (flic) */
> > +struct kvm_device_ops kvm_flic_ops = {
> > +	.name = "kvm-flic",
> > +	.get_attr = flic_get_attr,
> > +	.set_attr = flic_set_attr,
> > +	.create = flic_create,
> > +	.destroy = flic_destroy,
> > +};
> > diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> > index 776dafe..760febf 100644
> > --- a/arch/s390/kvm/kvm-s390.c
> > +++ b/arch/s390/kvm/kvm-s390.c
> > @@ -157,6 +157,7 @@ int kvm_dev_ioctl_check_extension(long ext)
> > 	case KVM_CAP_ENABLE_CAP:
> > 	case KVM_CAP_S390_CSS_SUPPORT:
> > 	case KVM_CAP_IOEVENTFD:
> > +	case KVM_CAP_DEVICE_CTRL:
> > 		r = 1;
> > 		break;
> > 	case KVM_CAP_NR_VCPUS:
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index ca645a0..e89304d 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -1065,6 +1065,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp);
> > 
> > extern struct kvm_device_ops kvm_mpic_ops;
> > extern struct kvm_device_ops kvm_xics_ops;
> > +extern struct kvm_device_ops kvm_flic_ops;
> > 
> > #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
> > 
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index eeb08a1..9c8b457 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -426,6 +426,7 @@ struct kvm_s390_psw {
> > 	 ((ai) << 26))
> > #define KVM_S390_INT_IO_MIN		0x00000000u
> > #define KVM_S390_INT_IO_MAX		0xfffdffffu
> > +#define KVM_S390_INT_MAX		0xffffffffu
> > 
> > 
> > struct kvm_s390_interrupt {
> > @@ -906,6 +907,7 @@ struct kvm_device_attr {
> > #define KVM_DEV_TYPE_FSL_MPIC_20	1
> > #define KVM_DEV_TYPE_FSL_MPIC_42	2
> > #define KVM_DEV_TYPE_XICS		3
> > +#define KVM_DEV_TYPE_FLIC		4
> > 
> > /*
> >  * ioctls for VM fds
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index bf040c4..79a2ecc 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -2265,6 +2265,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
> > 		ops = &kvm_xics_ops;
> > 		break;
> > #endif
> > +#ifdef CONFIG_S390
> > +	case KVM_DEV_TYPE_FLIC:
> > +		ops = &kvm_flic_ops;
> > +		break;
> > +#endif
> > 	default:
> > 		return -ENODEV;
> > 	}
> > -- 
> > 1.8.3.4
> > 
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux