This patch adds a floating irq controller as a kvm_device. It will be necessary for migration of floating interrupts as well as for hardening the reset code by allowing user space to explicitly remove all pending floating interrupts. Signed-off-by: Jens Freimann <jfrei@xxxxxxxxxxxxxxxxxx> Reviewed-by: Cornelia Huck <cornelia.huck@xxxxxxxxxx> --- Documentation/virtual/kvm/devices/s390_flic.txt | 36 +++ arch/s390/include/asm/kvm_host.h | 1 + arch/s390/include/uapi/asm/kvm.h | 5 + arch/s390/kvm/interrupt.c | 304 ++++++++++++++++++++---- arch/s390/kvm/kvm-s390.c | 1 + include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 2 + virt/kvm/kvm_main.c | 5 + 8 files changed, 304 insertions(+), 51 deletions(-) diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt new file mode 100644 index 0000000..06aef31 --- /dev/null +++ b/Documentation/virtual/kvm/devices/s390_flic.txt @@ -0,0 +1,36 @@ +FLIC (floating interrupt controller) +==================================== + +FLIC handles floating (non per-cpu) interrupts, i.e. I/O, service and some +machine check interruptions. All interrupts are stored in a per-vm list of +pending interrupts. FLIC performs operations on this list. + +Only one FLIC instance may be instantiated. + +FLIC provides support to +- add/delete interrupts (KVM_DEV_FLIC_ENQUEUE and _DEQUEUE) +- purge all pending floating interrupts (KVM_DEV_FLIC_CLEAR_IRQS) + +Groups: + KVM_DEV_FLIC_ENQUEUE + Adds one interrupt to the list of pending floating interrupts. Interrupts + are taken from this list for injection into the guest. attr contains + a struct kvm_s390_irq which contains all data relevant for + interrupt injection. + The format of the data structure kvm_s390_irq as it is copied from userspace + is defined in usr/include/linux/kvm.h. + For historic reasons list members are stored in a different data structure, i.e. + we need to copy the relevant data into a struct kvm_s390_interrupt_info + which can then be added to the list. + + KVM_DEV_FLIC_DEQUEUE + Takes one element off the pending interrupts list and copies it into userspace. + Dequeued interrupts are not injected into the guest. + attr->addr contains the userspace address of a struct kvm_s390_irq. + List elements are stored in the format of struct kvm_s390_interrupt_info + (arch/s390/include/asm/kvm_host.h) and are copied into a struct kvm_s390_irq + (usr/include/linux/kvm.h) + + KVM_DEV_FLIC_CLEAR_IRQS + Simply deletes all elements from the list of currently pending floating interrupts. + No interrupts are injected into the guest. diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index adb05c5..e1cc166 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -238,6 +238,7 @@ struct kvm_arch{ struct sca_block *sca; debug_info_t *dbf; struct kvm_s390_float_interrupt float_int; + struct kvm_device *flic; struct gmap *gmap; int css_support; }; diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index d25da59..33d52b8 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -16,6 +16,11 @@ #define __KVM_S390 +/* Device control API: s390-specific devices */ +#define KVM_DEV_FLIC_DEQUEUE 1 +#define KVM_DEV_FLIC_ENQUEUE 2 +#define KVM_DEV_FLIC_CLEAR_IRQS 3 + /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { /* general purpose regs for s390 */ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 7f35cb3..d6d5e36 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -656,53 +656,86 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, return inti; } -int kvm_s390_inject_vm(struct kvm *kvm, - struct kvm_s390_interrupt *s390int) +static void __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) { struct kvm_s390_local_interrupt *li; struct kvm_s390_float_interrupt *fi; - struct kvm_s390_interrupt_info *inti, *iter; + struct kvm_s390_interrupt_info *iter; int sigcpu; + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + if (!is_ioint(inti->type)) { + list_add_tail(&inti->list, &fi->list); + } else { + u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word); + + /* Keep I/O interrupts sorted in isc order. */ + list_for_each_entry(iter, &fi->list, list) { + if (!is_ioint(iter->type)) + continue; + if (int_word_to_isc_bits(iter->io.io_int_word) + <= isc_bits) + continue; + break; + } + list_add_tail(&inti->list, &iter->list); + } + atomic_set(&fi->active, 1); + sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); + if (sigcpu == KVM_MAX_VCPUS) { + do { + sigcpu = fi->next_rr_cpu++; + if (sigcpu == KVM_MAX_VCPUS) + sigcpu = fi->next_rr_cpu = 0; + } while (fi->local_int[sigcpu] == NULL); + } + li = fi->local_int[sigcpu]; + spin_lock_bh(&li->lock); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(li->wq)) + wake_up_interruptible(li->wq); + spin_unlock_bh(&li->lock); + spin_unlock(&fi->lock); + mutex_unlock(&kvm->lock); +} + +int kvm_s390_inject_vm(struct kvm *kvm, + struct kvm_s390_interrupt *s390int) +{ + struct kvm_s390_interrupt_info *inti; + inti = kzalloc(sizeof(*inti), GFP_KERNEL); if (!inti) return -ENOMEM; - switch (s390int->type) { + inti->type = s390int->type; + switch (inti->type) { case KVM_S390_INT_VIRTIO: VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx", s390int->parm, s390int->parm64); - inti->type = s390int->type; inti->ext.ext_params = s390int->parm; inti->ext.ext_params2 = s390int->parm64; break; case KVM_S390_INT_SERVICE: VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm); - inti->type = s390int->type; inti->ext.ext_params = s390int->parm; break; - case KVM_S390_PROGRAM_INT: - case KVM_S390_SIGP_STOP: - case KVM_S390_INT_EXTERNAL_CALL: - case KVM_S390_INT_EMERGENCY: - kfree(inti); - return -EINVAL; case KVM_S390_MCHK: VM_EVENT(kvm, 5, "inject: machine check parm64:%llx", s390int->parm64); - inti->type = s390int->type; inti->mchk.cr14 = s390int->parm; /* upper bits are not used */ inti->mchk.mcic = s390int->parm64; break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - if (s390int->type & IOINT_AI_MASK) + if (inti->type & IOINT_AI_MASK) VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); else VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x", s390int->type & IOINT_CSSID_MASK, s390int->type & IOINT_SSID_MASK, s390int->type & IOINT_SCHID_MASK); - inti->type = s390int->type; inti->io.subchannel_id = s390int->parm >> 16; inti->io.subchannel_nr = s390int->parm & 0x0000ffffu; inti->io.io_int_parm = s390int->parm64 >> 32; @@ -715,42 +748,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64, 2); - mutex_lock(&kvm->lock); - fi = &kvm->arch.float_int; - spin_lock(&fi->lock); - if (!is_ioint(inti->type)) - list_add_tail(&inti->list, &fi->list); - else { - u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word); - - /* Keep I/O interrupts sorted in isc order. */ - list_for_each_entry(iter, &fi->list, list) { - if (!is_ioint(iter->type)) - continue; - if (int_word_to_isc_bits(iter->io.io_int_word) - <= isc_bits) - continue; - break; - } - list_add_tail(&inti->list, &iter->list); - } - atomic_set(&fi->active, 1); - sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); - if (sigcpu == KVM_MAX_VCPUS) { - do { - sigcpu = fi->next_rr_cpu++; - if (sigcpu == KVM_MAX_VCPUS) - sigcpu = fi->next_rr_cpu = 0; - } while (fi->local_int[sigcpu] == NULL); - } - li = fi->local_int[sigcpu]; - spin_lock_bh(&li->lock); - atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); - if (waitqueue_active(li->wq)) - wake_up_interruptible(li->wq); - spin_unlock_bh(&li->lock); - spin_unlock(&fi->lock); - mutex_unlock(&kvm->lock); + __inject_vm(kvm, inti); return 0; } @@ -838,3 +836,207 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, mutex_unlock(&vcpu->kvm->lock); return 0; } + +static void clear_floating_interrupts(struct kvm *kvm) +{ + struct kvm_s390_float_interrupt *fi; + struct kvm_s390_interrupt_info *n, *inti = NULL; + + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + list_for_each_entry_safe(inti, n, &fi->list, list) { + list_del(&inti->list); + kfree(inti); + } + atomic_set(&fi->active, 0); + spin_unlock(&fi->lock); + mutex_unlock(&kvm->lock); +} + +static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti, + u64 addr) +{ + struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr; + void __user *target; + void *source; + u64 size; + int r = 0; + + switch (inti->type) { + case KVM_S390_INT_VIRTIO: + case KVM_S390_INT_SERVICE: + source = &inti->ext; + target = &uptr->ext; + size = sizeof(inti->ext); + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + source = &inti->io; + target = &uptr->io; + size = sizeof(inti->io); + break; + case KVM_S390_MCHK: + source = &inti->mchk; + target = &uptr->mchk; + size = sizeof(inti->mchk); + break; + case KVM_S390_INT_MAX: + goto out; + default: + return -EINVAL; + } + + r = put_user(inti->type, (u64 __user *) &uptr->type); + if (copy_to_user(target, source, size)) + r = -EFAULT; + +out: + return r; +} + +static int dequeue_floating_irq(struct kvm *kvm, __u64 addr) +{ + struct kvm_s390_interrupt_info *inti; + struct kvm_s390_float_interrupt *fi; + int r = 0; + + + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock(&fi->lock); + if (list_empty(&fi->list)) { + mutex_unlock(&kvm->lock); + spin_unlock(&fi->lock); + return -ENODATA; + } + inti = list_first_entry(&fi->list, + struct kvm_s390_interrupt_info, list); + list_del(&inti->list); + spin_unlock(&fi->lock); + mutex_unlock(&kvm->lock); + + r = copy_irq_to_user(inti, addr); + + kfree(inti); + return r; +} + +static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + int r; + + switch (attr->group) { + case KVM_DEV_FLIC_DEQUEUE: + r = dequeue_floating_irq(dev->kvm, attr->addr); + break; + default: + r = -EINVAL; + } + + return r; +} + +static inline int copy_irq_from_user(struct kvm_s390_interrupt_info *inti, + u64 addr) +{ + struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr; + void *target = NULL; + void *source; + u64 size; + int r = 0; + + if (get_user(inti->type, (u64 __user *)addr)) + return -EFAULT; + switch (inti->type) { + case KVM_S390_INT_VIRTIO: + case KVM_S390_INT_SERVICE: + target = (void *) &inti->ext; + source = &uptr->ext; + size = sizeof(inti->ext); + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + target = (void *) &inti->io; + source = &uptr->io; + size = sizeof(inti->io); + break; + case KVM_S390_MCHK: + target = (void *) &inti->mchk; + source = &uptr->mchk; + size = sizeof(inti->mchk); + break; + case KVM_S390_INT_MAX: + goto out; + default: + r = -EINVAL; + return r; + } + + if (copy_from_user(target, source, size)) + r = -EFAULT; + +out: + return r; +} + +static int enqueue_floating_irq(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct kvm_s390_interrupt_info *inti = NULL; + int r = 0; + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + r = copy_irq_from_user(inti, attr->addr); + if (r) { + kfree(inti); + return r; + } + __inject_vm(dev->kvm, inti); + + return r; +} + +static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + int r = 0; + + switch (attr->group) { + case KVM_DEV_FLIC_ENQUEUE: + r = enqueue_floating_irq(dev, attr); + break; + case KVM_DEV_FLIC_CLEAR_IRQS: + r = 0; + clear_floating_interrupts(dev->kvm); + break; + default: + r = -EINVAL; + } + + return r; +} + +static int flic_create(struct kvm_device *dev, u32 type) +{ + if (!dev) + return -EINVAL; + if (dev->kvm->arch.flic) + return -EINVAL; + dev->kvm->arch.flic = dev; + return 0; +} + +static void flic_destroy(struct kvm_device *dev) +{ + dev->kvm->arch.flic = NULL; +} + +/* s390 floating irq controller (flic) */ +struct kvm_device_ops kvm_flic_ops = { + .name = "kvm-flic", + .get_attr = flic_get_attr, + .set_attr = flic_set_attr, + .create = flic_create, + .destroy = flic_destroy, +}; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 776dafe..760febf 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -157,6 +157,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_ENABLE_CAP: case KVM_CAP_S390_CSS_SUPPORT: case KVM_CAP_IOEVENTFD: + case KVM_CAP_DEVICE_CTRL: r = 1; break; case KVM_CAP_NR_VCPUS: diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ca645a0..e89304d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1065,6 +1065,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; +extern struct kvm_device_ops kvm_flic_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index eeb08a1..9c8b457 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -426,6 +426,7 @@ struct kvm_s390_psw { ((ai) << 26)) #define KVM_S390_INT_IO_MIN 0x00000000u #define KVM_S390_INT_IO_MAX 0xfffdffffu +#define KVM_S390_INT_MAX 0xffffffffu struct kvm_s390_interrupt { @@ -906,6 +907,7 @@ struct kvm_device_attr { #define KVM_DEV_TYPE_FSL_MPIC_20 1 #define KVM_DEV_TYPE_FSL_MPIC_42 2 #define KVM_DEV_TYPE_XICS 3 +#define KVM_DEV_TYPE_FLIC 4 /* * ioctls for VM fds diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index bf040c4..79a2ecc 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2265,6 +2265,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm, ops = &kvm_xics_ops; break; #endif +#ifdef CONFIG_S390 + case KVM_DEV_TYPE_FLIC: + ops = &kvm_flic_ops; + break; +#endif default: return -ENODEV; } -- 1.8.3.4 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html