On 27.04.2013, at 12:28, Paul Mackerras wrote: > This adds the API for userspace to instantiate an XICS device in a VM > and connect VCPUs to it. The API consists of a new device type for > the KVM_CREATE_DEVICE ioctl, a new capability KVM_CAP_IRQ_XICS, which > functions similarly to KVM_CAP_IRQ_MPIC, and the KVM_IRQ_LINE ioctl, > which is used to assert and deassert interrupt inputs of the XICS. > > The XICS device has one attribute group, KVM_DEV_XICS_GRP_SOURCES. > Each attribute within this group corresponds to the state of one > interrupt source. The attribute number is the same as the interrupt > source number. > > This does not support irq routing or irqfd yet. > > Signed-off-by: Paul Mackerras <paulus@xxxxxxxxx> Thanks, applied to kvm-ppc-queue. I'll try to push this for 3.10 still. Alex > --- > Documentation/virtual/kvm/api.txt | 8 ++ > Documentation/virtual/kvm/devices/xics.txt | 66 ++++++++++ > arch/powerpc/include/asm/kvm_ppc.h | 2 + > arch/powerpc/include/uapi/asm/kvm.h | 12 ++ > arch/powerpc/kvm/book3s_xics.c | 190 ++++++++++++++++++++++++---- > arch/powerpc/kvm/book3s_xics.h | 1 + > arch/powerpc/kvm/irq.h | 3 + > arch/powerpc/kvm/powerpc.c | 22 ++++ > include/linux/kvm_host.h | 1 + > include/uapi/linux/kvm.h | 2 + > virt/kvm/kvm_main.c | 5 + > 11 files changed, 287 insertions(+), 25 deletions(-) > create mode 100644 Documentation/virtual/kvm/devices/xics.txt > > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt > index c09d183..03492f9 100644 > --- a/Documentation/virtual/kvm/api.txt > +++ b/Documentation/virtual/kvm/api.txt > @@ -2772,3 +2772,11 @@ Parameters: args[0] is the MPIC device fd > args[1] is the MPIC CPU number for this vcpu > > This capability connects the vcpu to an in-kernel MPIC device. 
> + > +6.7 KVM_CAP_IRQ_XICS > + > +Architectures: ppc > +Parameters: args[0] is the XICS device fd > + args[1] is the XICS CPU number (server ID) for this vcpu > + > +This capability connects the vcpu to an in-kernel XICS device. > diff --git a/Documentation/virtual/kvm/devices/xics.txt b/Documentation/virtual/kvm/devices/xics.txt > new file mode 100644 > index 0000000..4286493 > --- /dev/null > +++ b/Documentation/virtual/kvm/devices/xics.txt > @@ -0,0 +1,66 @@ > +XICS interrupt controller > + > +Device type supported: KVM_DEV_TYPE_XICS > + > +Groups: > + KVM_DEV_XICS_GRP_SOURCES > + Attributes: One per interrupt source, indexed by the source number. > + > +This device emulates the XICS (eXternal Interrupt Controller > +Specification) defined in PAPR. The XICS has a set of interrupt > +sources, each identified by a 20-bit source number, and a set of > +Interrupt Control Presentation (ICP) entities, also called "servers", > +each associated with a virtual CPU. > + > +The ICP entities are created by enabling the KVM_CAP_IRQ_XICS > +capability for each vcpu, specifying the XICS device fd in args[0] and > +the interrupt server number (i.e. the vcpu number from the XICS's > +point of view) in args[1] of the kvm_enable_cap struct. Each ICP has > +64 bits of state which can be read and written using the > +KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctls on the vcpu. The 64 bit > +state word has the following bitfields, starting at the > +least-significant end of the word: > + > +* Unused, 16 bits > + > +* Pending interrupt priority, 8 bits > + Zero is the highest priority, 255 means no interrupt is pending. > + > +* Pending IPI (inter-processor interrupt) priority, 8 bits > + Zero is the highest priority, 255 means no IPI is pending.
> + > +* Pending interrupt source number, 24 bits > + Zero means no interrupt pending, 2 means an IPI is pending > + > +* Current processor priority, 8 bits > + Zero is the highest priority, meaning no interrupts can be > + delivered, and 255 is the lowest priority. > + > +Each source has 64 bits of state that can be read and written using > +the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the > +KVM_DEV_XICS_GRP_SOURCES attribute group, with the attribute number being > +the interrupt source number. The 64 bit state word has the following > +bitfields, starting from the least-significant end of the word: > + > +* Destination (server number), 32 bits > + This specifies where the interrupt should be sent, and is the > + interrupt server number specified for the destination vcpu. > + > +* Priority, 8 bits > + This is the priority specified for this interrupt source, where 0 is > + the highest priority and 255 is the lowest. An interrupt with a > + priority of 255 will never be delivered. > + > +* Level sensitive flag, 1 bit > + This bit is 1 for a level-sensitive interrupt source, or 0 for > + edge-sensitive (or MSI). > + > +* Masked flag, 1 bit > + This bit is set to 1 if the interrupt is masked (cannot be delivered > + regardless of its priority), for example by the ibm,int-off RTAS > + call, or 0 if it is not masked. > + > +* Pending flag, 1 bit > + This bit is 1 if the source has a pending interrupt, otherwise 0. > + > +Only one XICS instance may be created per VM.
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h > index d7339df..a5287fe 100644 > --- a/arch/powerpc/include/asm/kvm_ppc.h > +++ b/arch/powerpc/include/asm/kvm_ppc.h > @@ -315,6 +315,8 @@ extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args); > extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd); > extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu); > extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval); > +extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev, > + struct kvm_vcpu *vcpu, u32 cpu); > #else > static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) > { return 0; } > diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h > index 427b9ac..0fb1a6e 100644 > --- a/arch/powerpc/include/uapi/asm/kvm.h > +++ b/arch/powerpc/include/uapi/asm/kvm.h > @@ -499,4 +499,16 @@ struct kvm_get_htab_header { > #define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a) > #define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b) > > +/* PPC64 eXternal Interrupt Controller Specification */ > +#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ > + > +/* Layout of 64-bit source attribute values */ > +#define KVM_XICS_DESTINATION_SHIFT 0 > +#define KVM_XICS_DESTINATION_MASK 0xffffffffULL > +#define KVM_XICS_PRIORITY_SHIFT 32 > +#define KVM_XICS_PRIORITY_MASK 0xff > +#define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40) > +#define KVM_XICS_MASKED (1ULL << 41) > +#define KVM_XICS_PENDING (1ULL << 42) > + > #endif /* __LINUX_KVM_POWERPC_H */ > diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c > index ee841ed..f7a1037 100644 > --- a/arch/powerpc/kvm/book3s_xics.c > +++ b/arch/powerpc/kvm/book3s_xics.c > @@ -11,6 +11,7 @@ > #include <linux/kvm_host.h> > #include <linux/err.h> > #include <linux/gfp.h> > +#include <linux/anon_inodes.h> > > #include <asm/uaccess.h> > #include <asm/kvm_book3s.h> > @@ -55,8 
+56,6 @@ > * > * - Make ICS lockless as well, or at least a per-interrupt lock or hashed > * locks array to improve scalability > - * > - * - ioctl's to save/restore the entire state for snapshot & migration > */ > > /* -- ICS routines -- */ > @@ -64,7 +63,8 @@ > static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, > u32 new_irq); > > -static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level) > +static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level, > + bool report_status) > { > struct ics_irq_state *state; > struct kvmppc_ics *ics; > @@ -81,6 +81,9 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level) > if (!state->exists) > return -EINVAL; > > + if (report_status) > + return state->asserted; > + > /* > * We set state->asserted locklessly. This should be fine as > * we are the only setter, thus concurrent access is undefined > @@ -96,7 +99,7 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level) > /* Attempt delivery */ > icp_deliver_irq(xics, NULL, irq); > > - return 0; > + return state->asserted; > } > > static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics, > @@ -891,8 +894,8 @@ static void xics_debugfs_init(struct kvmppc_xics *xics) > kfree(name); > } > > -struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm, > - struct kvmppc_xics *xics, int irq) > +static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm, > + struct kvmppc_xics *xics, int irq) > { > struct kvmppc_ics *ics; > int i, icsid; > @@ -1044,34 +1047,138 @@ int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval) > return 0; > } > > -/* -- ioctls -- */ > +static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr) > +{ > + int ret; > + struct kvmppc_ics *ics; > + struct ics_irq_state *irqp; > + u64 __user *ubufp = (u64 __user *) addr; > + u16 idx; > + u64 val, prio; > + > + ics = kvmppc_xics_find_ics(xics, irq, &idx); > + if (!ics) > + return 
-ENOENT; > > -int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args) > + irqp = &ics->irq_state[idx]; > + mutex_lock(&ics->lock); > + ret = -ENOENT; > + if (irqp->exists) { > + val = irqp->server; > + prio = irqp->priority; > + if (prio == MASKED) { > + val |= KVM_XICS_MASKED; > + prio = irqp->saved_priority; > + } > + val |= prio << KVM_XICS_PRIORITY_SHIFT; > + if (irqp->asserted) > + val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING; > + else if (irqp->masked_pending || irqp->resend) > + val |= KVM_XICS_PENDING; > + ret = 0; > + } > + mutex_unlock(&ics->lock); > + > + if (!ret && put_user(val, ubufp)) > + ret = -EFAULT; > + > + return ret; > +} > + > +static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr) > { > - struct kvmppc_xics *xics; > - int r; > + struct kvmppc_ics *ics; > + struct ics_irq_state *irqp; > + u64 __user *ubufp = (u64 __user *) addr; > + u16 idx; > + u64 val; > + u8 prio; > + u32 server; > + > + if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS) > + return -ENOENT; > + > + ics = kvmppc_xics_find_ics(xics, irq, &idx); > + if (!ics) { > + ics = kvmppc_xics_create_ics(xics->kvm, xics, irq); > + if (!ics) > + return -ENOMEM; > + } > + irqp = &ics->irq_state[idx]; > + if (get_user(val, ubufp)) > + return -EFAULT; > + > + server = val & KVM_XICS_DESTINATION_MASK; > + prio = val >> KVM_XICS_PRIORITY_SHIFT; > + if (prio != MASKED && > + kvmppc_xics_find_server(xics->kvm, server) == NULL) > + return -EINVAL; > > - /* locking against multiple callers? 
*/ > + mutex_lock(&ics->lock); > + irqp->server = server; > + irqp->saved_priority = prio; > + if (val & KVM_XICS_MASKED) > + prio = MASKED; > + irqp->priority = prio; > + irqp->resend = 0; > + irqp->masked_pending = 0; > + irqp->asserted = 0; > + if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE)) > + irqp->asserted = 1; > + irqp->exists = 1; > + mutex_unlock(&ics->lock); > > - xics = kvm->arch.xics; > - if (!xics) > - return -ENODEV; > + if (val & KVM_XICS_PENDING) > + icp_deliver_irq(xics, NULL, irqp->number); > > - switch (args->level) { > - case KVM_INTERRUPT_SET: > - case KVM_INTERRUPT_SET_LEVEL: > - case KVM_INTERRUPT_UNSET: > - r = ics_deliver_irq(xics, args->irq, args->level); > - break; > - default: > - r = -EINVAL; > + return 0; > +} > + > +int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, > + bool line_status) > +{ > + struct kvmppc_xics *xics = kvm->arch.xics; > + > + return ics_deliver_irq(xics, irq, level, line_status); > +} > + > +static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) > +{ > + struct kvmppc_xics *xics = dev->private; > + > + switch (attr->group) { > + case KVM_DEV_XICS_GRP_SOURCES: > + return xics_set_source(xics, attr->attr, attr->addr); > } > + return -ENXIO; > +} > > - return r; > +static int xics_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) > +{ > + struct kvmppc_xics *xics = dev->private; > + > + switch (attr->group) { > + case KVM_DEV_XICS_GRP_SOURCES: > + return xics_get_source(xics, attr->attr, attr->addr); > + } > + return -ENXIO; > } > > -void kvmppc_xics_free(struct kvmppc_xics *xics) > +static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) > { > + switch (attr->group) { > + case KVM_DEV_XICS_GRP_SOURCES: > + if (attr->attr >= KVMPPC_XICS_FIRST_IRQ && > + attr->attr < KVMPPC_XICS_NR_IRQS) > + return 0; > + break; > + } > + return -ENXIO; > +} > + > +static void kvmppc_xics_free(struct kvm_device *dev) > +{ > + struct 
kvmppc_xics *xics = dev->private; > int i; > struct kvm *kvm = xics->kvm; > > @@ -1083,17 +1190,21 @@ void kvmppc_xics_free(struct kvmppc_xics *xics) > for (i = 0; i <= xics->max_icsid; i++) > kfree(xics->ics[i]); > kfree(xics); > + kfree(dev); > } > > -int kvm_xics_create(struct kvm *kvm, u32 type) > +static int kvmppc_xics_create(struct kvm_device *dev, u32 type) > { > struct kvmppc_xics *xics; > + struct kvm *kvm = dev->kvm; > int ret = 0; > > xics = kzalloc(sizeof(*xics), GFP_KERNEL); > if (!xics) > return -ENOMEM; > > + dev->private = xics; > + xics->dev = dev; > xics->kvm = kvm; > > /* Already there ? */ > @@ -1120,6 +1231,35 @@ int kvm_xics_create(struct kvm *kvm, u32 type) > return 0; > } > > +struct kvm_device_ops kvm_xics_ops = { > + .name = "kvm-xics", > + .create = kvmppc_xics_create, > + .destroy = kvmppc_xics_free, > + .set_attr = xics_set_attr, > + .get_attr = xics_get_attr, > + .has_attr = xics_has_attr, > +}; > + > +int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, > + u32 xcpu) > +{ > + struct kvmppc_xics *xics = dev->private; > + int r = -EBUSY; > + > + if (dev->ops != &kvm_xics_ops) > + return -EPERM; > + if (xics->kvm != vcpu->kvm) > + return -EPERM; > + if (vcpu->arch.irq_type) > + return -EBUSY; > + > + r = kvmppc_xics_create_icp(vcpu, xcpu); > + if (!r) > + vcpu->arch.irq_type = KVMPPC_IRQ_XICS; > + > + return r; > +} > + > void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) > { > if (!vcpu->arch.icp) > diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h > index e4fdec3..dd9326c 100644 > --- a/arch/powerpc/kvm/book3s_xics.h > +++ b/arch/powerpc/kvm/book3s_xics.h > @@ -88,6 +88,7 @@ struct kvmppc_ics { > > struct kvmppc_xics { > struct kvm *kvm; > + struct kvm_device *dev; > struct dentry *dentry; > u32 max_icsid; > bool real_mode; > diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h > index f1e27fd..5a9a10b 100644 > --- a/arch/powerpc/kvm/irq.h > +++ b/arch/powerpc/kvm/irq.h > @@ 
-10,6 +10,9 @@ static inline int irqchip_in_kernel(struct kvm *kvm) > #ifdef CONFIG_KVM_MPIC > ret = ret || (kvm->arch.mpic != NULL); > #endif > +#ifdef CONFIG_KVM_XICS > + ret = ret || (kvm->arch.xics != NULL); > +#endif > smp_rmb(); > return ret; > } > diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c > index 31084c6..7a03d74 100644 > --- a/arch/powerpc/kvm/powerpc.c > +++ b/arch/powerpc/kvm/powerpc.c > @@ -342,6 +342,9 @@ int kvm_dev_ioctl_check_extension(long ext) > case KVM_CAP_SPAPR_TCE: > case KVM_CAP_PPC_ALLOC_HTAB: > case KVM_CAP_PPC_RTAS: > +#ifdef CONFIG_KVM_XICS > + case KVM_CAP_IRQ_XICS: > +#endif > r = 1; > break; > #endif /* CONFIG_PPC_BOOK3S_64 */ > @@ -822,6 +825,25 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, > break; > } > #endif > +#ifdef CONFIG_KVM_XICS > + case KVM_CAP_IRQ_XICS: { > + struct file *filp; > + struct kvm_device *dev; > + > + r = -EBADF; > + filp = fget(cap->args[0]); > + if (!filp) > + break; > + > + r = -EPERM; > + dev = kvm_device_from_filp(filp); > + if (dev) > + r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]); > + > + fput(filp); > + break; > + } > +#endif /* CONFIG_KVM_XICS */ > default: > r = -EINVAL; > break; > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 36c9776..9d686a5 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -1101,6 +1101,7 @@ void kvm_device_put(struct kvm_device *dev); > struct kvm_device *kvm_device_from_filp(struct file *filp); > > extern struct kvm_device_ops kvm_mpic_ops; > +extern struct kvm_device_ops kvm_xics_ops; > > #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT > > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index d400519..b163d5c 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -669,6 +669,7 @@ struct kvm_ppc_smmu_info { > #define KVM_CAP_DEVICE_CTRL 89 > #define KVM_CAP_IRQ_MPIC 90 > #define KVM_CAP_PPC_RTAS 91 > +#define KVM_CAP_IRQ_XICS 92 > > #ifdef 
KVM_CAP_IRQ_ROUTING > > @@ -841,6 +842,7 @@ struct kvm_device_attr { > > #define KVM_DEV_TYPE_FSL_MPIC_20 1 > #define KVM_DEV_TYPE_FSL_MPIC_42 2 > +#define KVM_DEV_TYPE_XICS 3 > > /* > * ioctls for VM fds > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 5da9f02..e6e7abe 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -2247,6 +2247,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm, > ops = &kvm_mpic_ops; > break; > #endif > +#ifdef CONFIG_KVM_XICS > + case KVM_DEV_TYPE_XICS: > + ops = &kvm_xics_ops; > + break; > +#endif > default: > return -ENODEV; > } > -- > 1.7.10.4 > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html