This is an initial variant of the in-kernel XICS emulation for both HV and PR KVM running in PAPR mode. It is based on an initial implementation by Michael Ellerman <michael@xxxxxxxxxxxxxx>, which I then reworked. It supports up to 4095 "BUIDs" (blocks of interrupts) of up to 4096 interrupts each. Signed-off-by: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx> --- arch/powerpc/include/asm/kvm.h | 39 ++ arch/powerpc/include/asm/kvm_host.h | 8 + arch/powerpc/include/asm/kvm_ppc.h | 30 +- arch/powerpc/kvm/Makefile | 1 + arch/powerpc/kvm/book3s.c | 3 +- arch/powerpc/kvm/book3s_hv.c | 20 + arch/powerpc/kvm/book3s_pr.c | 13 + arch/powerpc/kvm/book3s_pr_papr.c | 19 +- arch/powerpc/kvm/book3s_rtas.c | 51 +- arch/powerpc/kvm/book3s_xics.c | 882 +++++++++++++++++++++++++++++++++++ arch/powerpc/kvm/booke.c | 3 +- arch/powerpc/kvm/powerpc.c | 28 +- include/linux/kvm.h | 9 +- 13 files changed, 1090 insertions(+), 16 deletions(-) create mode 100644 arch/powerpc/kvm/book3s_xics.c diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 3dc91df..f653424 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -296,6 +296,45 @@ struct kvm_rtas_token_args { __u64 token; /* Use a token of 0 to undefine a mapping */ }; +/* for KVM_CAP_SPAPR_XICS */ +#define __KVM_HAVE_IRQCHIP_ARGS +struct kvm_irqchip_args { +#define KVM_IRQCHIP_TYPE_ICP 0 /* XICS: ICP (presentation controller) */ +#define KVM_IRQCHIP_TYPE_ICS 1 /* XICS: ICS (source controller) */ + __u32 type; + union { + /* XICS ICP arguments. This needs to be called once before + * creating any VCPU to initialize the main kernel XICS data + * structures. + */ + struct { +#define KVM_ICP_FLAG_NOREALMODE 0x00000001 /* Disable real mode ICP */ + __u32 flags; + } icp; + + /* XICS ICS arguments. You can call this for every BUID you + * want to make available. + * + * The BUID is 12 bits, the interrupt number within a BUID + * is up to 12 bits as well. The resulting interrupt numbers + * exposed to the guest are BUID || IRQ, which is 24 bits. + * + * BUID cannot be 0.
+ */ + struct { + __u32 flags; + __u16 buid; + __u16 nr_irqs; + } ics; + }; +}; + +struct kvm_spapr_xics_xive { + __u32 irq; + __u32 server; + __u32 priority; +}; + struct kvm_book3e_206_tlb_entry { __u32 mas8; __u32 mas1; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d9c3f63..ccbf3dc 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -187,6 +187,10 @@ struct kvmppc_linear_info { int type; }; +/* XICS components, defined in book3s_xics.c */ +struct kvmppc_xics; +struct kvmppc_icp; + /* * The reverse mapping array has one entry for each HPTE, * which stores the guest's view of the second word of the HPTE @@ -251,6 +255,7 @@ struct kvm_arch { #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; struct list_head rtas_tokens; + struct kvmppc_xics *xics; #endif }; @@ -532,6 +537,9 @@ struct kvm_vcpu_arch { u64 stolen_logged; struct kvmppc_vpa slb_shadow; #endif +#ifdef CONFIG_PPC_BOOK3S_64 + struct kvmppc_icp *icp; /* XICS presentation controller */ +#endif }; /* Values for vcpu->arch.state */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index e23bfc6..ce81d91 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -102,8 +102,7 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); -extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq); +extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu); extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int op, int *advance); @@ -127,6 +126,12 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm, extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, unsigned long porder); extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); +extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd); +extern int kvmppc_xics_ioctl(struct kvm *kvm, unsigned ioctl, unsigned long arg); +extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); +extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu); +extern void kvmppc_xics_free(struct kvm *kvm); + extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce *args); extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, @@ -152,6 +157,8 @@ extern void kvmppc_bookehv_exit(void); extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp); extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu); extern void kvmppc_rtas_tokens_free(struct kvm *kvm); +extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority); +extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority); /* * Cuts out inst bits with ordering according to spec.
@@ -213,6 +220,25 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) static inline void kvm_linear_init(void) {} + +#endif + +#ifdef CONFIG_PPC_BOOK3S_64 + +extern int kvmppc_xics_ioctl(struct kvm *kvm, unsigned ioctl, unsigned long arg); + +static inline int kvmppc_xics_enabled(struct kvm *kvm) +{ + return kvm->arch.xics != NULL; +} + +#else +static inline int kvmppc_xics_ioctl(struct kvm *kvm, unsigned ioctl, + unsigned long arg) +{ + return -ENOTTY; +} +static inline int kvmppc_xics_enabled(struct kvm *kvm) { return 0; } #endif int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 536f65f..ec2f8da 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -81,6 +81,7 @@ kvm-book3s_64-module-objs := \ book3s.o \ book3s_64_vio.o \ book3s_rtas.o \ + book3s_xics.o \ $(kvm-book3s_64-objs-y) kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs) diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 3f2a836..5c631e4 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -160,8 +160,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, kvmppc_book3s_queue_irqprio(vcpu, vec); } -void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq) +void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) { kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 6199063..b41e586 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -465,6 +465,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) /* Send the error out to userspace via KVM_RUN */ return rc; + case H_XIRR: + case H_CPPR: + case H_EOI: + case H_IPI: + if (kvmppc_xics_enabled(vcpu->kvm)) { + ret = kvmppc_xics_hcall(vcpu, req); + break; + } /* fallthrough */ default: return RESUME_HOST; } @@ -673,6 +681,13 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) kvmppc_set_pvr(vcpu, vcpu->arch.pvr); spin_lock_init(&vcpu->arch.vpa_update_lock); + /* Create the XICS */ + if (kvmppc_xics_enabled(kvm)) { + err = kvmppc_xics_create_icp(vcpu); + if (err < 0) + goto free_vcpu; + } + kvmppc_mmu_book3s_hv_init(vcpu); /* @@ -727,6 +742,8 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr); spin_unlock(&vcpu->arch.vpa_update_lock); kvm_vcpu_uninit(vcpu); + if (kvmppc_xics_enabled(vcpu->kvm)) + kvmppc_xics_free_icp(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } @@ -1602,6 +1619,9 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) kvmppc_rtas_tokens_free(kvm); + if (kvmppc_xics_enabled(kvm)) + kvmppc_xics_free(kvm); + kvmppc_free_hpt(kvm); WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); } diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 9f4c13f..ab9776b 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -984,6 +984,13 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) if (err < 0) goto uninit_vcpu; + /* Create the XICS */ + if (kvmppc_xics_enabled(kvm)) { + err = kvmppc_xics_create_icp(vcpu); + if (err < 0) + goto free_vcpu; + } + return vcpu; uninit_vcpu: @@ -1000,6 +1007,8 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); + if
(kvmppc_xics_enabled(vcpu->kvm)) + kvmppc_xics_free_icp(vcpu); free_page((unsigned long)vcpu->arch.shared & PAGE_MASK); kvm_vcpu_uninit(vcpu); kfree(vcpu_book3s->shadow_vcpu); @@ -1199,6 +1208,7 @@ int kvmppc_core_init_vm(struct kvm *kvm) { #ifdef CONFIG_PPC64 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); + INIT_LIST_HEAD(&kvm->arch.rtas_tokens); #endif return 0; @@ -1209,6 +1219,9 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) #ifdef CONFIG_PPC64 WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); #endif + if (kvmppc_xics_enabled(kvm)) + kvmppc_xics_free(kvm); + } static int kvmppc_book3s_init(void) diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index 175404a..8352cac 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -227,6 +227,15 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu) return EMULATE_DONE; } +static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) +{ + long rc = kvmppc_xics_hcall(vcpu, cmd); + if (rc == H_TOO_HARD) + return EMULATE_FAIL; + kvmppc_set_gpr(vcpu, 3, rc); + return EMULATE_DONE; +} + int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) { switch (cmd) { @@ -246,11 +255,17 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) clear_bit(KVM_REQ_UNHALT, &vcpu->requests); vcpu->stat.halt_wakeup++; return EMULATE_DONE; + case H_XIRR: + case H_CPPR: + case H_EOI: + case H_IPI: + if (kvmppc_xics_enabled(vcpu->kvm)) + return kvmppc_h_pr_xics_hcall(vcpu, cmd); + break; case H_RTAS: if (list_empty(&vcpu->kvm->arch.rtas_tokens)) return RESUME_HOST; - rc = kvmppc_rtas_hcall(vcpu); - if (rc != 0) + if (kvmppc_rtas_hcall(vcpu) != 0) break; kvmppc_set_gpr(vcpu, 3, 0); return EMULATE_DONE; diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 8a324e8..6a6c1fe 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -18,12 +18,61 @@ #include <asm/rtas.h> +static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) +{ + u32 irq, server, priority; + int rc; + + if (args->nargs != 3 || args->nret != 1) { + rc = -3; + goto out; + } + + irq = args->args[0]; + server = args->args[1]; + priority = args->args[2]; + + rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); + if (rc) + rc = -3; +out: + args->rets[0] = rc; +} + +static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) +{ + u32 irq, server, priority; + int rc; + + if (args->nargs != 1 || args->nret != 3) { + rc = -3; + goto out; + } + + irq = args->args[0]; + + server = priority = 0; + rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); + if (rc) { + rc = -3; + goto out; + } + + args->rets[1] = server; + args->rets[2] = priority; +out: + args->rets[0] = rc; +} + struct rtas_handler { void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args); char *name; }; -static struct rtas_handler rtas_handlers[] = { }; +static struct rtas_handler rtas_handlers[] = { + { .name = "ibm,set-xive", .handler = kvm_rtas_set_xive }, + { .name = "ibm,get-xive", .handler = kvm_rtas_get_xive }, +}; struct rtas_token_definition { struct list_head list; diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c new file mode 100644 index 0000000..5638e21 --- /dev/null +++ b/arch/powerpc/kvm/book3s_xics.c @@ -0,0 +1,882 @@ +/* + * Copyright 2012 Michael Ellerman, IBM Corporation. + * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/kvm_host.h> +#include <linux/err.h> +#include <linux/gfp.h> + +#include <asm/uaccess.h> +#include <asm/kvm_book3s.h> +#include <asm/kvm_ppc.h> +#include <asm/hvcall.h> +#include <asm/xics.h> +#include <asm/debug.h> + +#include <linux/debugfs.h> +#include <linux/seq_file.h> + +#define MASKED 0xff + +#define XICS_DBG(fmt...) do { } while(0) + +#undef DEBUG_REALMODE + +/* + * LOCKING + * ======= + * + * Each ICP has its own lock, and each ICS has its own lock protecting + * the state of its interrupt sources. + * + * An ICS lock nests inside any of the ICP locks, i.e. you are allowed + * to take an ICS lock while holding an ICP lock, but not vice versa. + */ + +/* + * Interrupt numbering + * =================== + * + * The 24-bit global interrupt numbers are divided into two components, + * the BUID and the interrupt source. We have arbitrarily chosen a + * 12-bit BUID and a 12-bit source number within each BUID. + */ + +#define KVMPPC_XICS_MAX_BUID 0xfff +#define KVMPPC_XICS_IRQ_COUNT 0x1000 +#define KVMPPC_XICS_BUID_SHIFT 12 +#define KVMPPC_XICS_SRC_MASK 0xfff + +/* State for one irq in an ics */ +struct ics_irq_state { + u32 number; + u32 server; + u8 priority; + u8 saved_priority; /* currently unused */ + u8 resend; + u8 masked_pending; + u8 asserted; /* Only for LSI */ +}; + +#define ICP_RESEND_MAP_SIZE \ + ((KVMPPC_XICS_MAX_BUID + BITS_PER_LONG - 1) / BITS_PER_LONG) + +struct kvmppc_icp { + struct mutex lock; + struct kvm_vcpu *vcpu; + u32 pending_irq; /* XISR */ + u8 pending_priority; + u8 current_priority; /* CPPR */ + u8 mfrr; /* MFRR */ + bool need_resend; + unsigned long resend_map[ICP_RESEND_MAP_SIZE]; +}; + + +struct kvmppc_ics { + struct mutex lock; + u16 buid; + u16 nr_irqs; + struct ics_irq_state irq_state[]; +}; + +struct kvmppc_xics { + struct kvm *kvm; + struct dentry *dentry; + u32 max_buid; + struct kvmppc_ics *ics[KVMPPC_XICS_MAX_BUID]; /* [1...MAX_BUID] */ +}; + +static struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm, u32 nr) +{ + struct kvm_vcpu *vcpu = NULL; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (nr == vcpu->vcpu_id) + return vcpu->arch.icp; + } + return NULL; +} + +static struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics, + u32 irq, u16 *source) +{ + u16 buid = irq >> KVMPPC_XICS_BUID_SHIFT; + u16 src = irq & KVMPPC_XICS_SRC_MASK; + struct kvmppc_ics *ics; + + /* BUID 0 is invalid, reject it before it indexes off the array */ + if (buid == 0 || buid > KVMPPC_XICS_MAX_BUID) + return NULL; + + ics = xics->ics[buid - 1]; + if (!ics) + return NULL; + if (src >= ics->nr_irqs) + return NULL; + if (source) + *source = src; + return ics; +} + + +/* -- ICS routines -- */ + +static void icp_deliver_irq(struct kvmppc_xics *xics, + struct kvmppc_icp *icp, + struct kvmppc_ics *ics, u16 src); + +static void __ics_reject_irq(struct kvmppc_icp *icp, + struct kvmppc_ics *ics, u16 src) +{ + struct ics_irq_state *state = &ics->irq_state[src]; + + XICS_DBG("server %d reject src %#x\n", icp->vcpu->vcpu_id, src); + + /* XXX check if it is still level & asserted?
*/ + state->resend = 1; + set_bit(ics->buid, icp->resend_map); + icp->need_resend = true; +} + +static void ics_reject_irq(struct kvmppc_xics *xics, + struct kvmppc_icp *icp, u32 irq) +{ + struct kvmppc_ics *ics; + u16 src; + + lockdep_assert_held(&icp->lock); + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) { + pr_warning("ics_reject_irq: IRQ 0x%06x not found !\n", irq); + return; + } + + mutex_lock(&ics->lock); + __ics_reject_irq(icp, ics, src); + mutex_unlock(&ics->lock); +} + +static void ics_eoi(struct kvmppc_xics *xics, struct kvmppc_icp *icp, + u32 irq) +{ + struct ics_irq_state *state; + struct kvmppc_ics *ics; + u16 src; + + XICS_DBG("ics_eoi 0x%06x\n", irq); + + lockdep_assert_held(&icp->lock); + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) { + pr_warning("ics_eoi: IRQ 0x%06x not found !\n", irq); + return; + } + state = &ics->irq_state[src]; + + mutex_lock(&ics->lock); + + /* If it's an LSI and still asserted we resend */ + if (state->asserted) { + state->resend = 1; + set_bit(ics->buid, icp->resend_map); + icp->need_resend = true; + } + + mutex_unlock(&ics->lock); +} + +static void ics_deliver_irq(struct kvmppc_xics *xics, + u32 irq, u32 level) +{ + struct kvmppc_icp *icp; + struct ics_irq_state *state; + struct kvmppc_ics *ics; + bool deliver = false; + u32 server; + u16 src; + + XICS_DBG("ics deliver 0x%06x (level: %d)\n", irq, level); + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) { + pr_warning("ics_deliver_irq: IRQ 0x%06x not found !\n", irq); + return; + } + state = &ics->irq_state[src]; + + mutex_lock(&ics->lock); + + if (level == KVM_INTERRUPT_SET_LEVEL) + state->asserted = 1; + else if (level == KVM_INTERRUPT_UNSET) { + state->asserted = 0; + goto unlock; + } + + if (state->priority != MASKED) { + deliver = true; + server = state->server; + } else { + XICS_DBG("masked pending\n"); + state->masked_pending = 1; + } + +unlock: + mutex_unlock(&ics->lock); + + if (deliver) { + icp = kvmppc_xics_find_server(xics->kvm, server); + /* Configured server not found... 
XXX FALLBACK */ + if (icp) + icp_deliver_irq(xics, icp, ics, src); + } +} + +static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics, + struct kvmppc_icp *icp) +{ + u32 server = icp->vcpu->vcpu_id; + int i; + + mutex_lock(&ics->lock); + + for (i = 0; i < ics->nr_irqs; i++) { + struct ics_irq_state *state = &ics->irq_state[i]; + + if (!state->resend || state->server != server) + continue; + + XICS_DBG("resend 0x%06x prio %d\n", state->number, + state->priority); + + state->resend = 0; + if (state->priority == MASKED) + continue; + + mutex_unlock(&ics->lock); + icp_deliver_irq(xics, icp, ics, i); + mutex_lock(&ics->lock); + } + + mutex_unlock(&ics->lock); +} + +static void icp_check_resend(struct kvmppc_xics *xics, + struct kvmppc_icp *icp) +{ + u32 buid; + + for_each_set_bit(buid, icp->resend_map, xics->max_buid + 1) { + struct kvmppc_ics *ics = xics->ics[buid - 1]; + + if (!test_and_clear_bit(buid, icp->resend_map)) + continue; + if (!ics) + continue; + ics_check_resend(xics, ics, icp); + } +} + +int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_icp *icp; + struct kvmppc_ics *ics; + struct ics_irq_state *state; + u16 src; + bool deliver; + + if (!xics) + return -ENODEV; + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) + return -EINVAL; + state = &ics->irq_state[src]; + + icp = kvmppc_xics_find_server(kvm, server); + if (!icp) + return -EINVAL; + + mutex_lock(&ics->lock); + + state->server = server; + state->priority = priority; + deliver = false; + if (state->masked_pending && state->priority != MASKED) { + state->masked_pending = 0; + deliver = true; + } + + mutex_unlock(&ics->lock); + + XICS_DBG("irq 0x%06x server %d prio %#x\n", irq, server, priority); + + if (deliver) + icp_deliver_irq(xics, icp, ics, src); + + return 0; +} + +int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_ics *ics; + struct ics_irq_state *state; + u16 src; + + if (!xics) + return -ENODEV; + + ics = kvmppc_xics_find_ics(xics, irq, &src); + if (!ics) + return -EINVAL; + state = &ics->irq_state[src]; + + mutex_lock(&ics->lock); + *server = state->server; + *priority = state->priority; + mutex_unlock(&ics->lock); + + XICS_DBG("irq 0x%06x server %d prio %#x\n", + irq, state->server, state->priority); + + return 0; +} + +/* -- ICP routines, including hcalls -- */ + +static void icp_external_interrupt(struct kvmppc_icp *icp) +{ + unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL; + + lockdep_assert_held(&icp->lock); + + kvmppc_book3s_queue_irqprio(icp->vcpu, vec); + kvm_vcpu_kick(icp->vcpu); +} + +static void icp_deliver_irq(struct kvmppc_xics *xics, + struct kvmppc_icp *icp, + struct kvmppc_ics *ics, u16 src) +{ + struct ics_irq_state state_copy; + + mutex_lock(&icp->lock); + + /* Snapshot irq state */ + mutex_lock(&ics->lock); + state_copy = ics->irq_state[src]; + + if (state_copy.priority > icp->current_priority) { + /* CPU is not interested in us */ + __ics_reject_irq(icp, ics, src); + mutex_unlock(&ics->lock); + goto out; + } + + if (icp->pending_irq) { + /* An interrupt is pending */ + if (icp->pending_priority <= state_copy.priority) { + /* pending irq is equally or more favoured */ + __ics_reject_irq(icp, ics, src); + mutex_unlock(&ics->lock); + goto out; + } + } + mutex_unlock(&ics->lock); + + /* We are more favoured, reject pending irq */ + if (icp->pending_irq) + ics_reject_irq(xics, icp, 
icp->pending_irq); + + icp->pending_irq = state_copy.number; + icp->pending_priority = state_copy.priority; + + XICS_DBG("irq 0x%06x pending on %d prio %#x\n", + state_copy.number, state_copy.server, state_copy.priority); + + icp_external_interrupt(icp); + +out: + mutex_unlock(&icp->lock); +} + +static void icp_check_ipi(struct kvmppc_xics *xics, struct kvmppc_icp *icp) +{ + lockdep_assert_held(&icp->lock); + + if (icp->mfrr >= icp->current_priority) + return; + + XICS_DBG("cpu %d can take IPI mfrr=%#x\n", + icp->vcpu->vcpu_id, icp->mfrr); + + if (icp->pending_irq) { + /* IPI is less favoured */ + if (icp->pending_priority <= icp->mfrr) { + XICS_DBG("ODD: pending_prio=%#x pending_irq=%#x\n", + icp->pending_priority, icp->pending_irq); + return; + } + + /* IPI is more favoured, reject the other interrupt */ + ics_reject_irq(xics, icp, icp->pending_irq); + } + + icp->pending_irq = XICS_IPI; + icp->pending_priority = icp->mfrr; + icp_external_interrupt(icp); +} + +static u32 icp_accept(struct kvm_vcpu *vcpu, struct kvmppc_icp *icp) +{ + u32 xirr; + + mutex_lock(&icp->lock); + + kvmppc_core_dequeue_external(vcpu); + + /* The XIRR is the pending interrupt & current priority */ + xirr = icp->pending_irq | (icp->current_priority << 24); + + /* The pending priority becomes current */ + icp->current_priority = icp->pending_priority; + + /* Clear the pending interrupt */ + icp->pending_irq = 0; + + mutex_unlock(&icp->lock); + + return xirr; +} + +static unsigned long h_xirr(struct kvm_vcpu *vcpu) +{ + struct kvmppc_icp *icp = vcpu->arch.icp; + u32 xirr; + + xirr = icp_accept(vcpu, icp); + + XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr); + + return xirr; +} + +static int h_ipi(struct kvm_vcpu *vcpu, unsigned long server, + unsigned long mfrr) +{ + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + struct kvmppc_icp *icp; + + XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n", + vcpu->vcpu_id, server, mfrr); + + icp = kvmppc_xics_find_server(vcpu->kvm, server); + if (!icp) + return H_PARAMETER; + + mutex_lock(&icp->lock); + + icp->mfrr = mfrr; + icp_check_ipi(xics, icp); + + mutex_unlock(&icp->lock); + + return H_SUCCESS; +} + +static void h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) +{ + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + struct kvmppc_icp *icp = vcpu->arch.icp; + u8 old_priority; + bool check_resend = false; + + XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr); + + mutex_lock(&icp->lock); + + old_priority = icp->current_priority; + icp->current_priority = cppr; + + if (icp->pending_irq && + icp->current_priority < icp->pending_priority) { + u32 pending = icp->pending_irq; + /* Pending irq is less favoured than our new priority */ + icp->pending_irq = 0; + kvmppc_core_dequeue_external(vcpu); + ics_reject_irq(xics, icp, pending); + } + + /* Check if there is anything we can accept now */ + if (!icp->pending_irq) + icp_check_ipi(xics, icp); + if (!icp->pending_irq && icp->need_resend) { + check_resend = true; + icp->need_resend = false; + } + + mutex_unlock(&icp->lock); + + if (check_resend) + icp_check_resend(xics, icp); +} + +static void h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) +{ + struct kvmppc_xics *xics = vcpu->kvm->arch.xics; + struct kvmppc_icp *icp = vcpu->arch.icp; + bool check_resend = false; + + XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr); + + mutex_lock(&icp->lock); + + icp->current_priority = xirr >> 24; + + /* If nothing is pending since accept, check for an IPI */ + if (!icp->pending_irq) + icp_check_ipi(xics, icp); + + 
if (!icp->pending_irq && icp->need_resend) { + check_resend = true; + icp->need_resend = false; + } + + ics_eoi(xics, icp, xirr & 0xFFFFFF); + + mutex_unlock(&icp->lock); + + if (check_resend) + icp_check_resend(xics, icp); +} + +int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req) +{ + unsigned long res; + int rc = H_SUCCESS; + + /* Check if we have an ICP */ + if (!vcpu->arch.icp || !vcpu->kvm->arch.xics) + return H_HARDWARE; + + switch (req) { + case H_XIRR: + res = h_xirr(vcpu); + kvmppc_set_gpr(vcpu, 4, res); + break; + case H_CPPR: + h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4)); + break; + case H_EOI: + h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4)); + break; + case H_IPI: + rc = h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5)); + break; + } + + return rc; +} + + +/* -- Initialisation code etc. -- */ + +static int xics_debug_show(struct seq_file *m, void *private) +{ + struct kvmppc_xics *xics = m->private; + struct kvm *kvm = xics->kvm; + struct kvm_vcpu *vcpu; + int buid, i; + + if (!kvm) + return 0; + + seq_printf(m, "=========\nICP state\n=========\n"); + + kvm_for_each_vcpu(i, vcpu, kvm) { + struct kvmppc_icp *icp = vcpu->arch.icp; + + if (!icp) + continue; + + mutex_lock(&icp->lock); + + seq_printf(m, "cpu server %#x pending %#x pending prio %#x cppr %#x " + "mfrr %#x\n", vcpu->vcpu_id, icp->pending_irq, + icp->pending_priority, icp->current_priority, + icp->mfrr); + + mutex_unlock(&icp->lock); + } + + for (buid = 1; buid <= KVMPPC_XICS_MAX_BUID; buid++) { + struct kvmppc_ics *ics = xics->ics[buid - 1]; + + if (!ics) + continue; + + seq_printf(m, "=========\nICS state for BUID 0x%x\n=========\n", buid); + + mutex_lock(&ics->lock); + + for (i = 0; i < ics->nr_irqs; i++) { + struct ics_irq_state *irq = &ics->irq_state[i]; + + seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x " + "asserted %d resend %d masked pending %d\n", + irq->number, irq->server, irq->priority, + irq->saved_priority, irq->asserted, irq->resend, + irq->masked_pending); + } + mutex_unlock(&ics->lock); + } + return 0; +} + +static int xics_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, xics_debug_show, inode->i_private); +} + +static const struct file_operations xics_debug_fops = { + .open = xics_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void xics_debugfs_init(struct kvmppc_xics *xics) +{ + char *name; + + name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics); + if (!name) { + pr_err("%s: no memory for name\n", __func__); + return; + } + + xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root, + xics, &xics_debug_fops); + + pr_debug("%s: created %s\n", __func__, name); + kfree(name); +} + +static int kvmppc_xics_create_ics(struct kvmppc_xics *xics, u16 buid, u16 nr_irqs) +{ + struct kvmppc_ics *ics; + int i, size; + + /* Create the ICS */ + size = sizeof(struct kvmppc_ics) + sizeof(struct ics_irq_state) * nr_irqs; + ics = kzalloc(size, GFP_KERNEL); + if (!ics) + return -ENOMEM; + + mutex_init(&ics->lock); + ics->buid = buid; + ics->nr_irqs = nr_irqs; + + for (i = 0; i < nr_irqs; i++) { + ics->irq_state[i].number = (buid << KVMPPC_XICS_BUID_SHIFT) | i; + ics->irq_state[i].priority = MASKED; + ics->irq_state[i].saved_priority = MASKED; + } + smp_wmb(); + xics->ics[buid - 1] = ics; + + if (buid > xics->max_buid) + xics->max_buid = buid; + + return 0; +} + +int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu) +{ + struct kvmppc_icp *icp; + + icp = kzalloc(sizeof(struct kvmppc_icp), GFP_KERNEL); + if
(!icp) + return -ENOMEM; + + mutex_init(&icp->lock); + icp->vcpu = vcpu; + icp->mfrr = MASKED; + vcpu->arch.icp = icp; + + XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id); + + return 0; +} + +void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) +{ + if (!vcpu->arch.icp) + return; + kfree(vcpu->arch.icp); + vcpu->arch.icp = NULL; +} + +void kvmppc_xics_free(struct kvm *kvm) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + int i; + + if (!xics) + return; + + lockdep_assert_held(&kvm->lock); + + debugfs_remove(xics->dentry); + + if (xics->kvm) { + xics->kvm->arch.xics = NULL; + xics->kvm = NULL; + } + + for (i = 0; i < xics->max_buid; i++) { + if (xics->ics[i]) + kfree(xics->ics[i]); + } + kfree(xics); +} + +/* -- ioctls -- */ + +static int kvm_vm_ioctl_create_icp(struct kvm *kvm, + struct kvm_irqchip_args *args) +{ + struct kvmppc_xics *xics; + int rc = 0; + + mutex_lock(&kvm->lock); + + /* Already there? Don't return with the lock still held */ + if (kvm->arch.xics) { + rc = -EEXIST; + goto out; + } + + xics = kzalloc(sizeof(*xics), GFP_KERNEL); + if (!xics) { + rc = -ENOMEM; + goto out; + } + + xics->kvm = kvm; + kvm->arch.xics = xics; + xics_debugfs_init(xics); + +out: + mutex_unlock(&kvm->lock); + return rc; +} + +static int kvm_vm_ioctl_create_ics(struct kvm *kvm, + struct kvm_irqchip_args *args) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + u16 nr_irqs, buid; + int rc; + + if (!xics) + return -ENODEV; + + nr_irqs = args->ics.nr_irqs; + buid = args->ics.buid; + + /* BUID 0 is bogus; fail before taking the lock */ + if (buid == 0) + return -EINVAL; + + /* Sanity checks */ + if (nr_irqs == 0 || nr_irqs > KVMPPC_XICS_IRQ_COUNT || + buid > KVMPPC_XICS_MAX_BUID) + return -EINVAL; + + mutex_lock(&kvm->lock); + + /* BUID already exists */ + if (xics->ics[buid - 1]) { + rc = -EEXIST; + goto out; + } + + /* Create the ICS */ + rc = kvmppc_xics_create_ics(xics, buid, nr_irqs); +out: + mutex_unlock(&kvm->lock); + return rc; +} + +static int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args) +{ + struct kvmppc_xics *xics; + + /* locking against multiple callers?
*/ + + xics = kvm->arch.xics; + if (!xics) + return -ENODEV; + + switch (args->level) { + case KVM_INTERRUPT_SET: + case KVM_INTERRUPT_SET_LEVEL: + case KVM_INTERRUPT_UNSET: + ics_deliver_irq(xics, args->irq, args->level); + break; + default: + return -EINVAL; + } + + return 0; +} + +int kvmppc_xics_ioctl(struct kvm *kvm, unsigned ioctl, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + int rc; + + switch (ioctl) { + case KVM_CREATE_IRQCHIP: { + struct kvm_irqchip_args args; + + rc = -EFAULT; + if (copy_from_user(&args, argp, sizeof(args))) + break; + rc = -EINVAL; + if (args.type == KVM_IRQCHIP_TYPE_ICP) + rc = kvm_vm_ioctl_create_icp(kvm, &args); + else if (args.type == KVM_IRQCHIP_TYPE_ICS) + rc = kvm_vm_ioctl_create_ics(kvm, &args); + break; + } + + case KVM_IRQ_LINE: { + struct kvm_irq_level args; + + rc = -EFAULT; + if (copy_from_user(&args, argp, sizeof(args))) + break; + rc = kvm_vm_ioctl_xics_irq(kvm, &args); + break; + } + + default: + rc = -ENOTTY; + break; + } + + return rc; +} diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 5ecfd80..507c9f5 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -199,8 +199,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, kvmppc_booke_queue_irqprio(vcpu, prio); } -void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq) +void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) { clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 4b7522f..89e3572 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -286,6 +286,7 @@ int kvm_dev_ioctl_check_extension(long ext) break; #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_PPC_GET_SMMU_INFO: + case KVM_CAP_SPAPR_XICS: r = 1; break; #endif @@ -611,7 +612,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { if (irq->irq == KVM_INTERRUPT_UNSET) { - kvmppc_core_dequeue_external(vcpu, irq); + kvmppc_core_dequeue_external(vcpu); return 0; } @@ -841,11 +842,6 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } - - case KVM_PPC_RTAS_DEFINE_TOKEN: - r = kvm_vm_ioctl_rtas_define_token(kvm, argp); - break; - #endif /* CONFIG_KVM_BOOK3S_64_HV */ #ifdef CONFIG_PPC_BOOK3S_64 @@ -859,7 +855,27 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; break; } + case KVM_PPC_RTAS_DEFINE_TOKEN: + r = kvm_vm_ioctl_rtas_define_token(kvm, argp); + break; #endif /* CONFIG_PPC_BOOK3S_64 */ + case KVM_IRQ_LINE: + if (kvmppc_xics_enabled(kvm)) + r = kvmppc_xics_ioctl(kvm, ioctl, arg); + else + r = -ENOTTY; + break; + case KVM_CREATE_IRQCHIP: { + u32 type; + + r = -EFAULT; + if (get_user(type, (u32 __user *)argp)) + break; + r = -EINVAL; + if (type == KVM_IRQCHIP_TYPE_ICP || type == KVM_IRQCHIP_TYPE_ICS) + r = kvmppc_xics_ioctl(kvm, ioctl, arg); + break; + } default: r = -ENOTTY; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 35c063a..f9a396f 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -111,6 +111,7 @@ struct kvm_irq_level { * ACPI gsi notion of irq. * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. + * On powerpc SPAPR, the global XICS source number (BUID || source); + * level takes one of the powerpc KVM_INTERRUPT_* values.
*/ union { __u32 irq; @@ -620,6 +621,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_ALLOC_HTAB 80 #define KVM_CAP_PPC_VPA 81 #define KVM_CAP_PPC_RTAS 82 +#define KVM_CAP_SPAPR_XICS 83 #ifdef KVM_CAP_IRQ_ROUTING @@ -753,6 +755,11 @@ struct kvm_msi { __u8 pad[16]; }; +#ifndef __KVM_HAVE_IRQCHIP_ARGS +/* Allow arch code to optionally define args for KVM_CREATE_IRQCHIP */ +struct kvm_irqchip_args { }; +#endif + /* * ioctls for VM fds */ @@ -783,7 +790,7 @@ struct kvm_s390_ucas_mapping { #define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long) /* Device model IOC */ -#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) +#define KVM_CREATE_IRQCHIP _IOW(KVMIO, 0x60, struct kvm_irqchip_args) #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) #define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip) #define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)
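
For anyone who wants to poke at this from userspace, here is a rough sketch of how a VMM would be expected to drive the new interfaces. This is illustration only, not part of the patch: it is untested, "vm_fd" is assumed to be an already-open KVM VM file descriptor, the BUID/source values are arbitrary, and all error checking is omitted. It relies only on the uapi additions above (struct kvm_irqchip_args, the KVM_IRQCHIP_TYPE_* constants, the retyped KVM_CREATE_IRQCHIP ioctl and the existing KVM_IRQ_LINE ioctl):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

#define BUID_SHIFT	12

/* Global interrupt number as exposed to the guest: BUID || source */
static uint32_t xics_irq(uint16_t buid, uint16_t src)
{
	return ((uint32_t)buid << BUID_SHIFT) | src;
}

static void xics_setup(int vm_fd)
{
	struct kvm_irqchip_args args;

	/* Create the ICP state; per the comment in kvm.h this must be
	 * done once, before any VCPU is created. */
	memset(&args, 0, sizeof(args));
	args.type = KVM_IRQCHIP_TYPE_ICP;
	ioctl(vm_fd, KVM_CREATE_IRQCHIP, &args);

	/* Register one block of sources: BUID 1 (0 is invalid), 16 irqs */
	memset(&args, 0, sizeof(args));
	args.type = KVM_IRQCHIP_TYPE_ICS;
	args.ics.buid = 1;
	args.ics.nr_irqs = 16;
	ioctl(vm_fd, KVM_CREATE_IRQCHIP, &args);
}

/* Assert a level-triggered source, e.g. source 4 of BUID 1 */
static void xics_assert_lsi(int vm_fd, uint16_t buid, uint16_t src)
{
	struct kvm_irq_level irq = {
		.irq	= xics_irq(buid, src),
		.level	= KVM_INTERRUPT_SET_LEVEL,	/* powerpc-specific value */
	};

	ioctl(vm_fd, KVM_IRQ_LINE, &irq);
}

Note that every source starts out masked (priority 0xff), so nothing is delivered until the guest unmasks it through the ibm,set-xive RTAS call wired up in book3s_rtas.c; an assertion arriving before that is simply recorded as masked_pending.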