States we need to capture are : - the IVE table defining the source targeting - the main interrupt management registers for each vCPU - the EQs. Also mark the EQ page dirty to make sure it is transferred. This is work in progress. We need to make sure the HW has reached a quiescence point. Signed-off-by: Cédric Le Goater <clg@xxxxxxxx> --- arch/powerpc/include/asm/kvm_ppc.h | 10 ++ arch/powerpc/include/uapi/asm/kvm.h | 11 ++ arch/powerpc/kvm/book3s.c | 46 +++++ arch/powerpc/kvm/book3s_xive_native.c | 320 ++++++++++++++++++++++++++++++++++ 4 files changed, 387 insertions(+) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index b5fceb4d7776..748518c7bf70 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -269,6 +269,8 @@ union kvmppc_one_reg { u64 addr; u64 length; } vpaval; + u32 xeqval[8]; + u64 vpval[2]; }; struct kvmppc_ops { @@ -594,6 +596,10 @@ extern int kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu); extern void kvmppc_xive_native_init_module(void); extern void kvmppc_xive_native_exit_module(void); extern int kvmppc_xive_hcall(struct kvm_vcpu *vcpu, u32 cmd); +extern int kvmppc_xive_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val); +extern int kvmppc_xive_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val); +extern int kvmppc_xive_get_vp_queue(struct kvm_vcpu *vcpu, int priority, union kvmppc_one_reg *val); +extern int kvmppc_xive_set_vp_queue(struct kvm_vcpu *vcpu, int priority, union kvmppc_one_reg *val); #else static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server, @@ -627,6 +633,10 @@ static inline void kvmppc_xive_native_init_module(void) { } static inline void kvmppc_xive_native_exit_module(void) { } static inline int kvmppc_xive_hcall(struct kvm_vcpu *vcpu, u32 cmd) { return 0; } +static inline int kvmppc_xive_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) { return 0; } +static inline int kvmppc_xive_set_vp(struct kvm_vcpu 
*vcpu, union kvmppc_one_reg *val) { return -ENOENT; } +static inline int kvmppc_xive_get_vp_queue(struct kvm_vcpu *vcpu, int priority, union kvmppc_one_reg *val) { return 0; } +static inline int kvmppc_xive_set_vp_queue(struct kvm_vcpu *vcpu, int priority, union kvmppc_one_reg *val) { return -ENOENT; } #endif /* CONFIG_KVM_XIVE */ diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 6e120129dfe6..8a2be937a98e 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -480,6 +480,16 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */ #define KVM_REG_PPC_ICP_PPRI_MASK 0xff +#define KVM_REG_PPC_VP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x8d) +#define KVM_REG_PPC_VP_EQ0 (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x8e) +#define KVM_REG_PPC_VP_EQ1 (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x8f) +#define KVM_REG_PPC_VP_EQ2 (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x90) +#define KVM_REG_PPC_VP_EQ3 (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x91) +#define KVM_REG_PPC_VP_EQ4 (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x92) +#define KVM_REG_PPC_VP_EQ5 (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x93) +#define KVM_REG_PPC_VP_EQ6 (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x94) +#define KVM_REG_PPC_VP_EQ7 (KVM_REG_PPC | KVM_REG_SIZE_U256 | 0x95) + /* Device control API: PPC-specific devices */ #define KVM_DEV_MPIC_GRP_MISC 1 #define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */ @@ -679,6 +689,7 @@ struct kvm_ppc_cpu_char { #define KVM_DEV_XIVE_GET_ESB_FD 1 #define KVM_DEV_XIVE_GET_TIMA_FD 2 #define KVM_DEV_XIVE_VC_BASE 3 +#define KVM_DEV_XIVE_GRP_IVE 3 /* Layout of 64-bit XIVE source attribute values */ #define KVM_XIVE_LEVEL_SENSITIVE (1ULL << 0) diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 19c0187cada3..fc745233b2d9 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -625,6 +625,29 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, 
kvmppc_xics_get_icp(vcpu)); break; #endif /* CONFIG_KVM_XICS */ +#ifdef CONFIG_KVM_XIVE + case KVM_REG_PPC_VP_STATE: + if (!vcpu->arch.xive_vcpu) { + r = -ENXIO; + break; + } + if (xive_enabled()) + r = kvmppc_xive_get_vp(vcpu, val); + else + r = -ENXIO; + break; + case KVM_REG_PPC_VP_EQ0 ... KVM_REG_PPC_VP_EQ7: + if (!vcpu->arch.xive_vcpu) { + r = -ENXIO; + break; + } + if (xive_enabled()) { + i = id - KVM_REG_PPC_VP_EQ0; + r = kvmppc_xive_get_vp_queue(vcpu, i, val); + } else + r = -ENXIO; + break; +#endif /* CONFIG_KVM_XIVE */ case KVM_REG_PPC_FSCR: *val = get_reg_val(id, vcpu->arch.fscr); break; @@ -698,6 +721,29 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val)); break; #endif /* CONFIG_KVM_XICS */ +#ifdef CONFIG_KVM_XIVE + case KVM_REG_PPC_VP_STATE: + if (!vcpu->arch.xive_vcpu) { + r = -ENXIO; + break; + } + if (xive_enabled()) + r = kvmppc_xive_set_vp(vcpu, val); + else + r = -ENXIO; + break; + case KVM_REG_PPC_VP_EQ0 ... KVM_REG_PPC_VP_EQ7: + if (!vcpu->arch.xive_vcpu) { + r = -ENXIO; + break; + } + if (xive_enabled()) { + i = id - KVM_REG_PPC_VP_EQ0; + r = kvmppc_xive_set_vp_queue(vcpu, i, val); + } else + r = -ENXIO; + break; +#endif /* CONFIG_KVM_XIVE */ case KVM_REG_PPC_FSCR: vcpu->arch.fscr = set_reg_val(id, *val); break; diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index d705de3c5d65..056d4669a506 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -189,6 +189,233 @@ static int xive_native_validate_queue_size(u32 qsize) } } +int kvmppc_xive_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + u32 version; + int rc; + + if (!kvmppc_xive_enabled(vcpu)) + return -EPERM; + + if (!xc) + return -ENOENT; + + val->vpval[0] = vcpu->arch.xive_saved_state.w01; + + rc = xive_native_get_vp_state(xc->vp_id, &version, &val->vpval[1]); + if (rc) + return rc; + 
if (XIVE_STATE_COMPAT(version) > 1) { + pr_err("invalid OPAL state version %08x\n", version); + return -EIO; + } + + pr_devel("%s NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n", + __func__, + vcpu->arch.xive_saved_state.nsr, + vcpu->arch.xive_saved_state.cppr, + vcpu->arch.xive_saved_state.ipb, + vcpu->arch.xive_saved_state.pipr, + vcpu->arch.xive_saved_state.w01, + (u32) vcpu->arch.xive_cam_word, val->vpval[1]); + + return 0; +} + +int kvmppc_xive_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + struct kvmppc_xive *xive = vcpu->kvm->arch.xive; + u32 version = XIVE_STATE_VERSION; + int rc; + + pr_devel("%s w01=%016llx vp=%016llx\n", __func__, val->vpval[0], + val->vpval[1]); + + if (!kvmppc_xive_enabled(vcpu)) + return -EPERM; + + if (!xc || !xive) + return -ENOENT; + + /* We can't update the state of a "pushed" VCPU */ + if (WARN_ON(vcpu->arch.xive_pushed)) + return -EIO; + + /* TODO: only restore IPB and CPPR ? 
*/ + vcpu->arch.xive_saved_state.w01 = val->vpval[0]; + + rc = xive_native_set_vp_state(xc->vp_id, version, val->vpval[1]); + if (rc) + return rc; + + return 0; +} + +int kvmppc_xive_get_vp_queue(struct kvm_vcpu *vcpu, int priority, + union kvmppc_one_reg *val) +{ + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + struct xive_q *q; + u64 qpage; + u64 qsize; + u64 qeoi_page; + u32 escalate_irq; + u64 qflags; + u32 version; + u64 qw1; + int rc; + + if (!kvmppc_xive_enabled(vcpu)) + return -EPERM; + + if (!xc) + return -ENOENT; + + pr_debug("%s vcpu %d priority %d\n", __func__, xc->server_num, + priority); + + if (priority != xive_prio_from_guest(priority) || priority == MASKED) { + pr_err("Trying to retrieve info from queue %d for VCPU %d\n", + priority, xc->server_num); + return -EINVAL; + } + q = &xc->queues[priority]; + + memset(val->xeqval, 0, sizeof(val->xeqval)); + + if (!q->qpage) + return 0; + + rc = xive_native_get_queue_info(xc->vp_id, priority, &qpage, &qsize, + &qeoi_page, &escalate_irq, &qflags); + if (rc) + return rc; + + rc = xive_native_get_queue_state(xc->vp_id, priority, &version, &qw1); + if (rc) + return rc; + + if (XIVE_STATE_COMPAT(version) > 1) { + pr_err("invalid OPAL state version %08x\n", version); + return -EIO; + } + + val->xeqval[0] = 0; + if (qflags & OPAL_XIVE_EQ_ENABLED) + val->xeqval[0] |= EQ_W0_VALID|EQ_W0_ENQUEUE; + if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY) + val->xeqval[0] |= EQ_W0_UCOND_NOTIFY; + if (qflags & OPAL_XIVE_EQ_ESCALATE) + val->xeqval[0] |= EQ_W0_ESCALATE_CTL; + val->xeqval[0] |= SETFIELD(EQ_W0_QSIZE, 0ul, qsize - 12); + + val->xeqval[1] = qw1 & 0xffffffff; + val->xeqval[2] = (q->guest_qpage >> 32) & 0x0fffffff; + val->xeqval[3] = q->guest_qpage & 0xffffffff; + val->xeqval[4] = 0; + val->xeqval[5] = 0; + val->xeqval[6] = SETFIELD(EQ_W6_NVT_BLOCK, 0ul, 0ul) | + SETFIELD(EQ_W6_NVT_INDEX, 0ul, xc->server_num); + val->xeqval[7] = SETFIELD(EQ_W7_F0_PRIORITY, 0ul, priority); + + /* Mark EQ page dirty for migration */ + 
mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qpage)); + + return 0; +} + +int kvmppc_xive_set_vp_queue(struct kvm_vcpu *vcpu, int priority, + union kvmppc_one_reg *val) +{ + struct kvm *kvm = vcpu->kvm; + struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + struct kvmppc_xive *xive = vcpu->kvm->arch.xive; + u32 qsize; + u64 qpage; + u32 server; + u8 prio; + int rc; + __be32 *qaddr = NULL; + struct page *page; + struct xive_q *q; + u32 version = XIVE_STATE_VERSION; + + if (!xc || !xive) + return -ENOENT; + + pr_devel("%s vcpu %d priority %d\n", __func__, xc->server_num, + priority); + + /* + * Check that we are not trying to configure queues reserved + * for the hypervisor + */ + if (priority != xive_prio_from_guest(priority) || priority == MASKED) { + pr_err("Trying to restore invalid queue %d for VCPU %d\n", + priority, xc->server_num); + return -EINVAL; + } + + qsize = GETFIELD(EQ_W0_QSIZE, val->xeqval[0]) + 12; + qpage = (((u64)(val->xeqval[2] & 0x0fffffff)) << 32) | val->xeqval[3]; + server = GETFIELD(EQ_W6_NVT_INDEX, val->xeqval[6]); + prio = GETFIELD(EQ_W7_F0_PRIORITY, val->xeqval[7]); + + if (xc->server_num != server) { + vcpu = kvmppc_xive_find_server(kvm, server); + if (!vcpu) { + pr_debug("Can't find server %d\n", server); + return -EINVAL; + } + xc = vcpu->arch.xive_vcpu; + } + + if (priority != prio) { + pr_err("invalid state for queue %d for VCPU %d\n", + priority, xc->server_num); + return -EIO; + } + q = &xc->queues[prio]; + + rc = xive_native_validate_queue_size(qsize); + if (rc || !qsize) { + pr_err("invalid queue size %d\n", qsize); + return rc; + } + + page = gfn_to_page(kvm, gpa_to_gfn(qpage)); + if (is_error_page(page)) { + pr_debug("Couldn't get guest page for %llx!\n", qpage); + return -ENOMEM; + } + qaddr = page_to_virt(page) + (qpage & ~PAGE_MASK); + q->guest_qpage = qpage; + + rc = xive_native_configure_queue(xc->vp_id, q, prio, (__be32 *) qaddr, + qsize, true); + if (rc) { + pr_err("Failed to configure queue %d for VCPU %d: %d\n", + prio, 
xc->server_num, rc); + put_page(page); + return rc; + } + + rc = xive_native_set_queue_state(xc->vp_id, prio, version, + val->xeqval[1]); + if (rc) + goto error; + + rc = kvmppc_xive_attach_escalation(vcpu, prio); +error: + if (rc) + xive_native_cleanup_queue(vcpu, prio); + return rc; +} + + static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq, u64 addr) { @@ -328,6 +555,94 @@ static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive, return rc; } +static int kvmppc_xive_native_set_ive(struct kvmppc_xive *xive, long irq, + u64 addr) +{ + struct kvmppc_xive_src_block *sb; + struct kvmppc_xive_irq_state *state; + u64 __user *ubufp = (u64 __user *) addr; + u16 src; + u64 ive; + u32 eq_idx; + u32 server; + u8 priority; + u32 eisn; + + pr_devel("%s irq=0x%lx\n", __func__, irq); + + sb = kvmppc_xive_find_source(xive, irq, &src); + if (!sb) + return -ENOENT; + + state = &sb->irq_state[src]; + + if (!state->valid) + return -ENOENT; + + if (get_user(ive, ubufp)) { + pr_err("fault getting user info !\n"); + return -EFAULT; + } + + if (!(ive & IVE_VALID) || ive & IVE_MASKED) { + pr_err("invalid IVE %016llx for IRQ %lx\n", ive, irq); + return -EPERM; + } + + /* QEMU encoding of EQ index */ + eq_idx = GETFIELD(IVE_EQ_INDEX, ive); + server = eq_idx >> 3; + priority = eq_idx & 0x7; + + eisn = GETFIELD(IVE_EQ_DATA, ive); + + return kvmppc_xive_native_set_source_config(xive, sb, state, server, + priority, eisn); +} + +static int kvmppc_xive_native_get_ive(struct kvmppc_xive *xive, long irq, + u64 addr) +{ + struct kvmppc_xive_src_block *sb; + struct kvmppc_xive_irq_state *state; + u64 __user *ubufp = (u64 __user *) addr; + u16 src; + u64 ive; + u32 eq_idx; + + pr_devel("%s irq=0x%lx\n", __func__, irq); + + sb = kvmppc_xive_find_source(xive, irq, &src); + if (!sb) + return -ENOENT; + + state = &sb->irq_state[src]; + + if (!state->valid) + return -ENOENT; + + ive = IVE_VALID; + + arch_spin_lock(&sb->lock); + + if (state->act_priority == MASKED) + 
ive |= IVE_MASKED; + else { + /* QEMU encoding of EQ index */ + eq_idx = ((state->act_server) << 3) | + ((state->act_priority) & 0x7); + ive |= SETFIELD(IVE_EQ_BLOCK, 0ul, 0ul) | + SETFIELD(IVE_EQ_INDEX, 0ul, eq_idx) | + SETFIELD(IVE_EQ_DATA, 0ul, state->eisn); + } + arch_spin_unlock(&sb->lock); + + if (put_user(ive, ubufp)) + return -EFAULT; + + return 0; +} + static int xive_native_esb_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; @@ -455,6 +770,8 @@ static int kvmppc_xive_native_set_attr(struct kvm_device *dev, case KVM_DEV_XIVE_GRP_SOURCES: return kvmppc_xive_native_set_source(xive, attr->attr, attr->addr); + case KVM_DEV_XIVE_GRP_IVE: + return kvmppc_xive_native_set_ive(xive, attr->attr, attr->addr); case KVM_DEV_XIVE_GRP_CTRL: switch (attr->attr) { case KVM_DEV_XIVE_VC_BASE: @@ -471,6 +788,8 @@ static int kvmppc_xive_native_get_attr(struct kvm_device *dev, struct kvmppc_xive *xive = dev->private; switch (attr->group) { + case KVM_DEV_XIVE_GRP_IVE: + return kvmppc_xive_native_get_ive(xive, attr->attr, attr->addr); case KVM_DEV_XIVE_GRP_CTRL: switch (attr->attr) { case KVM_DEV_XIVE_GET_ESB_FD: @@ -490,6 +809,7 @@ static int kvmppc_xive_native_has_attr(struct kvm_device *dev, { switch (attr->group) { case KVM_DEV_XIVE_GRP_SOURCES: + case KVM_DEV_XIVE_GRP_IVE: if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ && attr->attr < KVMPPC_XIVE_NR_IRQS) return 0; -- 2.13.6