On Wed, May 13, 2015 at 08:12:59AM +0200, Jan Kiszka wrote: > On 2015-05-13 03:47, Steve Rutherford wrote: > > In order to enable userspace PIC support, the userspace PIC needs to > > be able to inject local interrupt requests. > > > > This adds the ioctl KVM_REQUEST_LOCAL_INTERRUPT and kvm exit > > KVM_EXIT_GET_EXTINT. > > > > The vm ioctl KVM_REQUEST_LOCAL_INTERRUPT makes a KVM_REQ_EVENT request > > on the BSP, which causes the BSP to exit to userspace to fetch the > > vector of the underlying external interrupt, which the BSP then > > injects into the guest. This matches the PIC spec, and is necessary to > > boot Windows. > > The API name seems too generic, given the fairly specific use case. As > it only allows to kick the BSP, not any VCPU, that should be clarified. > Maybe call it KVM_REQUEST_PIC_INJECTION or so. Or allow userspace to > specify the target VCPU, then it's a bit more generic again. > > Actually, when looking at the MultiProcessor spec, you will find > multiple models for injecting PIC interrupts into CPU APICs. Just in the > PIC Mode, the BSP is the only possible target. In the other modes, all > APICs can receive PIC interrupts, and either the IOAPIC or the local > APIC's LINT0 configuration decide about the effective target. We should > allow to model all modes, based on userspace decisions. > Supporting the other modes seems reasonable, but I'm not certain I have an outlet for testing them for correctness. I'm not even certain which OSes use the other modes. Unless there is a common OS that uses the other modes, and a straightforward way for me to test the other modes, I'd rather just rename the API to be less generic. > > > > Boots and passes the KVM unit tests on intel x86 with the > > PIC/PIT/IOAPIC in userspace (under a non-QEMU VMM). Boots and passes > > the KVM unit tests under normal conditions as well. > > Do you plan to provide a reference implementation for an open source > userspace VMM as well, once the kernel side is settled? > Not at the moment (given that I'm not all that familiar with QEMU). I'm definitely willing to help guide someone else through the process. Steve > > > > SVM support and device assignment are untested with this feature > > enabled, but testing for both is in the works. > > > > Compiles for ARM/x86/PPC. > > > > Signed-off-by: Steve Rutherford <srutherford@xxxxxxxxxx> > > --- > > Documentation/virtual/kvm/api.txt | 9 +++++++ > > arch/x86/include/asm/kvm_host.h | 1 + > > arch/x86/kvm/irq.c | 6 ++++- > > arch/x86/kvm/lapic.c | 7 ++++++ > > arch/x86/kvm/lapic.h | 2 ++ > > arch/x86/kvm/x86.c | 53 ++++++++++++++++++++++++++++++++++++--- > > include/uapi/linux/kvm.h | 6 +++++ > > 7 files changed, 80 insertions(+), 4 deletions(-) > > > > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt > > index dd92996..a650321 100644 > > --- a/Documentation/virtual/kvm/api.txt > > +++ b/Documentation/virtual/kvm/api.txt > > @@ -2993,6 +2993,15 @@ the ioapic nor the pic in the kernel. Also, enables in kernel routing of > > interrupt requests. Fails if VCPU has already been created, or if the irqchip is > > already in the kernel. > > > > +4.97 KVM_REQUEST_LOCAL_INTERRUPT > > + > > +Capability: KVM_CAP_SPLIT_IRQCHIP > > +Type: VM ioctl > > +Parameters: none > > +Returns: 0 on success, -1 on error. > > + > > +Informs the kernel that userspace has a pending external interrupt. > > + > > > > 5. The kvm_run structure > > ------------------------ > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > index b1978f1..602ea70 100644 > > --- a/arch/x86/include/asm/kvm_host.h > > +++ b/arch/x86/include/asm/kvm_host.h > > @@ -542,6 +542,7 @@ struct kvm_vcpu_arch { > > > > u64 eoi_exit_bitmaps[4]; > > int pending_ioapic_eoi; > > + bool pending_external; > > }; > > > > struct kvm_lpage_info { > > diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c > > index 706e47a..487b5f5 100644 > > --- a/arch/x86/kvm/irq.c > > +++ b/arch/x86/kvm/irq.c > > @@ -43,7 +43,11 @@ EXPORT_SYMBOL(kvm_cpu_has_pending_timer); > > */ > > static int kvm_cpu_has_extint(struct kvm_vcpu *v) > > { > > - if (kvm_apic_accept_pic_intr(v)) > > + u8 accept = kvm_apic_accept_pic_intr(v); > > + > > + if (accept && irqchip_split(v->kvm)) > > + return v->arch.pending_external; > > + else if (accept) > > return pic_irqchip(v->kvm)->output; /* PIC */ > > else > > return 0; > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > > index 7533b87..9a021f7 100644 > > --- a/arch/x86/kvm/lapic.c > > +++ b/arch/x86/kvm/lapic.c > > @@ -2089,3 +2089,10 @@ void kvm_lapic_init(void) > > jump_label_rate_limit(&apic_hw_disabled, HZ); > > jump_label_rate_limit(&apic_sw_disabled, HZ); > > } > > + > > +void kvm_request_local_interrupt(struct kvm_vcpu *vcpu) > > +{ > > + vcpu->arch.pending_external = true; > > + kvm_make_request(KVM_REQ_EVENT, vcpu); > > + kvm_vcpu_kick(vcpu); > > +} > > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h > > index 71b150c..66bb780 100644 > > --- a/arch/x86/kvm/lapic.h > > +++ b/arch/x86/kvm/lapic.h > > @@ -63,6 +63,8 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, > > unsigned long *dest_map); > > int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); > > > > +void kvm_request_local_interrupt(struct kvm_vcpu *vcpu); > > + > > bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, > > struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map); > > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > index 6127fe7..b7ceff3 100644 > > --- a/arch/x86/kvm/x86.c > > +++ b/arch/x86/kvm/x86.c > > @@ -65,6 +65,8 @@ > > #include <asm/pvclock.h> > > #include <asm/div64.h> > > > > +#define GET_VECTOR_FROM_USERSPACE 1 > > + > > #define MAX_IO_MSRS 256 > > #define KVM_MAX_MCE_BANKS 32 > > #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P) > > @@ -4146,6 +4148,30 @@ split_irqchip_unlock: > > mutex_unlock(&kvm->lock); > > break; > > } > > + case KVM_REQUEST_LOCAL_INTERRUPT: { > > + int i; > > + struct kvm_vcpu *vcpu; > > + struct kvm_vcpu *bsp_vcpu = NULL; > > + > > + mutex_lock(&kvm->lock); > > + r = -EEXIST; > > Nitpicking, EEXIST seems confusing. I would go for EINVAL here as well. > > > + if (!irqchip_split(kvm)) > > + goto out; > > + kvm_for_each_vcpu(i, vcpu, kvm) { > > + if (kvm_vcpu_is_reset_bsp(vcpu)) { > > + bsp_vcpu = vcpu; > > + continue; > > + } > > + } > > + r = -EINVAL; > > + if (bsp_vcpu == NULL) > > + goto interrupt_unlock; > > + kvm_request_local_interrupt(bsp_vcpu); > > + r = 0; > > +interrupt_unlock: > > + mutex_unlock(&kvm->lock); > > + break; > > + } > > > > default: > > r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); > > @@ -6123,6 +6149,16 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) > > kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); > > } > > > > +static int pending_userspace_external_intr(struct kvm_vcpu *vcpu) > > +{ > > + if (vcpu->arch.pending_external && kvm_apic_accept_pic_intr(vcpu)) { > > + vcpu->arch.pending_external = false; > > + return true; > > + } else > > + return false; > > + > > +} > > + > > static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) > > { > > int r; > > @@ -6187,7 +6223,12 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) > > return r; > > } > > if (kvm_x86_ops->interrupt_allowed(vcpu)) { > > - kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), > > + if (irqchip_split(vcpu->kvm) && > > + pending_userspace_external_intr(vcpu)) { > > + return GET_VECTOR_FROM_USERSPACE; > > + } > > + kvm_queue_interrupt(vcpu, > > + kvm_cpu_get_interrupt(vcpu), > > false); > > kvm_x86_ops->set_irq(vcpu); > > } > > @@ -6351,13 +6392,19 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) > > } > > > > if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { > > + int event; > > kvm_apic_accept_events(vcpu); > > if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { > > r = 1; > > goto out; > > } > > - > > - if (inject_pending_event(vcpu, req_int_win) != 0) > > + event = inject_pending_event(vcpu, req_int_win); > > + if (event == GET_VECTOR_FROM_USERSPACE) { > > + vcpu->run->exit_reason = KVM_EXIT_GET_EXTINT; > > + kvm_make_request(KVM_REQ_EVENT, vcpu); > > + r = 0; > > + goto out; > > + } else if (event != 0) > > req_immediate_exit = true; > > /* enable NMI/IRQ window open exits if needed */ > > else if (vcpu->arch.nmi_pending) > > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > > index 2305948..368e80a 100644 > > --- a/include/uapi/linux/kvm.h > > +++ b/include/uapi/linux/kvm.h > > @@ -184,6 +184,7 @@ struct kvm_s390_skeys { > > #define KVM_EXIT_SYSTEM_EVENT 24 > > #define KVM_EXIT_S390_STSI 25 > > #define KVM_EXIT_IOAPIC_EOI 26 > > +#define KVM_EXIT_GET_EXTINT 27 > > > > /* For KVM_EXIT_INTERNAL_ERROR */ > > /* Emulate instruction failed. */ > > @@ -334,6 +335,10 @@ struct kvm_run { > > struct { > > __u8 vector; > > } eoi; > > + /* KVM_EXIT_GET_EXTINT */ > > + struct { > > + __u8 vector; > > + } extint; > > /* Fix the size of the union. */ > > char padding[256]; > > }; > > @@ -1208,6 +1213,7 @@ struct kvm_s390_ucas_mapping { > > #define KVM_S390_GET_IRQ_STATE _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state) > > /* Available with KVM_CAP_SPLIT_IRQCHIP */ > > #define KVM_CREATE_SPLIT_IRQCHIP _IO(KVMIO, 0xb7) > > +#define KVM_REQUEST_LOCAL_INTERRUPT _IO(KVMIO, 0xb8) > > > > #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) > > #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) > > > > Jan > > -- > Siemens AG, Corporate Technology, CT RTC ITP SES-DE > Corporate Competence Center Embedded Linux -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html