Re: [PATCH v3 4/4] KVM: x86: Add support for local interrupt requests from userspace

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Jun 03, 2015 at 11:38:21AM +0200, Paolo Bonzini wrote:
> 
> 
> On 03/06/2015 01:51, Steve Rutherford wrote:
> > In order to enable userspace PIC support, the userspace PIC needs to
> > be able to inject local interrupt requests.
> > 
> > This adds the ioctl KVM_REQUEST_PIC_INJECTION and kvm exit
> > KVM_EXIT_GET_EXTINT.
> > 
> > The vm ioctl KVM_REQUEST_PIC_INJECTION makes a KVM_REQ_EVENT request
> > on the BSP, which causes the BSP to exit to userspace to fetch the
> > vector of the underlying external interrupt, which the BSP then
> > injects into the guest. This matches the PIC spec, and is necessary to
> > boot Windows.
> > 
> > Compiles for x86.
> > 
> > Update: Boots Windows and passes the KVM Unit Tests.
> > 
> > Signed-off-by: Steve Rutherford <srutherford@xxxxxxxxxx>
> > ---
> >  Documentation/virtual/kvm/api.txt |  9 ++++++
> >  arch/x86/include/asm/kvm_host.h   |  2 ++
> >  arch/x86/kvm/irq.c                | 22 +++++++++++++--
> >  arch/x86/kvm/lapic.c              |  7 +++++
> >  arch/x86/kvm/lapic.h              |  2 ++
> >  arch/x86/kvm/x86.c                | 59 +++++++++++++++++++++++++++++++++++++--
> >  include/uapi/linux/kvm.h          |  7 +++++
> >  7 files changed, 103 insertions(+), 5 deletions(-)
> > 
> > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> > index 6ab2a3f7..b5d90cb 100644
> > --- a/Documentation/virtual/kvm/api.txt
> > +++ b/Documentation/virtual/kvm/api.txt
> > @@ -2979,6 +2979,15 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
> >  and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
> >  which is the maximum number of possibly pending cpu-local interrupts.
> >  
> > +4.96 KVM_REQUEST_PIC_INJECTION
> > +
> > +Capability: KVM_CAP_SPLIT_IRQCHIP
> > +Type: VM ioctl
> > +Parameters: none
> > +Returns: 0 on success, -1 on error.
> > +
> > +Informs the kernel that userspace has a pending external interrupt.
> > +
> 
> Missing documentation for the new vmexit and kvm_run member.
> 
> However, why is the roundtrip to userspace necessary?  Could you pass
> the extint index directly as an argument to KVM_INTERRUPT?  It's
> backwards-compatible, because KVM_INTERRUPT so far could not be used
> together with an in-kernel LAPIC.  If you could do that, you could also
> avoid the new userspace_extint_available field.
> 
> Userspace can figure out who's the BSP.  The rendez-vous between the
> irqchip and the BSP's VCPU thread is still needed, but it can be done
> entirely in userspace.
> 
> You'd also need much fewer changes to irq.c.  Basically just something like
> 
>  int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
>  {
>          int vector;
> 
> -        if (!irqchip_in_kernel(v->kvm))
> +        if (!pic_in_kernel(v->kvm) && v->arch.interrupt.pending)
>                 return v->arch.interrupt.nr;
> 
> ...
> 
>  int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
>  {
> -        if (!irqchip_in_kernel(v->kvm))
> +        if (!pic_in_kernel(v->kvm))
>                  return v->arch.interrupt.pending;
> 
> ...
> 
>  int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
>  {
> -        if (!irqchip_in_kernel(v->kvm))
> +        if (!pic_in_kernel(v->kvm))
>                  return v->arch.interrupt.pending;
> 
> More comments below.
> 
> >  5. The kvm_run structure
> >  ------------------------
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 4f439ff..0e8b0fc 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -543,6 +543,8 @@ struct kvm_vcpu_arch {
> >  
> >  	u64 eoi_exit_bitmaps[4];
> >  	int pending_ioapic_eoi;
> > +	bool userspace_extint_available;
> > +	int pending_external_vector;
> >  };
> >  
> 
> >  struct kvm_lpage_info {
> > diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> > index 706e47a..1270b2a 100644
> > --- a/arch/x86/kvm/irq.c
> > +++ b/arch/x86/kvm/irq.c
> > @@ -38,12 +38,25 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
> >  EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
> >  
> >  /*
> > + * check if there is a pending userspace external interrupt
> > + */
> > +static int pending_userspace_extint(struct kvm_vcpu *v)
> > +{
> > +	return v->arch.userspace_extint_available ||
> > +	       v->arch.pending_external_vector != -1;
> > +}
> > +
> > +/*
> >   * check if there is pending interrupt from
> >   * non-APIC source without intack.
> >   */
> >  static int kvm_cpu_has_extint(struct kvm_vcpu *v)
> >  {
> > -	if (kvm_apic_accept_pic_intr(v))
> > +	u8 accept = kvm_apic_accept_pic_intr(v);
> > +
> > +	if (accept && irqchip_split(v->kvm))
> > +		return pending_userspace_extint(v);
> > +	else if (accept)
> >  		return pic_irqchip(v->kvm)->output;	/* PIC */
> 
> 	if (accept) {
> 		if (irqchip_split(v->kvm))
> 			return pending_userspace_extint(v);
> 		else
> 			return pic_irqchip(v->kvm)->output;
> 	}
> 
> 	return 0;
> 
> >  	else
> >  		return 0;
> > @@ -91,7 +104,12 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
> >   */
> >  static int kvm_cpu_get_extint(struct kvm_vcpu *v)
> >  {
> > -	if (kvm_cpu_has_extint(v))
> > +	if (irqchip_split(v->kvm) && kvm_cpu_has_extint(v)) {
> > +		int vector = v->arch.pending_external_vector;
> > +
> > +		v->arch.pending_external_vector = -1;
> > +		return vector;
> > +	} else if (kvm_cpu_has_extint(v))
> >  		return kvm_pic_read_irq(v->kvm); /* PIC */
> 
> Same as above.
> 
> >  	return -1;
> >  }
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index 766d297..012b56ee 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -2094,3 +2094,10 @@ void kvm_lapic_init(void)
> >  	jump_label_rate_limit(&apic_hw_disabled, HZ);
> >  	jump_label_rate_limit(&apic_sw_disabled, HZ);
> >  }
> > +
> > +void kvm_request_pic_injection(struct kvm_vcpu *vcpu)
> > +{
> > +	vcpu->arch.userspace_extint_available = true;
> > +	kvm_make_request(KVM_REQ_EVENT, vcpu);
> > +	kvm_vcpu_kick(vcpu);
> > +}
> > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> > index 71b150c..7831e4d 100644
> > --- a/arch/x86/kvm/lapic.h
> > +++ b/arch/x86/kvm/lapic.h
> > @@ -63,6 +63,8 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
> >  		unsigned long *dest_map);
> >  int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
> >  
> > +void kvm_request_pic_injection(struct kvm_vcpu *vcpu);
> > +
> >  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
> >  		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map);
> >  
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 35d13d4..40e7509 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -65,6 +65,8 @@
> >  #include <asm/pvclock.h>
> >  #include <asm/div64.h>
> >  
> > +#define GET_VECTOR_FROM_USERSPACE 1
> > +
> >  #define MAX_IO_MSRS 256
> >  #define KVM_MAX_MCE_BANKS 32
> >  #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
> > @@ -4217,6 +4219,30 @@ long kvm_arch_vm_ioctl(struct file *filp,
> >  		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
> >  		break;
> >  	}
> > +	case KVM_REQUEST_PIC_INJECTION: {
> > +		int i;
> > +		struct kvm_vcpu *vcpu;
> > +		struct kvm_vcpu *bsp_vcpu = NULL;
> > +
> > +		mutex_lock(&kvm->lock);
> > +		r = -EEXIST;
> > +		if (!irqchip_split(kvm))
> > +			goto out;
> > +		kvm_for_each_vcpu(i, vcpu, kvm) {
> > +			if (kvm_vcpu_is_reset_bsp(vcpu)) {
> > +				bsp_vcpu = vcpu;
> > +				continue;
> > +			}
> > +		}
> > +		r = -EINVAL;
> > +		if (bsp_vcpu == NULL)
> > +			goto interrupt_unlock;
> > +		kvm_request_pic_injection(bsp_vcpu);
> > +		r = 0;
> > +interrupt_unlock:
> > +		mutex_unlock(&kvm->lock);
> > +		break;
> > +	}
> >  
> >  	default:
> >  		r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
> > @@ -6194,6 +6220,17 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
> >  	kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
> >  }
> >  
> > +static int must_fetch_userspace_extint(struct kvm_vcpu *vcpu)
> 
> I would rename this to kvm_accept_request_pic_injection.
> 
> Paolo
> 
> > +{
> > +	if (vcpu->arch.userspace_extint_available &&
> > +	    kvm_apic_accept_pic_intr(vcpu)) {
> > +		vcpu->arch.userspace_extint_available = false;
> > +		return true;
> > +	} else
> > +		return false;
> > +
> > +}
> > +
> >  static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
> >  {
> >  	int r;
> > @@ -6258,7 +6295,12 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
> >  				return r;
> >  		}
> >  		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
> > -			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
> > +			if (irqchip_split(vcpu->kvm) &&
> > +			    must_fetch_userspace_extint(vcpu)) {
> > +				return GET_VECTOR_FROM_USERSPACE;
> > +			}
> > +			kvm_queue_interrupt(vcpu,
> > +					    kvm_cpu_get_interrupt(vcpu),
> >  					    false);
> >  			kvm_x86_ops->set_irq(vcpu);
> >  		}
> > @@ -6424,13 +6466,19 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >  	}
> >  
> >  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> > +		int event;
> >  		kvm_apic_accept_events(vcpu);
> >  		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
> >  			r = 1;
> >  			goto out;
> >  		}
> > -
> > -		if (inject_pending_event(vcpu, req_int_win) != 0)
> > +		event = inject_pending_event(vcpu, req_int_win);
> > +		if (event == GET_VECTOR_FROM_USERSPACE) {
> > +			vcpu->run->exit_reason = KVM_EXIT_GET_EXTINT;
> > +			kvm_make_request(KVM_REQ_EVENT, vcpu);
> > +			r = 0;
> > +			goto out;
> > +		} else if (event != 0)
> >  			req_immediate_exit = true;
> >  		/* enable NMI/IRQ window open exits if needed */
> >  		else if (vcpu->arch.nmi_pending)
> > @@ -6747,6 +6795,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> >  	if (vcpu->sigset_active)
> >  		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
> >  
> > +	if (vcpu->run->exit_reason == KVM_EXIT_GET_EXTINT)
> > +		vcpu->arch.pending_external_vector = vcpu->run->extint.vector;
> > +
> >  	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
> >  		kvm_vcpu_block(vcpu);
> >  		kvm_apic_accept_events(vcpu);
> > @@ -7536,6 +7587,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> >  	kvm_async_pf_hash_reset(vcpu);
> >  	kvm_pmu_init(vcpu);
> >  
> > +	vcpu->arch.pending_external_vector = -1;
> > +
> >  	return 0;
> >  fail_free_wbinvd_dirty_mask:
> >  	free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 826a08d..0cf7ed6 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -184,6 +184,7 @@ struct kvm_s390_skeys {
> >  #define KVM_EXIT_SYSTEM_EVENT     24
> >  #define KVM_EXIT_S390_STSI        25
> >  #define KVM_EXIT_IOAPIC_EOI       26
> > +#define KVM_EXIT_GET_EXTINT       27
> >  
> >  /* For KVM_EXIT_INTERNAL_ERROR */
> >  /* Emulate instruction failed. */
> > @@ -334,6 +335,10 @@ struct kvm_run {
> >  		struct {
> >  			__u8 vector;
> >  		} eoi;
> > +		/* KVM_EXIT_GET_EXTINT */
> > +		struct {
> > +			__u8 vector;
> > +		} extint;
> >  		/* Fix the size of the union. */
> >  		char padding[256];
> >  	};
> > @@ -1206,6 +1211,8 @@ struct kvm_s390_ucas_mapping {
> >  /* Available with KVM_CAP_S390_IRQ_STATE */
> >  #define KVM_S390_SET_IRQ_STATE	  _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state)
> >  #define KVM_S390_GET_IRQ_STATE	  _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
> > +/* Available with KVM_CAP_SPLIT_IRQCHIP */
> > +#define KVM_REQUEST_PIC_INJECTION _IO(KVMIO, 0xb7)
> >  
> >  #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
> >  #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
> > 

Pinging this thread.

Should I go with skipping the round trip, and combining
KVM_REQUEST_PIC_INJECTION with the KVM_INTERRUPT (a VCPU IOCTL)?
[It's currently a VM IOCTL, which seems reasonable, given that the
PIC is a per VM device. When skipping the round trip, a VCPU Ioctl
seems sensible, given that an interrupt is associated with a specific
CPU.]

Steve
--
To unsubscribe from this list: send the line "unsubscribe kvm" in



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux