On 30/07/2015 08:21, Steve Rutherford wrote: > First patch in a series which enables the relocation of the > PIC/IOAPIC to userspace. > > Adds capability KVM_CAP_SPLIT_IRQCHIP; > > KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the > rest of the irqchip. > > Compile tested for x86. > > Signed-off-by: Steve Rutherford <srutherford@xxxxxxxxxx> > Suggested-by: Andrew Honig <ahonig@xxxxxxxxxx> > --- > Documentation/virtual/kvm/api.txt | 15 +++++++++++++++ > arch/x86/include/asm/kvm_host.h | 2 ++ > arch/x86/kvm/i8254.c | 4 +++- > arch/x86/kvm/ioapic.h | 8 ++++++++ > arch/x86/kvm/irq.h | 11 ++++++++++- > arch/x86/kvm/irq_comm.c | 9 ++++++++- > arch/x86/kvm/lapic.c | 6 ++++-- > arch/x86/kvm/vmx.c | 4 ++-- > arch/x86/kvm/x86.c | 25 +++++++++++++++++++++++-- > include/linux/kvm_host.h | 1 + > include/uapi/linux/kvm.h | 1 + > 11 files changed, 77 insertions(+), 9 deletions(-) > > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt > index a4ebcb7..b655024 100644 > --- a/Documentation/virtual/kvm/api.txt > +++ b/Documentation/virtual/kvm/api.txt > @@ -3620,6 +3620,21 @@ struct { > > KVM handlers should exit to userspace with rc = -EREMOTE. > > +7.5 KVM_SPLIT_IRQCHIP > + > +Architectures: x86 > +Parameters: None > +Returns: 0 on success, -1 on error > + > +Create a local apic for each processor in the kernel. With this capability > +enabled, the userspace VMM is expected to emulate the IOAPIC and PIC. > + > +This supersedes KVM_CREATE_IRQCHIP, creating only local APICs, but no in kernel > +IOAPIC or PIC. This also enables in kernel routing of interrupt requests. > + > +Fails if VCPU has already been created, or if the irqchip is already in the > +kernel (i.e. KVM_CREATE_IRQCHIP has already been called). > + > > 8. Other capabilities. 
> ---------------------- > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index d3e7a53..b4fdf0c 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -670,6 +670,8 @@ struct kvm_arch { > bool boot_vcpu_runs_old_kvmclock; > > u64 disabled_quirks; > + > + bool irqchip_split; > }; > > struct kvm_vm_stat { > diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c > index f588eb7..08116ff 100644 > --- a/arch/x86/kvm/i8254.c > +++ b/arch/x86/kvm/i8254.c > @@ -35,6 +35,7 @@ > #include <linux/kvm_host.h> > #include <linux/slab.h> > > +#include "ioapic.h" > #include "irq.h" > #include "i8254.h" > #include "x86.h" > @@ -333,7 +334,8 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) > struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; > s64 interval; > > - if (ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) > + if (!ioapic_in_kernel(kvm) || > + ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) > return; > > interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); > diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h > index bf36d66..a8842c0 100644 > --- a/arch/x86/kvm/ioapic.h > +++ b/arch/x86/kvm/ioapic.h > @@ -97,6 +97,14 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) > return kvm->arch.vioapic; > } > > +static inline int ioapic_in_kernel(struct kvm *kvm) > +{ > + int ret; > + > + ret = (ioapic_irqchip(kvm) != NULL); > + return ret; > +} > + > void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); > bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, > int short_hand, unsigned int dest, int dest_mode); > diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h > index 3d782a2..72af5a9 100644 > --- a/arch/x86/kvm/irq.h > +++ b/arch/x86/kvm/irq.h > @@ -83,13 +83,22 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) > return kvm->arch.vpic; > } > > +static inline int irqchip_split(struct kvm *kvm) > +{ > + return 
kvm->arch.irqchip_split; > +} > + > static inline int irqchip_in_kernel(struct kvm *kvm) > { > + bool ret; > struct kvm_pic *vpic = pic_irqchip(kvm); > > /* Read vpic before kvm->irq_routing. */ > smp_rmb(); > - return vpic != NULL; > + ret = (vpic != NULL); > + ret |= irqchip_split(kvm); The memory barrier must come after the irqchip_split read as well, so that both vpic and irqchip_split are read before kvm->irq_routing. > + > + return ret; > } > > void kvm_pic_reset(struct kvm_kpic_state *s); > diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c > index 9efff9e..67f6b62 100644 > --- a/arch/x86/kvm/irq_comm.c > +++ b/arch/x86/kvm/irq_comm.c > @@ -208,7 +208,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) > goto unlock; > } > clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); > - if (!irqchip_in_kernel(kvm)) > + if (!ioapic_in_kernel(kvm)) > goto unlock; > > kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id); > @@ -328,3 +328,10 @@ int kvm_setup_default_irq_routing(struct kvm *kvm) > return kvm_set_irq_routing(kvm, default_routing, > ARRAY_SIZE(default_routing), 0); > } > + > +static const struct kvm_irq_routing_entry empty_routing[] = {}; > + > +int kvm_setup_empty_irq_routing(struct kvm *kvm) > +{ > + return kvm_set_irq_routing(kvm, empty_routing, 0, 0); > +} > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index c158f46..2f486d8 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -209,7 +209,8 @@ out: > if (old) > kfree_rcu(old, rcu); > > - kvm_vcpu_request_scan_ioapic(kvm); > + if (!irqchip_split(kvm)) Might as well use ioapic_in_kernel here. > + kvm_vcpu_request_scan_ioapic(kvm); > } > > static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) > @@ -1845,7 +1846,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, > kvm_x86_ops->hwapic_isr_update(vcpu->kvm, > apic_find_highest_isr(apic)); > kvm_make_request(KVM_REQ_EVENT, vcpu); > - kvm_rtc_eoi_tracking_restore_one(vcpu); > + if (!ioapic_in_kernel(vcpu->kvm)) This should not be negated; the call should run when ioapic_in_kernel() is true.
> + kvm_rtc_eoi_tracking_restore_one(vcpu); > } > > void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 7323ea8..3a9653f 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -948,7 +948,7 @@ static inline bool cpu_has_vmx_tpr_shadow(void) > > static inline bool vm_need_tpr_shadow(struct kvm *kvm) > { > - return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); > + return (cpu_has_vmx_tpr_shadow()) && irqchip_in_kernel(kvm); > } > > static inline bool cpu_has_secondary_exec_ctrls(void) > @@ -9486,7 +9486,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) > /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are > * emulated by vmx_set_efer(), below. > */ > - vm_entry_controls_init(vmx, > + vm_entry_controls_init(vmx, > (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & > ~VM_ENTRY_IA32E_MODE) | > (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index c6c6e9c..8e40ddf 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -2459,6 +2459,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) > case KVM_CAP_TSC_DEADLINE_TIMER: > case KVM_CAP_ENABLE_CAP_VM: > case KVM_CAP_DISABLE_QUIRKS: > + case KVM_CAP_SPLIT_IRQCHIP: > #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT > case KVM_CAP_ASSIGN_DEV_IRQ: > case KVM_CAP_PCI_2_3: > @@ -3566,6 +3567,26 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, > kvm->arch.disabled_quirks = cap->args[0]; > r = 0; > break; > + case KVM_CAP_SPLIT_IRQCHIP: { > + mutex_lock(&kvm->lock); > + r = -EEXIST; > + if (irqchip_in_kernel(kvm)) > + goto split_irqchip_unlock; > + if (!irqchip_split(kvm)) { This "if" must always be true: irqchip_in_kernel(), checked just above, already includes irqchip_split(), so the test is redundant. > + if (atomic_read(&kvm->online_vcpus)) > + goto split_irqchip_unlock; > + r = kvm_setup_empty_irq_routing(kvm); > + if (r) > + goto split_irqchip_unlock; > + /* Pairs with irqchip_in_kernel.
*/ > + smp_wmb(); > + kvm->arch.irqchip_split = true; > + } > + r = 0; > +split_irqchip_unlock: > + mutex_unlock(&kvm->lock); > + break; > + } > default: > r = -EINVAL; > break; > @@ -3679,7 +3700,7 @@ long kvm_arch_vm_ioctl(struct file *filp, > } > > r = -ENXIO; > - if (!irqchip_in_kernel(kvm)) > + if (!irqchip_in_kernel(kvm) || !ioapic_in_kernel(kvm)) You're using ioapic_in_kernel for the 8259 too. Better test !irqchip_in_kernel || irqchip_split here. > goto get_irqchip_out; > r = kvm_vm_ioctl_get_irqchip(kvm, chip); > if (r) > @@ -3703,7 +3724,7 @@ long kvm_arch_vm_ioctl(struct file *filp, > } > > r = -ENXIO; > - if (!irqchip_in_kernel(kvm)) > + if (!irqchip_in_kernel(kvm) || !ioapic_in_kernel(kvm)) Same here. > goto set_irqchip_out; > r = kvm_vm_ioctl_set_irqchip(kvm, chip); > if (r) > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 29c0c28..1867b83 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -1004,6 +1004,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) > #endif > > int kvm_setup_default_irq_routing(struct kvm *kvm); > +int kvm_setup_empty_irq_routing(struct kvm *kvm); > int kvm_set_irq_routing(struct kvm *kvm, > const struct kvm_irq_routing_entry *entries, > unsigned nr, > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 9ef19eb..e4304d0 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -818,6 +818,7 @@ struct kvm_ppc_smmu_info { > #define KVM_CAP_DISABLE_QUIRKS 116 > #define KVM_CAP_X86_SMM 117 > #define KVM_CAP_MULTI_ADDRESS_SPACE 118 > +#define KVM_CAP_SPLIT_IRQCHIP 119 > > #ifdef KVM_CAP_IRQ_ROUTING > > Please review this diff: diff --git b/arch/x86/kvm/irq.h a/arch/x86/kvm/irq.h index 72af5a989a2e..975cf33ef306 100644 --- b/arch/x86/kvm/irq.h +++ a/arch/x86/kvm/irq.h @@ -93,11 +90,11 @@ static inline int irqchip_split(struct kvm *kvm) bool ret; struct kvm_pic *vpic = pic_irqchip(kvm); - /* Read vpic before 
kvm->irq_routing. */ - smp_rmb(); ret = (vpic != NULL); ret |= irqchip_split(kvm); + /* Read vpic before kvm->irq_routing. */ + smp_rmb(); return ret; } diff --git b/arch/x86/kvm/lapic.c a/arch/x86/kvm/lapic.c index 2f486d8ecdae..a86324ca9cc3 100644 --- b/arch/x86/kvm/lapic.c +++ a/arch/x86/kvm/lapic.c @@ -209,7 +209,7 @@ out: if (old) kfree_rcu(old, rcu); - if (!irqchip_split(kvm)) + if (ioapic_in_kernel(kvm)) kvm_vcpu_request_scan_ioapic(kvm); } @@ -1846,7 +1846,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); kvm_make_request(KVM_REQ_EVENT, vcpu); - if (!ioapic_in_kernel(vcpu->kvm)) + if (ioapic_in_kernel(vcpu->kvm)) kvm_rtc_eoi_tracking_restore_one(vcpu); } diff --git b/arch/x86/kvm/x86.c a/arch/x86/kvm/x86.c index 49a98608e3f6..5d2b8695732c 100644 --- b/arch/x86/kvm/x86.c +++ a/arch/x86/kvm/x86.c @@ -3573,16 +3573,14 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, r = -EEXIST; if (irqchip_in_kernel(kvm)) goto split_irqchip_unlock; - if (!irqchip_split(kvm)) { - if (atomic_read(&kvm->online_vcpus)) - goto split_irqchip_unlock; - r = kvm_setup_empty_irq_routing(kvm); - if (r) - goto split_irqchip_unlock; - /* Pairs with irqchip_in_kernel. */ - smp_wmb(); - kvm->arch.irqchip_split = true; - } + if (atomic_read(&kvm->online_vcpus)) + goto split_irqchip_unlock; + r = kvm_setup_empty_irq_routing(kvm); + if (r) + goto split_irqchip_unlock; + /* Pairs with irqchip_in_kernel. 
*/ + smp_wmb(); + kvm->arch.irqchip_split = true; r = 0; split_irqchip_unlock: mutex_unlock(&kvm->lock); @@ -3701,7 +3699,7 @@ long kvm_arch_vm_ioctl(struct file *filp, } r = -ENXIO; - if (!irqchip_in_kernel(kvm) || !ioapic_in_kernel(kvm)) + if (!irqchip_in_kernel(kvm) || irqchip_split(kvm)) goto get_irqchip_out; r = kvm_vm_ioctl_get_irqchip(kvm, chip); if (r) @@ -3725,7 +3723,7 @@ long kvm_arch_vm_ioctl(struct file *filp, } r = -ENXIO; - if (!irqchip_in_kernel(kvm) || !ioapic_in_kernel(kvm)) + if (!irqchip_in_kernel(kvm) || irqchip_split(kvm)) goto set_irqchip_out; r = kvm_vm_ioctl_set_irqchip(kvm, chip); if (r) No need to resend. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html