KVM_CAP_X2APIC_API can be enabled to extend APIC ID in get/set ioctl and MSI addresses to 32 bits. Both are needed to support x2APIC. The capability has to be toggleable and disabled by default, because get/set ioctl shifted and truncated APIC ID to 8 bits by using a non-standard protocol inspired by xAPIC and the change is not backward-compatible. Changes to MSI addresses follow the format used by the interrupt remapping unit. The upper address word, which used to be 0, contains the upper 24 bits of the LAPIC address in its upper 24 bits. Lower 8 bits are reserved as 0. Using the upper address word is not backward-compatible either as we didn't check that userspace zeroed the word. Reserved bits are still not explicitly checked, but non-zero data will affect LAPIC addresses, which will cause a bug. Signed-off-by: Radim Krčmář <rkrcmar@xxxxxxxxxx> --- v1: * rewritten with a toggleable capability [Paolo] * dropped MSI_ADDR_EXT_DEST_ID to enforce reserved bits Documentation/virtual/kvm/api.txt | 26 ++++++++++++++++++++++++++ arch/x86/include/asm/kvm_host.h | 4 +++- arch/x86/kvm/irq_comm.c | 14 ++++++++++---- arch/x86/kvm/lapic.c | 2 +- arch/x86/kvm/vmx.c | 2 +- arch/x86/kvm/x86.c | 12 ++++++++++++ include/uapi/linux/kvm.h | 1 + 7 files changed, 54 insertions(+), 7 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 09efa9eb3926..0f978089a0f6 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1482,6 +1482,9 @@ struct kvm_irq_routing_msi { __u32 pad; }; +If KVM_CAP_X2APIC_API is enabled, then address_hi bits 31-8 contain bits 31-8 +of destination id and address_hi bits 7-0 must be 0. + struct kvm_irq_routing_s390_adapter { __u64 ind_addr; __u64 summary_addr; @@ -1583,6 +1586,13 @@ struct kvm_lapic_state { Reads the Local APIC registers and copies them into the input argument. The data format and layout are the same as documented in the architecture manual. 
+If KVM_CAP_X2APIC_API is enabled, then the format of APIC_ID register depends +on APIC mode (reported by MSR_IA32_APICBASE) of its VCPU. If it is not enabled, the +format always follows xAPIC. + +x2APIC stores APIC ID as little endian in bits 31-0 of APIC_ID register. +xAPIC stores bits 7-0 of APIC ID in register bits 31-24. + 4.58 KVM_SET_LAPIC @@ -1600,6 +1610,8 @@ struct kvm_lapic_state { Copies the input argument into the Local APIC registers. The data format and layout are the same as documented in the architecture manual. +See the note about APIC_ID register in KVM_GET_LAPIC. + 4.59 KVM_IOEVENTFD @@ -2180,6 +2192,9 @@ struct kvm_msi { No flags are defined so far. The corresponding field must be 0. +If KVM_CAP_X2APIC_API is enabled, then address_hi bits 31-8 contain bits 31-8 +of destination id and address_hi bits 7-0 must be 0. + 4.71 KVM_CREATE_PIT2 @@ -3811,6 +3826,17 @@ Allows use of runtime-instrumentation introduced with zEC12 processor. Will return -EINVAL if the machine does not support runtime-instrumentation. Will return -EBUSY if a VCPU has already been created. +7.7 KVM_CAP_X2APIC_API + +Architectures: x86 +Parameters: none +Returns: 0 on success, -EINVAL if reserved parameters are not 0 + +Enabling this capability changes the behavior of KVM_SET_GSI_ROUTING, +KVM_SIGNAL_MSI, KVM_SET_LAPIC, and KVM_GET_LAPIC. See KVM_CAP_X2APIC_API +in their respective sections. + + 8. Other capabilities. 
---------------------- diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 459a789cb3da..48b0ca18066c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -782,6 +782,8 @@ struct kvm_arch { u32 ldr_mode; struct page *avic_logical_id_table_page; struct page *avic_physical_id_table_page; + + bool x2apic_api; }; struct kvm_vm_stat { @@ -1365,7 +1367,7 @@ bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu); void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm_lapic_irq *irq); + struct kvm_lapic_irq *irq, bool x2apic_api); static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 47ad681a33fd..4594644ab090 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -111,12 +111,17 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, } void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm_lapic_irq *irq) + struct kvm_lapic_irq *irq, bool x2apic_api) { trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); irq->dest_id = (e->msi.address_lo & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; + if (x2apic_api) + /* MSI_ADDR_EXT_DEST_ID() is omitted to introduce bugs on + * userspaces that set reserved bits 0-7. 
+ */ + irq->dest_id |= e->msi.address_hi; irq->vector = (e->msi.data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo; @@ -137,7 +142,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, if (!level) return -1; - kvm_set_msi_irq(e, &irq); + kvm_set_msi_irq(e, &irq, kvm->arch.x2apic_api); return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL); } @@ -153,7 +158,7 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, if (unlikely(e->type != KVM_IRQ_ROUTING_MSI)) return -EWOULDBLOCK; - kvm_set_msi_irq(e, &irq); + kvm_set_msi_irq(e, &irq, kvm->arch.x2apic_api); if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) return r; @@ -393,7 +398,8 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, if (entry->type != KVM_IRQ_ROUTING_MSI) continue; - kvm_set_msi_irq(entry, &irq); + kvm_set_msi_irq(entry, &irq, + vcpu->kvm->arch.x2apic_api); if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0, irq.dest_id, irq.dest_mode)) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 46eb71c425cf..178605635df5 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1983,7 +1983,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) static void __kvm_apic_state_fixup(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s, bool set) { - if (apic_x2apic_mode(vcpu->arch.apic)) { + if (apic_x2apic_mode(vcpu->arch.apic) && !vcpu->kvm->arch.x2apic_api) { u32 *id = (u32 *)(s->regs + APIC_ID); if (set) *id >>= 24; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a10038258b80..ea1f439b444e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11075,7 +11075,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, * We will support full lowest-priority interrupt later. 
*/ - kvm_set_msi_irq(e, &irq); + kvm_set_msi_irq(e, &irq, kvm->arch.x2apic_api); if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) { /* * Make sure the IRTE is in remapped mode if diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 043f110f2210..16b55f09dd16 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2576,6 +2576,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_DISABLE_QUIRKS: case KVM_CAP_SET_BOOT_CPU_ID: case KVM_CAP_SPLIT_IRQCHIP: + case KVM_CAP_X2APIC_API: #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_PCI_2_3: @@ -3799,6 +3800,17 @@ split_irqchip_unlock: mutex_unlock(&kvm->lock); break; } + case KVM_CAP_X2APIC_API: { + struct kvm_enable_cap valid = {.cap = KVM_CAP_X2APIC_API}; + + r = -EINVAL; + if (memcmp(cap, &valid, sizeof(valid))) + break; + + kvm->arch.x2apic_api = true; + r = 0; + break; + } default: r = -EINVAL; break; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 05ebf475104c..43b355d6db7b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -866,6 +866,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_ARM_PMU_V3 126 #define KVM_CAP_VCPU_ATTRIBUTES 127 #define KVM_CAP_MAX_VCPU_ID 128 +#define KVM_CAP_X2APIC_API 129 #ifdef KVM_CAP_IRQ_ROUTING -- 2.9.0 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html