This patch implements a new module named vmcsinfo-intel. The module fills VMCSINFO with the VMCS revision identifier, and encoded offsets of VMCS fields. Note, offsets of fields below will not be filled into VMCSINFO: 1. fields defined in Intel specification (Intel(R) 64 and IA-32 Architectures Software Developer's Manual, Volume 3C) but not defined in *vmcs_field*. 2. fields that do not exist because their corresponding control bits are not set. Signed-off-by: zhangyanfei <zhangyanfei at cn.fujitsu.com> --- arch/x86/kvm/Kconfig | 11 ++ arch/x86/kvm/Makefile | 3 + arch/x86/kvm/vmcsinfo.c | 402 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 416 insertions(+), 0 deletions(-) create mode 100644 arch/x86/kvm/vmcsinfo.c diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 1a7fe86..87df9d4 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -62,6 +62,17 @@ config KVM_INTEL To compile this as a module, choose M here: the module will be called kvm-intel. +config VMCSINFO_INTEL + tristate "Export VMCSINFO for Intel processors" + depends on KVM_INTEL + ---help--- + Provides support for exporting VMCSINFO on Intel processors equipped + with the VT extensions. The VMCSINFO contains a VMCS revision + identifier and offsets of VMCS fields. + + To compile this as a module, choose M here: the module + will be called vmcsinfo-intel. + config KVM_AMD tristate "KVM for AMD processors support" depends on KVM diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 4f579e8..12a1ef6 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -4,6 +4,7 @@ ccflags-y += -Ivirt/kvm -Iarch/x86/kvm CFLAGS_x86.o := -I. CFLAGS_svm.o := -I. CFLAGS_vmx.o := -I. +CFLAGS_vmcsinfo.o := -I. 
kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ coalesced_mmio.o irq_comm.o eventfd.o \ @@ -15,7 +16,9 @@ kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ i8254.o timer.o cpuid.o pmu.o kvm-intel-y += vmx.o kvm-amd-y += svm.o +vmcsinfo-intel-y += vmcsinfo.o obj-$(CONFIG_KVM) += kvm.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o obj-$(CONFIG_KVM_AMD) += kvm-amd.o +obj-$(CONFIG_VMCSINFO_INTEL) += vmcsinfo-intel.o diff --git a/arch/x86/kvm/vmcsinfo.c b/arch/x86/kvm/vmcsinfo.c new file mode 100644 index 0000000..288c445 --- /dev/null +++ b/arch/x86/kvm/vmcsinfo.c @@ -0,0 +1,402 @@ +/* + * Kernel-based Virtual Machine driver for Linux + * + * This module enables machines with Intel VT-x extensions to export + * offsets of VMCS fields for guest debugging. + * + * Copyright (C) 2012 Fujitsu, Inc. + * + * Authors: + * Zhang Yanfei <zhangyanfei at cn.fujitsu.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include <linux/module.h> +#include <linux/mod_devicetable.h> +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/tboot.h> +#include <linux/kvm_host.h> + +#include <asm/vmx.h> +#include <asm/special_insns.h> +#include <asm/processor-flags.h> +#include <asm/msr.h> +#include <asm/msr-index.h> +#include <asm/vmcsinfo.h> + +MODULE_AUTHOR("Fujitsu"); +MODULE_LICENSE("GPL"); + +static const struct x86_cpu_id vmcsinfo_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_VMX), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, vmcsinfo_cpu_id); + +/* + * For calculating offsets of fields in VMCS data, we index every 16-bit + * field by this kind of format: + * | --------- 16 bits ---------- | + * +-------------+-+------------+-+ + * | high 7 bits |1| low 7 bits |0| + * +-------------+-+------------+-+ + * In the high byte, the lowest bit must be 1; in the low byte, the lowest + * bit must be 0. The two bits are set like this in case indexes in VMCS + * data are read in big-endian mode. 
+ * The remaining 14 bits of the index indicate the real offset of the
+ * field. The size of a VMCS region is at most 4 KBytes, so 14 bits
+ * are enough to index the whole VMCS region.
+ *
+ * ENCODING_OFFSET: encode the offset into the index of this kind.
+ * The low 7 bits of the offset are shifted into bits 1-7, the next
+ * 7 bits into bits 9-15, and bit 8 is forced to 1.
+ * For example, offset 0x88 is encoded as 0x0310.
+ */
+#define OFFSET_HIGH_SHIFT	(7)
+#define OFFSET_LOW_MASK		((1 << OFFSET_HIGH_SHIFT) - 1) /* 0x7f */
+#define OFFSET_HIGH_MASK	(OFFSET_LOW_MASK << OFFSET_HIGH_SHIFT) /* 0x3f80 */
+#define ENCODING_OFFSET(offset) \
+	((((offset) & OFFSET_LOW_MASK) << 1) + \
+	((((offset) & OFFSET_HIGH_MASK) << 2) | 0x100))
+
+/*
+ * We separate these five control fields from other fields
+ * because some fields only exist on processors that support
+ * the 1-setting of control bits in the five control fields.
+ *
+ * Each control field is handed to VMCSINFO_FIELD together with the
+ * value vmcs_read32() returns for it. SECONDARY_VM_EXEC_CONTROL is
+ * only handled when cpu_has_secondary_exec_ctrls() is true.
+ */
+static inline void append_control_field(void)
+{
+#define CONTROL_FIELD_OFFSET(field) \
+	VMCSINFO_FIELD(field, vmcs_read32(field))
+
+	CONTROL_FIELD_OFFSET(PIN_BASED_VM_EXEC_CONTROL);
+	CONTROL_FIELD_OFFSET(CPU_BASED_VM_EXEC_CONTROL);
+	if (cpu_has_secondary_exec_ctrls()) {
+		CONTROL_FIELD_OFFSET(SECONDARY_VM_EXEC_CONTROL);
+	}
+	CONTROL_FIELD_OFFSET(VM_EXIT_CONTROLS);
+	CONTROL_FIELD_OFFSET(VM_ENTRY_CONTROLS);
+}
+/*
+ * Hand every guest and host selector field, with the value
+ * vmcs_read16() returns for it, to VMCSINFO_FIELD.
+ */
+static inline void append_field16(void)
+{
+#define FIELD_OFFSET16(field) \
+	VMCSINFO_FIELD(field, vmcs_read16(field))
+
+	FIELD_OFFSET16(GUEST_ES_SELECTOR);
+	FIELD_OFFSET16(GUEST_CS_SELECTOR);
+	FIELD_OFFSET16(GUEST_SS_SELECTOR);
+	FIELD_OFFSET16(GUEST_DS_SELECTOR);
+	FIELD_OFFSET16(GUEST_FS_SELECTOR);
+	FIELD_OFFSET16(GUEST_GS_SELECTOR);
+	FIELD_OFFSET16(GUEST_LDTR_SELECTOR);
+	FIELD_OFFSET16(GUEST_TR_SELECTOR);
+	FIELD_OFFSET16(HOST_ES_SELECTOR);
+	FIELD_OFFSET16(HOST_CS_SELECTOR);
+	FIELD_OFFSET16(HOST_SS_SELECTOR);
+	FIELD_OFFSET16(HOST_DS_SELECTOR);
+	FIELD_OFFSET16(HOST_FS_SELECTOR);
+	FIELD_OFFSET16(HOST_GS_SELECTOR);
+	FIELD_OFFSET16(HOST_TR_SELECTOR);
+}
+/*
+ * Hand the fields read with vmcs_read64(), each followed by its
+ * *_HIGH counterpart, to VMCSINFO_FIELD. Fields that depend on an
+ * optional feature are only handled when the matching cpu_has_*()
+ * helper or vmcs_config control bit is set.
+ */
+static inline void append_field64(void)
+{
+#define FIELD_OFFSET64(field) \
+	VMCSINFO_FIELD(field, vmcs_read64(field))
+
+	FIELD_OFFSET64(IO_BITMAP_A);
+	FIELD_OFFSET64(IO_BITMAP_A_HIGH);
+	FIELD_OFFSET64(IO_BITMAP_B);
+	FIELD_OFFSET64(IO_BITMAP_B_HIGH);
+	FIELD_OFFSET64(VM_EXIT_MSR_STORE_ADDR);
+	FIELD_OFFSET64(VM_EXIT_MSR_STORE_ADDR_HIGH);
+	FIELD_OFFSET64(VM_EXIT_MSR_LOAD_ADDR);
+	FIELD_OFFSET64(VM_EXIT_MSR_LOAD_ADDR_HIGH);
+	FIELD_OFFSET64(VM_ENTRY_MSR_LOAD_ADDR);
+	FIELD_OFFSET64(VM_ENTRY_MSR_LOAD_ADDR_HIGH);
+	FIELD_OFFSET64(TSC_OFFSET);
+	FIELD_OFFSET64(TSC_OFFSET_HIGH);
+	FIELD_OFFSET64(VMCS_LINK_POINTER);
+	FIELD_OFFSET64(VMCS_LINK_POINTER_HIGH);
+	FIELD_OFFSET64(GUEST_IA32_DEBUGCTL);
+	FIELD_OFFSET64(GUEST_IA32_DEBUGCTL_HIGH);
+
+	if (cpu_has_vmx_msr_bitmap()) {
+		FIELD_OFFSET64(MSR_BITMAP);
+		FIELD_OFFSET64(MSR_BITMAP_HIGH);
+	}
+
+	if (cpu_has_vmx_tpr_shadow()) {
+		FIELD_OFFSET64(VIRTUAL_APIC_PAGE_ADDR);
+		FIELD_OFFSET64(VIRTUAL_APIC_PAGE_ADDR_HIGH);
+	}
+
+	if (cpu_has_secondary_exec_ctrls()) {
+		if (vmcs_config.cpu_based_2nd_exec_ctrl &
+		    SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) {
+			FIELD_OFFSET64(APIC_ACCESS_ADDR);
+			FIELD_OFFSET64(APIC_ACCESS_ADDR_HIGH);
+		}
+		if (cpu_has_vmx_ept()) {
+			FIELD_OFFSET64(EPT_POINTER);
+			FIELD_OFFSET64(EPT_POINTER_HIGH);
+			FIELD_OFFSET64(GUEST_PHYSICAL_ADDRESS);
+			FIELD_OFFSET64(GUEST_PHYSICAL_ADDRESS_HIGH);
+			FIELD_OFFSET64(GUEST_PDPTR0);
+			FIELD_OFFSET64(GUEST_PDPTR0_HIGH);
+			FIELD_OFFSET64(GUEST_PDPTR1);
+			FIELD_OFFSET64(GUEST_PDPTR1_HIGH);
+			FIELD_OFFSET64(GUEST_PDPTR2);
+			FIELD_OFFSET64(GUEST_PDPTR2_HIGH);
+			FIELD_OFFSET64(GUEST_PDPTR3);
+			FIELD_OFFSET64(GUEST_PDPTR3_HIGH);
+		}
+	}
+
+	if (vmcs_config.vmexit_ctrl & VM_EXIT_SAVE_IA32_PAT || \
+	    vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
+		FIELD_OFFSET64(GUEST_IA32_PAT);
+		FIELD_OFFSET64(GUEST_IA32_PAT_HIGH);
+	}
+
+	if (vmcs_config.vmexit_ctrl & VM_EXIT_SAVE_IA32_EFER || \
+	    vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_EFER) {
+		FIELD_OFFSET64(GUEST_IA32_EFER);
+		FIELD_OFFSET64(GUEST_IA32_EFER_HIGH);
+	}
+
+	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) {
+		FIELD_OFFSET64(GUEST_IA32_PERF_GLOBAL_CTRL);
+		FIELD_OFFSET64(GUEST_IA32_PERF_GLOBAL_CTRL_HIGH);
+	}
+
+	if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
+		FIELD_OFFSET64(HOST_IA32_PAT);
+		FIELD_OFFSET64(HOST_IA32_PAT_HIGH);
+	}
+
+	if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_EFER) {
+		FIELD_OFFSET64(HOST_IA32_EFER);
+		FIELD_OFFSET64(HOST_IA32_EFER_HIGH);
+	}
+
+	if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) {
+		FIELD_OFFSET64(HOST_IA32_PERF_GLOBAL_CTRL);
+		FIELD_OFFSET64(HOST_IA32_PERF_GLOBAL_CTRL_HIGH);
+	}
+}
+/*
+ * Hand the fields read with vmcs_read32() to VMCSINFO_FIELD.
+ * TPR_THRESHOLD is only handled when cpu_has_vmx_tpr_shadow() is true,
+ * PLE_GAP and PLE_WINDOW only when both cpu_has_secondary_exec_ctrls()
+ * and cpu_has_vmx_ple() are true.
+ */
+static inline void append_field32(void)
+{
+#define FIELD_OFFSET32(field) \
+	VMCSINFO_FIELD(field, vmcs_read32(field))
+
+	FIELD_OFFSET32(EXCEPTION_BITMAP);
+	FIELD_OFFSET32(PAGE_FAULT_ERROR_CODE_MASK);
+	FIELD_OFFSET32(PAGE_FAULT_ERROR_CODE_MATCH);
+	FIELD_OFFSET32(CR3_TARGET_COUNT);
+	FIELD_OFFSET32(VM_EXIT_MSR_STORE_COUNT);
+	FIELD_OFFSET32(VM_EXIT_MSR_LOAD_COUNT);
+	FIELD_OFFSET32(VM_ENTRY_MSR_LOAD_COUNT);
+	FIELD_OFFSET32(VM_ENTRY_INTR_INFO_FIELD);
+	FIELD_OFFSET32(VM_ENTRY_EXCEPTION_ERROR_CODE);
+	FIELD_OFFSET32(VM_ENTRY_INSTRUCTION_LEN);
+	FIELD_OFFSET32(VM_INSTRUCTION_ERROR);
+	FIELD_OFFSET32(VM_EXIT_REASON);
+	FIELD_OFFSET32(VM_EXIT_INTR_INFO);
+	FIELD_OFFSET32(VM_EXIT_INTR_ERROR_CODE);
+	FIELD_OFFSET32(IDT_VECTORING_INFO_FIELD);
+	FIELD_OFFSET32(IDT_VECTORING_ERROR_CODE);
+	FIELD_OFFSET32(VM_EXIT_INSTRUCTION_LEN);
+	FIELD_OFFSET32(VMX_INSTRUCTION_INFO);
+	FIELD_OFFSET32(GUEST_ES_LIMIT);
+	FIELD_OFFSET32(GUEST_CS_LIMIT);
+	FIELD_OFFSET32(GUEST_SS_LIMIT);
+	FIELD_OFFSET32(GUEST_DS_LIMIT);
+	FIELD_OFFSET32(GUEST_FS_LIMIT);
+	FIELD_OFFSET32(GUEST_GS_LIMIT);
+	FIELD_OFFSET32(GUEST_LDTR_LIMIT);
+	FIELD_OFFSET32(GUEST_TR_LIMIT);
+	FIELD_OFFSET32(GUEST_GDTR_LIMIT);
+	FIELD_OFFSET32(GUEST_IDTR_LIMIT);
+	FIELD_OFFSET32(GUEST_ES_AR_BYTES);
+	FIELD_OFFSET32(GUEST_CS_AR_BYTES);
+	FIELD_OFFSET32(GUEST_SS_AR_BYTES);
+	FIELD_OFFSET32(GUEST_DS_AR_BYTES);
+	FIELD_OFFSET32(GUEST_FS_AR_BYTES);
+	FIELD_OFFSET32(GUEST_GS_AR_BYTES);
+	FIELD_OFFSET32(GUEST_LDTR_AR_BYTES);
+	FIELD_OFFSET32(GUEST_TR_AR_BYTES);
+	FIELD_OFFSET32(GUEST_INTERRUPTIBILITY_INFO);
+	FIELD_OFFSET32(GUEST_ACTIVITY_STATE);
+	FIELD_OFFSET32(GUEST_SYSENTER_CS);
+	FIELD_OFFSET32(HOST_IA32_SYSENTER_CS);
+
+	if (cpu_has_vmx_tpr_shadow()) {
+		FIELD_OFFSET32(TPR_THRESHOLD);
+	}
+	if (cpu_has_secondary_exec_ctrls()) {
+		if (cpu_has_vmx_ple()) {
+			FIELD_OFFSET32(PLE_GAP);
+			FIELD_OFFSET32(PLE_WINDOW);
+		}
+	}
+}
+/*
+ * Hand the remaining fields, read with vmcs_readl(), to
+ * VMCSINFO_FIELD.
+ */
+static inline void append_field(void)
+{
+#define FIELD_OFFSET(field) \
+	VMCSINFO_FIELD(field, vmcs_readl(field))
+
+	FIELD_OFFSET(CR0_GUEST_HOST_MASK);
+	FIELD_OFFSET(CR4_GUEST_HOST_MASK);
+	FIELD_OFFSET(CR0_READ_SHADOW);
+	FIELD_OFFSET(CR4_READ_SHADOW);
+	FIELD_OFFSET(CR3_TARGET_VALUE0);
+	FIELD_OFFSET(CR3_TARGET_VALUE1);
+	FIELD_OFFSET(CR3_TARGET_VALUE2);
+	FIELD_OFFSET(CR3_TARGET_VALUE3);
+	FIELD_OFFSET(EXIT_QUALIFICATION);
+	FIELD_OFFSET(GUEST_LINEAR_ADDRESS);
+	FIELD_OFFSET(GUEST_CR0);
+	FIELD_OFFSET(GUEST_CR3);
+	FIELD_OFFSET(GUEST_CR4);
+	FIELD_OFFSET(GUEST_ES_BASE);
+	FIELD_OFFSET(GUEST_CS_BASE);
+	FIELD_OFFSET(GUEST_SS_BASE);
+	FIELD_OFFSET(GUEST_DS_BASE);
+	FIELD_OFFSET(GUEST_FS_BASE);
+	FIELD_OFFSET(GUEST_GS_BASE);
+	FIELD_OFFSET(GUEST_LDTR_BASE);
+	FIELD_OFFSET(GUEST_TR_BASE);
+	FIELD_OFFSET(GUEST_GDTR_BASE);
+	FIELD_OFFSET(GUEST_IDTR_BASE);
+	FIELD_OFFSET(GUEST_DR7);
+	FIELD_OFFSET(GUEST_RSP);
+	FIELD_OFFSET(GUEST_RIP);
+	FIELD_OFFSET(GUEST_RFLAGS);
+	FIELD_OFFSET(GUEST_PENDING_DBG_EXCEPTIONS);
+	FIELD_OFFSET(GUEST_SYSENTER_ESP);
+	FIELD_OFFSET(GUEST_SYSENTER_EIP);
+	FIELD_OFFSET(HOST_CR0);
+	FIELD_OFFSET(HOST_CR3);
+	FIELD_OFFSET(HOST_CR4);
+	FIELD_OFFSET(HOST_FS_BASE);
+	FIELD_OFFSET(HOST_GS_BASE);
+	FIELD_OFFSET(HOST_TR_BASE);
+	FIELD_OFFSET(HOST_GDTR_BASE);
+	FIELD_OFFSET(HOST_IDTR_BASE);
+	FIELD_OFFSET(HOST_IA32_SYSENTER_ESP);
+	FIELD_OFFSET(HOST_IA32_SYSENTER_EIP);
+	FIELD_OFFSET(HOST_RSP);
+	FIELD_OFFSET(HOST_RIP);
+}
+
+/*
+ * The format of VMCSINFO is given below:
+ * 
+-------------+--------------------------+ + * | Byte offset | Contents | + * +-------------+--------------------------+ + * | 0 | VMCS revision identifier | + * +-------------+--------------------------+ + * | 4 | <field><encoded offset> | + * +-------------+--------------------------+ + * | 16 | <field><encoded offset> | + * +-------------+--------------------------+ + * ...... + * + * The first 32 bits of VMCSINFO contains the VMCS revision + * identifier. + * The remainder of VMCSINFO is used for <field><encoded offset> + * sets. Each set takes 12 bytes: field occupys 4 bytes + * and its corresponding encoded offset occupys 8 bytes. + * + * Encoded offsets are raw values read by vmcs_read{16, 64, 32, l}, + * and they are all unsigned extended to 8 bytes for each + * <field><encoded offset> set has the same size. + * We do not decode offsets here. The decoding work is delayed + * in userspace tools. + * + * Note, offsets of fields below will not be filled into + * VMCSINFO: + * 1. fields defined in Intel specification (Intel? 64 and + * IA-32 Architectures Software Developer?s Manual, Volume + * 3C) but not defined in *vmcs_field*. + * 2. fields don't exist because their corresponding + * control bits are not set. + */ +static int __init alloc_vmcsinfo_init(void) +{ +/* + * The first 8 bytes in vmcs region are for + * VMCS revision identifier + * VMX-abort indicator + */ +#define FIELD_START (8) + + int r, offset; + struct vmcs *vmcs; + int cpu; + + if (vmcsinfo_size) + return 0; + + vmcs = alloc_vmcs(); + if (!vmcs) { + return -ENOMEM; + } + + r = hardware_enable_all(); + if (r) + goto out_err; + + /* + * Write encoded offsets into VMCS data for later vmcs_read. 
+ */ + for (offset = FIELD_START; offset < vmcs_config.size; + offset += sizeof(u16)) + *(u16 *)((char *)vmcs + offset) = ENCODING_OFFSET(offset); + + cpu = get_cpu(); + vmcs_clear(vmcs); + per_cpu(current_vmcs, cpu) = vmcs; + vmcs_load(vmcs); + + VMCSINFO_REVISION_ID(vmcs->revision_id); + append_control_field(); + + vmcs_write_control_field(PIN_BASED_VM_EXEC_CONTROL, + vmcs_config.pin_based_exec_ctrl); + vmcs_write_control_field(CPU_BASED_VM_EXEC_CONTROL, + vmcs_config.cpu_based_exec_ctrl); + if (cpu_has_secondary_exec_ctrls()) { + vmcs_write_control_field(SECONDARY_VM_EXEC_CONTROL, + vmcs_config.cpu_based_2nd_exec_ctrl); + } + vmcs_write_control_field(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); + vmcs_write_control_field(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); + + append_field16(); + append_field64(); + append_field32(); + append_field(); + + update_vmcsinfo_note(); + + vmcs_clear(vmcs); + put_cpu(); + +out_err: + free_vmcs(vmcs); + return r; +} + +static void __exit alloc_vmcsinfo_exit(void) +{ + hardware_disable_all(); +} + +module_init(alloc_vmcsinfo_init); +module_exit(alloc_vmcsinfo_exit); -- 1.7.1