On Mon, Jan 21, 2013 at 05:59:07PM -0200, Marcelo Tosatti wrote: > On Wed, Jan 16, 2013 at 06:21:11PM +0800, Yang Zhang wrote: > > From: Yang Zhang <yang.z.zhang@xxxxxxxxx> > > > > basically to benefit from apicv, we need to enable virtualized x2apic mode. > > Currently, we only enable it when guest is really using x2apic. > > > > Also, clear MSR bitmap for corresponding x2apic MSRs when guest enabled x2apic: > > 0x800 - 0x8ff: no read intercept for apicv register virtualization, > > except APIC ID and TMCCT which need software's assistance to > > get right value. > > > > Signed-off-by: Kevin Tian <kevin.tian@xxxxxxxxx> > > Signed-off-by: Yang Zhang <yang.z.zhang@xxxxxxxxx> > > --- > > arch/x86/include/asm/kvm_host.h | 1 + > > arch/x86/include/asm/vmx.h | 1 + > > arch/x86/kvm/lapic.c | 20 ++-- > > arch/x86/kvm/lapic.h | 5 + > > arch/x86/kvm/svm.c | 6 + > > arch/x86/kvm/vmx.c | 204 +++++++++++++++++++++++++++++++++++---- > > 6 files changed, 209 insertions(+), 28 deletions(-) > > > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > index c431b33..35aa8e6 100644 > > --- a/arch/x86/include/asm/kvm_host.h > > +++ b/arch/x86/include/asm/kvm_host.h > > @@ -697,6 +697,7 @@ struct kvm_x86_ops { > > void (*enable_nmi_window)(struct kvm_vcpu *vcpu); > > void (*enable_irq_window)(struct kvm_vcpu *vcpu); > > void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); > > + void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); > > int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); > > int (*get_tdp_level)(void); > > u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); > > diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h > > index 44c3f7e..0a54df0 100644 > > --- a/arch/x86/include/asm/vmx.h > > +++ b/arch/x86/include/asm/vmx.h > > @@ -139,6 +139,7 @@ > > #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 > > #define SECONDARY_EXEC_ENABLE_EPT 0x00000002 > > #define 
SECONDARY_EXEC_RDTSCP 0x00000008 > > +#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 > > #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 > > #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 > > #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > > index 0664c13..f39aee3 100644 > > --- a/arch/x86/kvm/lapic.c > > +++ b/arch/x86/kvm/lapic.c > > @@ -140,11 +140,6 @@ static inline int apic_enabled(struct kvm_lapic *apic) > > (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ > > APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) > > > > -static inline int apic_x2apic_mode(struct kvm_lapic *apic) > > -{ > > - return apic->vcpu->arch.apic_base & X2APIC_ENABLE; > > -} > > - > > static inline int kvm_apic_id(struct kvm_lapic *apic) > > { > > return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; > > @@ -1323,12 +1318,17 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) > > if (!kvm_vcpu_is_bsp(apic->vcpu)) > > value &= ~MSR_IA32_APICBASE_BSP; > > > > - vcpu->arch.apic_base = value; > > - if (apic_x2apic_mode(apic)) { > > - u32 id = kvm_apic_id(apic); > > - u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); > > - kvm_apic_set_ldr(apic, ldr); > > + if ((vcpu->arch.apic_base ^ value) & X2APIC_ENABLE) { > > + if (value & X2APIC_ENABLE) { > > + u32 id = kvm_apic_id(apic); > > + u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); > > + kvm_apic_set_ldr(apic, ldr); > > + kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true); > > + } else > > + kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false); > > } > > + > > + vcpu->arch.apic_base = value; > > Simpler to have > > if (apic_x2apic_mode(apic)) { > ... > kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true); > } else { > kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false); > } > This will not work during cpu init. That was discussed on one of the previous iterations of the patch. 
When this code is called during vcpu init, the vmcs is not loaded yet, so set_virtual_x2apic_mode() cannot write into it. > Also it must be done after assignment of vcpu->arch.apic_base (this > patch has vcpu->arch.apic_base being read from > ->set_virtual_x2apic_mode() path). > > > +static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) > > +{ > > + unsigned long *msr_bitmap; > > + > > + if (apic_x2apic_mode(vcpu->arch.apic)) > > vcpu->arch.apic can be NULL. > > > +static void vmx_intercept_for_msr_read_x2apic(u32 msr, bool set) > > +{ > > + if (set) { > > + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, > > + msr, MSR_TYPE_R); > > + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, > > + msr, MSR_TYPE_R); > > + } else { > > + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, > > + msr, MSR_TYPE_R); > > + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, > > + msr, MSR_TYPE_R); > > + } > > +} > > Please retain the enable_intercept/disable_intercept naming in the > function name, instead of a set parameter. > > > +static void vmx_intercept_for_msr_write_x2apic(u32 msr, bool set) > > +{ > > + if (set) { > > + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, > > + msr, MSR_TYPE_W); > > + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, > > + msr, MSR_TYPE_W); > > + } else { > > + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, > > + msr, MSR_TYPE_W); > > + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, > > + msr, MSR_TYPE_W); > > + } > > } > > Same here. > > > @@ -3848,6 +3950,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) > > exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; > > if (!enable_apicv_reg) > > exec_control &= ~SECONDARY_EXEC_APIC_REGISTER_VIRT; > > + exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; > > return exec_control; > > Unconditionally disabling SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE? Its > awkward. 
> That is how vmx_secondary_exec_control works though. It takes what can be set in secondary exec control and drops what should not be set there. > > + struct vcpu_vmx *vmx = to_vmx(vcpu); > > + > > + /* There is not point to enable virtualize x2apic without enable > > + * apicv*/ > > + if (!cpu_has_vmx_virtualize_x2apic_mode() || !enable_apicv_reg) > > + return; > > + > > + if (set) { > > + exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); > > + /* virtualize x2apic mode relies on tpr shadow */ > > + if (!(exec_control & CPU_BASED_TPR_SHADOW)) > > + return; > > + } > > + > > + sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); > > + > > + if (set) { > > + sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; > > + sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; > > + } else { > > + sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; > > + if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) > > + sec_exec_control |= > > + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; > > + } > > + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); > > + > > + if (set) { > > + for (msr = 0x800; msr <= 0x8ff; msr++) > > + vmx_intercept_for_msr_read_x2apic(msr, false); > > + > > + /* According SDM, in x2apic mode, the whole id reg is used. > > + * But in KVM, it only use the highest eight bits. Need to > > + * intercept it */ > > + vmx_intercept_for_msr_read_x2apic(0x802, true); > > + /* TMCCT */ > > + vmx_intercept_for_msr_read_x2apic(0x839, true); > > + /* TPR */ > > + vmx_intercept_for_msr_write_x2apic(0x808, false); > > + } > > Why not disable write intercept for all MSRs which represent APIC registers > that are virtualized? Why TPR is special? > This patch goes before vid is enabled. At this point only TPR is virtualized. If an APIC_WRITE exit is generated on an unhandled MSR write, then we can disable the intercept for all x2apic MSRs here. -- Gleb. 
-- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html