On 26/05/2017 02:47, Andy Lutomirski wrote:
> When PCID is enabled, CR3's PCID bits can change during context
> switches, so KVM won't be able to treat CR3 as a per-mm constant any
> more.
>
> I structured this like the existing CR4 handling. Under ordinary
> circumstances (PCID disabled or if the current PCID and the value
> that's already in the VMCS match), then we won't do an extra VMCS
> write, and we'll never do an extra direct CR3 read. The overhead
> should be minimal.
>
> I disallowed using the new helper in non-atomic context because
> PCID support will cause CR3 to stop being constant in non-atomic
> process context.
>
> (Frankly, it also scares me a bit that KVM ever treated CR3 as
> constant, but it looks like it was okay before.)
>
> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx>
> Cc: kvm@xxxxxxxxxxxxxxx
> Cc: Rik van Riel <riel@xxxxxxxxxx>
> Cc: Dave Hansen <dave.hansen@xxxxxxxxx>
> Cc: Nadav Amit <namit@xxxxxxxxxx>
> Cc: Michal Hocko <mhocko@xxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Cc: Arjan van de Ven <arjan@xxxxxxxxxxxxxxx>
> Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
> ---
>  arch/x86/include/asm/mmu_context.h | 19 +++++++++++++++++++
>  arch/x86/kvm/vmx.c | 21 ++++++++++++++++++---
>  2 files changed, 37 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
> index 187c39470a0b..f20d7ea47095 100644
> --- a/arch/x86/include/asm/mmu_context.h
> +++ b/arch/x86/include/asm/mmu_context.h
> @@ -266,4 +266,23 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
>  	return __pkru_allows_pkey(vma_pkey(vma), write);
>  }
>
> +
> +/*
> + * This can be used from process context to figure out what the value of
> + * CR3 is without needing to do a (slow) read_cr3().
> + *
> + * It's intended to be used for code like KVM that sneakily changes CR3
> + * and needs to restore it. It needs to be used very carefully.
> + */
> +static inline unsigned long __get_current_cr3_fast(void)
> +{
> +	unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
> +
> +	/* For now, be very restrictive about when this can be called. */
> +	VM_WARN_ON(in_nmi() || !in_atomic());
> +
> +	VM_BUG_ON(cr3 != read_cr3());
> +	return cr3;
> +}
> +
>  #endif /* _ASM_X86_MMU_CONTEXT_H */
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 72f78396bc09..b7b36c9ffa3d 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -48,6 +48,7 @@
>  #include <asm/kexec.h>
>  #include <asm/apic.h>
>  #include <asm/irq_remapping.h>
> +#include <asm/mmu_context.h>
>
>  #include "trace.h"
>  #include "pmu.h"
> @@ -596,6 +597,7 @@ struct vcpu_vmx {
>  		int gs_ldt_reload_needed;
>  		int fs_reload_needed;
>  		u64 msr_host_bndcfgs;
> +		unsigned long vmcs_host_cr3; /* May not match real cr3 */
>  		unsigned long vmcs_host_cr4; /* May not match real cr4 */
>  	} host_state;
>  	struct {
> @@ -5012,12 +5014,19 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
>  	u32 low32, high32;
>  	unsigned long tmpl;
>  	struct desc_ptr dt;
> -	unsigned long cr0, cr4;
> +	unsigned long cr0, cr3, cr4;
>
>  	cr0 = read_cr0();
>  	WARN_ON(cr0 & X86_CR0_TS);
>  	vmcs_writel(HOST_CR0, cr0); /* 22.2.3 */
> -	vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */
> +
> +	/*
> +	 * Save the most likely value for this task's CR3 in the VMCS.
> +	 * We can't use __get_current_cr3_fast() because we're not atomic.
> +	 */
> +	cr3 = read_cr3();
> +	vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */
> +	vmx->host_state.vmcs_host_cr3 = cr3;
>
>  	/* Save the most likely value for this task's CR4 in the VMCS.
*/
>  	cr4 = cr4_read_shadow();
> @@ -8843,7 +8852,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
>  static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> -	unsigned long debugctlmsr, cr4;
> +	unsigned long debugctlmsr, cr3, cr4;
>
>  	/* Don't enter VMX if guest state is invalid, let the exit handler
>  	   start emulation until we arrive back to a valid state */
> @@ -8865,6 +8874,12 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  	if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
>  		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
>
> +	cr3 = __get_current_cr3_fast();
> +	if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
> +		vmcs_writel(HOST_CR3, cr3);
> +		vmx->host_state.vmcs_host_cr3 = cr3;
> +	}
> +
>  	cr4 = cr4_read_shadow();
>  	if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
>  		vmcs_writel(HOST_CR4, cr4);
>

Queued, thanks. If anybody needs a topic branch, please holler.

Paolo