--Andy > On Sep 24, 2017, at 5:55 PM, Wanpeng Li <kernellwp@xxxxxxxxx> wrote: > > 2017-09-22 13:53 GMT+08:00 Ladi Prosek <lprosek@xxxxxxxxxx>: >> For nested virt we maintain multiple VMCS that can run on a vCPU. So it is >> incorrect to keep vmcs_host_cr3 and vmcs_host_cr4, whose purpose is caching >> the value of the rarely changing HOST_CR3 and HOST_CR4 VMCS fields, in >> vCPU-wide data structures. >> >> Hyper-V nested on KVM runs into this consistently for me with PCID enabled. >> CR3 is updated with a new value, unlikely(cr3 != vmx->host_state.vmcs_host_cr3) >> fires, and the currently loaded VMCS is updated. Then we switch from L2 to >> L1 and the next exit reverts CR3 to its old value. > > Could you explain why CR3 is updated when PCID is enabled? An mm doesn't have a fixed ASID in our implementation. When we schedule out, we can come back with a different ASID. When a task is migrated, its ASID on the new CPU is completely unrelated to its ASID on the old CPU. > In > addition, the host cr3/cr4 of both vmcs01 and vmcs02 should be from > the L0, so why there is a difference? > It's caching the value in the VMCS, not the L0 value. > Regards, > Wanpeng Li > >> >> Fixes: d6e41f1151fe ("x86/mm, KVM: Teach KVM's VMX code that CR3 isn't a constant") >> Signed-off-by: Ladi Prosek <lprosek@xxxxxxxxxx> >> --- >> arch/x86/kvm/vmx.c | 16 ++++++++-------- >> 1 file changed, 8 insertions(+), 8 deletions(-) >> >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> index 0726ca7a1b02..c83d28b0ab05 100644 >> --- a/arch/x86/kvm/vmx.c >> +++ b/arch/x86/kvm/vmx.c >> @@ -200,6 +200,8 @@ struct loaded_vmcs { >> int cpu; >> bool launched; >> bool nmi_known_unmasked; >> + unsigned long vmcs_host_cr3; /* May not match real cr3 */ >> + unsigned long vmcs_host_cr4; /* May not match real cr4 */ >> struct list_head loaded_vmcss_on_cpu_link; >> }; >> >> @@ -600,8 +602,6 @@ struct vcpu_vmx { >> int gs_ldt_reload_needed; >> int fs_reload_needed; >> u64 msr_host_bndcfgs; >> - unsigned long vmcs_host_cr3; /* May not match real cr3 */ >> - unsigned long vmcs_host_cr4; /* May not match real cr4 */ >> } host_state; >> struct { >> int vm86_active; >> @@ -5178,12 +5178,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) >> */ >> cr3 = __read_cr3(); >> vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ >> - vmx->host_state.vmcs_host_cr3 = cr3; >> + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; >> >> /* Save the most likely value for this task's CR4 in the VMCS. */ >> cr4 = cr4_read_shadow(); >> vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ >> - vmx->host_state.vmcs_host_cr4 = cr4; >> + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; >> >> vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ >> #ifdef CONFIG_X86_64 >> @@ -9274,15 +9274,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) >> vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); >> >> cr3 = __get_current_cr3_fast(); >> - if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) { >> + if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) { >> vmcs_writel(HOST_CR3, cr3); >> - vmx->host_state.vmcs_host_cr3 = cr3; >> + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; >> } >> >> cr4 = cr4_read_shadow(); >> - if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) { >> + if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) { >> vmcs_writel(HOST_CR4, cr4); >> - vmx->host_state.vmcs_host_cr4 = cr4; >> + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; >> } >> >> /* When single-stepping over STI and MOV SS, we must clear the >> -- >> 2.13.5 >>