Do vmx execution/vmexit/vmentry control fields initialization based on
vmcs_config (set up by __setup_vmcs_config) for the host vcpu.

CPU_BASED_CR3_LOAD_EXITING & CPU_BASED_CR3_STORE_EXITING are always set
by __setup_vmcs_config but pKVM does not want them, so clear them from
cpu_based_exec_ctrl.

CPU_BASED_INTR_WINDOW_EXITING & CPU_BASED_NMI_WINDOW_EXITING are toggled
dynamically (for later nmi handling), so clear them first from
cpu_based_exec_ctrl.

SECONDARY_EXEC_ENABLE_EPT & SECONDARY_EXEC_ENABLE_VPID in
cpu_based_2nd_exec_ctrl are disabled until the EPT page table is created
for the host VM in pKVM.

The control field configuration in this patch also makes the host VM
handle its interrupts & exceptions directly, and fully own CR0 & CR3.
For CR4, only the VMXE bit is intercepted by pKVM. The MSR intercept
settings need to be updated in the pre-allocated msr_bitmap.

Signed-off-by: Chuanxiao Dong <chuanxiao.dong@xxxxxxxxx>
Signed-off-by: Jason Chen CJ <jason.cj.chen@xxxxxxxxx>
---
 arch/x86/kvm/vmx/pkvm/pkvm_host.c | 66 +++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/arch/x86/kvm/vmx/pkvm/pkvm_host.c b/arch/x86/kvm/vmx/pkvm/pkvm_host.c
index 9634bbccfbdd..810e7421f644 100644
--- a/arch/x86/kvm/vmx/pkvm/pkvm_host.c
+++ b/arch/x86/kvm/vmx/pkvm/pkvm_host.c
@@ -280,6 +280,69 @@ static __init void init_host_state_area(struct pkvm_host_vcpu *vcpu)
 	/*TODO: add HOST_RIP */
 }
 
+/*
+ * Program the VM-execution control fields of the host vcpu's VMCS from
+ * vmcs_config_ptr. vmx_cap is not used here yet.
+ */
+static __init void init_execution_control(struct vcpu_vmx *vmx,
+					 struct vmcs_config *vmcs_config_ptr,
+					 struct vmx_capability *vmx_cap)
+{
+	u32 cpu_based_exec_ctrl = vmcs_config_ptr->cpu_based_exec_ctrl;
+	u32 cpu_based_2nd_exec_ctrl = vmcs_config_ptr->cpu_based_2nd_exec_ctrl;
+
+	pin_controls_set(vmx, vmcs_config_ptr->pin_based_exec_ctrl);
+
+	/*
+	 * CR3 LOAD/STORE EXITING are not used by pkvm
+	 * INTR/NMI WINDOW EXITING are toggled dynamically
+	 */
+	cpu_based_exec_ctrl &= ~(CPU_BASED_CR3_LOAD_EXITING |
+				 CPU_BASED_CR3_STORE_EXITING |
+				 CPU_BASED_INTR_WINDOW_EXITING |
+				 CPU_BASED_NMI_WINDOW_EXITING);
+	exec_controls_set(vmx, cpu_based_exec_ctrl);
+
+	/* disable EPT/VPID first, enable after EPT pgtable created */
+	cpu_based_2nd_exec_ctrl &= ~(SECONDARY_EXEC_ENABLE_EPT |
+				     SECONDARY_EXEC_ENABLE_VPID);
+	secondary_exec_controls_set(vmx, cpu_based_2nd_exec_ctrl);
+
+	/* guest owns cr3 */
+	vmcs_write32(CR3_TARGET_COUNT, 0);
+
+	/* guest handles exception directly */
+	vmcs_write32(EXCEPTION_BITMAP, 0);
+
+	/* point the VMCS at the pre-allocated vmcs01 msr_bitmap */
+	vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
+
+	/*
+	 * guest owns cr0, and owns cr4 except VMXE bit.
+	 * does not care about IA32_VMX_CRx_FIXED0/1 setting, so if the guest
+	 * modifies cr0/cr4 conflicting with FIXED0/1, just let #GP happen.
+	 * For example, as pKVM does not enable unrestricted guest, cr0.PE/PG
+	 * must be kept as 1 in the guest.
+	 */
+	vmcs_writel(CR0_GUEST_HOST_MASK, 0);
+	vmcs_writel(CR4_GUEST_HOST_MASK, X86_CR4_VMXE);
+}
+
+/* Program the VM-exit controls and zero the VM-exit MSR-store count. */
+static __init void init_vmexit_control(struct vcpu_vmx *vmx, struct vmcs_config *vmcs_config_ptr)
+{
+	vm_exit_controls_set(vmx, vmcs_config_ptr->vmexit_ctrl);
+	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+}
+
+/* Program the VM-entry controls; zero the entry intr-info and MSR-load count. */
+static __init void init_vmentry_control(struct vcpu_vmx *vmx, struct vmcs_config *vmcs_config_ptr)
+{
+	vm_entry_controls_set(vmx, vmcs_config_ptr->vmentry_ctrl);
+	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
+	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
+}
+
 static __init int pkvm_host_init_vmx(struct pkvm_host_vcpu *vcpu, int cpu)
 {
 	struct vcpu_vmx *vmx = &vcpu->vmx;
@@ -305,6 +368,9 @@ static __init int pkvm_host_init_vmx(struct pkvm_host_vcpu *vcpu, int cpu)
 
 	init_guest_state_area(vcpu, cpu);
 	init_host_state_area(vcpu);
+	init_execution_control(vmx, &pkvm->vmcs_config, &pkvm->vmx_cap);
+	init_vmexit_control(vmx, &pkvm->vmcs_config);
+	init_vmentry_control(vmx, &pkvm->vmcs_config);
 
 	return ret;
 }
-- 
2.25.1