Add support for "VMWRITE to any supported field in the VMCS" and enable this feature by default in L1's IA32_VMX_MISC MSR. If userspace clears the VMX capability bit, the old behavior will be restored. Note that this feature is a prerequisite for kvm in L1 to use VMCS shadowing, once that feature is available. Signed-off-by: Jim Mattson <jmattson@xxxxxxxxxx> --- arch/x86/kvm/vmx.c | 69 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5ea57442fef9..8a87caf452a3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1694,6 +1694,17 @@ static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu) return vmx_misc_cr3_count(to_vmx(vcpu)->nested.msrs.misc_low); } +/* + * Do the virtual VMX capability MSRs specify that L1 can use VMWRITE + * to modify any valid field of the VMCS, or are the VM-exit + * information fields read-only? + */ +static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu) +{ + return to_vmx(vcpu)->nested.msrs.misc_low & + MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS; +} + static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit) { return vmcs12->cpu_based_vm_exec_control & bit; @@ -3311,6 +3322,7 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) msrs->misc_high); msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA; msrs->misc_low |= + MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | VMX_MISC_ACTIVITY_HLT; msrs->misc_high = 0; @@ -3484,6 +3496,15 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data) vmx->nested.msrs.misc_low = data; vmx->nested.msrs.misc_high = data >> 32; + + /* + * If L1 has read-only VM-exit information fields, use the + * less permissive vmx_vmwrite_bitmap to specify write + * permissions for the shadow VMCS. + */ + if (enable_shadow_vmcs && !nested_cpu_has_vmwrite_any_field(&vmx->vcpu)) + vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); + return 0; } @@ -6154,8 +6175,14 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) int i; if (enable_shadow_vmcs) { + /* + * At vCPU creation, "VMWRITE to any supported field + * in the VMCS" is supported, so use the more + * permissive vmx_vmread_bitmap to specify both read + * and write permissions for the shadow VMCS. + */ vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap)); - vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); + vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmread_bitmap)); } if (cpu_has_vmx_msr_bitmap()) vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap)); @@ -8136,23 +8163,42 @@ static inline int vmcs12_write_any(struct kvm_vcpu *vcpu, } +/* + * Copy the writable VMCS shadow fields back to the VMCS12, in case + * they have been modified by the L1 guest. Note that the "read-only" + * VM-exit information fields are actually writable if the vCPU is + * configured to support "VMWRITE to any supported field in the VMCS." + */ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) { - int i; + const u16 *fields[] = { + shadow_read_write_fields, + shadow_read_only_fields + }; + const int max_fields[] = { + max_shadow_read_write_fields, + max_shadow_read_only_fields + }; + int i, q; unsigned long field; u64 field_value; struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; - const u16 *fields = shadow_read_write_fields; - const int num_fields = max_shadow_read_write_fields; preempt_disable(); vmcs_load(shadow_vmcs); - for (i = 0; i < num_fields; i++) { - field = fields[i]; - field_value = __vmcs_readl(field); - vmcs12_write_any(&vmx->vcpu, field, field_value); + for (q = 0; q < ARRAY_SIZE(fields); q++) { + for (i = 0; i < max_fields[q]; i++) { + field = fields[q][i]; + field_value = __vmcs_readl(field); + vmcs12_write_any(&vmx->vcpu, field, field_value); + } + /* + * Skip the VM-exit information fields if they are read-only. + */ + if (!nested_cpu_has_vmwrite_any_field(&vmx->vcpu)) + break; } vmcs_clear(shadow_vmcs); @@ -8286,7 +8332,12 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); - if (vmcs_field_readonly(field)) { + /* + * If the vCPU supports "VMWRITE to any supported field in the + * VMCS," then the "read-only" fields are actually read/write. + */ + if (vmcs_field_readonly(field) && + !nested_cpu_has_vmwrite_any_field(vcpu)) { nested_vmx_failValid(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); return kvm_skip_emulated_instruction(vcpu); -- 2.17.0.921.gf22659ad46-goog