To support execute-only mappings on behalf of L1 hypervisors, teach
set_spte() to honor L1's valid XWR bits. This is done only if the host
supports EPT execute-only. Reuse ACC_USER_MASK to signify whether the
L1 hypervisor has the R bit set. Add a new variable,
"shadow_present_mask", which is set for the non-EPT cases and preserves
the existing behavior there.

Signed-off-by: Bandan Das <bsd@xxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/mmu.c              | 15 ++++++++++++---
 arch/x86/kvm/paging_tmpl.h      |  8 +++++++-
 arch/x86/kvm/vmx.c              |  7 +++++--
 arch/x86/kvm/x86.c              |  4 ++--
 5 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 69e62862..c0acc66 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1026,7 +1026,7 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_init_vm(struct kvm *kvm);
 void kvm_mmu_uninit_vm(struct kvm *kvm);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-		u64 dirty_mask, u64 nx_mask, u64 x_mask);
+		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask);
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 87b62dc..ae80aa4 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -175,6 +175,7 @@ static u64 __read_mostly shadow_user_mask;
 static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mmio_mask;
+static u64 __read_mostly shadow_present_mask;
 
 static void mmu_spte_set(u64 *sptep, u64 spte);
 static void mmu_free_roots(struct kvm_vcpu *vcpu);
@@ -282,13 +283,14 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
 }
 
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-		u64 dirty_mask, u64 nx_mask, u64 x_mask)
+		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask)
 {
 	shadow_user_mask = user_mask;
 	shadow_accessed_mask = accessed_mask;
 	shadow_dirty_mask = dirty_mask;
 	shadow_nx_mask = nx_mask;
 	shadow_x_mask = x_mask;
+	shadow_present_mask = p_mask;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
 
@@ -2515,13 +2517,20 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		    gfn_t gfn, kvm_pfn_t pfn, bool speculative,
 		    bool can_unsync, bool host_writable)
 {
-	u64 spte;
+	u64 spte = 0;
 	int ret = 0;
 
 	if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
 		return 0;
 
-	spte = PT_PRESENT_MASK;
+	/*
+	 * In the non-EPT case, execonly is not valid and so
+	 * the following line is equivalent to spte |= PT_PRESENT_MASK.
+	 * For the EPT case, shadow_present_mask is 0 if hardware
+	 * supports it and we honor whatever way the guest set it.
+	 * See: FNAME(gpte_access) in paging_tmpl.h
+	 */
+	spte |= shadow_present_mask;
 	if (!speculative)
 		spte |= shadow_accessed_mask;
 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index bc019f7..f2741db 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -181,13 +181,19 @@ no_present:
 	return true;
 }
 
+/*
+ * For PTTYPE_EPT, a page table can be executable but not readable
+ * on supported processors. Therefore, set_spte does not automatically
+ * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK
+ * to signify readability since it isn't used in the EPT case
+ */
 static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte)
 {
 	unsigned access;
 #if PTTYPE == PTTYPE_EPT
 	access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
 		((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
-		ACC_USER_MASK;
+		((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0);
 #else
 	BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK);
 	BUILD_BUG_ON(ACC_EXEC_MASK != 1);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 64a79f2..f73b5dc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6366,10 +6366,13 @@ static __init int hardware_setup(void)
 	vmx_disable_intercept_msr_write_x2apic(0x83f);
 
 	if (enable_ept) {
-		kvm_mmu_set_mask_ptes(0ull,
+		kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
 			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
 			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
-			0ull, VMX_EPT_EXECUTABLE_MASK);
+			0ull, VMX_EPT_EXECUTABLE_MASK,
+			cpu_has_vmx_ept_execute_only() ?
+				0ull : PT_PRESENT_MASK);
+		BUILD_BUG_ON(PT_PRESENT_MASK != VMX_EPT_READABLE_MASK);
 		ept_set_mmio_spte_mask();
 		kvm_enable_tdp();
 	} else
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7da5dd2..b15f214 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5867,8 +5867,8 @@ int kvm_arch_init(void *opaque)
 	kvm_x86_ops = ops;
 
 	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
-			PT_DIRTY_MASK, PT64_NX_MASK, 0);
-
+			PT_DIRTY_MASK, PT64_NX_MASK, 0,
+			PT_PRESENT_MASK);
 	kvm_timer_init();
 
 	perf_register_guest_info_callbacks(&kvm_guest_cbs);
-- 
2.5.5
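
Not part of the patch: below is a standalone userspace sketch of the
access-bit plumbing described above, for illustration only. The mask
values mirror the kernel definitions but are restated locally, and the
two helpers (ept_gpte_access() and build_spte()) are simplified
stand-ins of my own naming; the real set_spte() also handles write
protection, accessed/dirty tracking and unsync logic that this toy
model omits.

/*
 * Standalone illustration, not kernel code.  Mask values mirror the
 * kernel definitions (vmx.h, mmu.h) but are restated here so the
 * example builds on its own; treat them as assumptions.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* EPT PTE permission bits: R, W, X in bits 0-2 */
#define VMX_EPT_READABLE_MASK   0x1ull
#define VMX_EPT_WRITABLE_MASK   0x2ull
#define VMX_EPT_EXECUTABLE_MASK 0x4ull

/* KVM's internal access flags */
#define ACC_EXEC_MASK  1u
#define ACC_WRITE_MASK 2u
#define ACC_USER_MASK  4u

#define PT_PRESENT_MASK 0x1ull

/*
 * Mirrors FNAME(gpte_access) for PTTYPE_EPT after the patch:
 * ACC_USER_MASK now means "the guest EPT entry is readable" instead
 * of being set unconditionally.
 */
static unsigned ept_gpte_access(uint64_t gpte)
{
        return ((gpte & VMX_EPT_WRITABLE_MASK)   ? ACC_WRITE_MASK : 0) |
               ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK  : 0) |
               ((gpte & VMX_EPT_READABLE_MASK)   ? ACC_USER_MASK  : 0);
}

/*
 * Very rough model of the relevant part of set_spte(): start from
 * shadow_present_mask (0 when the host supports execute-only EPT,
 * PT_PRESENT_MASK otherwise) and OR in the bits the guest granted.
 */
static uint64_t build_spte(unsigned pte_access, bool host_execonly)
{
        uint64_t shadow_present_mask = host_execonly ? 0 : PT_PRESENT_MASK;
        uint64_t spte = shadow_present_mask;

        if (pte_access & ACC_USER_MASK)
                spte |= VMX_EPT_READABLE_MASK;  /* shadow_user_mask under EPT */
        if (pte_access & ACC_WRITE_MASK)
                spte |= VMX_EPT_WRITABLE_MASK;
        if (pte_access & ACC_EXEC_MASK)
                spte |= VMX_EPT_EXECUTABLE_MASK;
        return spte;
}

int main(void)
{
        /* L1 asks for an execute-only mapping: X set, R and W clear. */
        unsigned access = ept_gpte_access(VMX_EPT_EXECUTABLE_MASK);

        printf("execute-only host:    spte = %#llx\n",  /* 0x4: X only */
               (unsigned long long)build_spte(access, true));
        printf("no execute-only host: spte = %#llx\n",  /* 0x5: R forced on */
               (unsigned long long)build_spte(access, false));
        return 0;
}

The second printf also shows why the vmx.c hunk adds
BUILD_BUG_ON(PT_PRESENT_MASK != VMX_EPT_READABLE_MASK): when the host
lacks execute-only support, forcing PT_PRESENT_MASK (bit 0) into the
spte only amounts to granting read access because the EPT read bit is
also bit 0.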