The KVM MMIO support uses bit 51 as the reserved bit to cause nested page faults when a guest performs MMIO. The AMD memory encryption support uses CPUID functions to define the encryption bit position. Given this, KVM can't assume that bit 51 will always be safe to use as the reserved bit. Add a callback that returns a mask of reserved bit(s) that can be used for the MMIO page table entries. The callback is not responsible for setting the present bit. If a callback is registered: - any non-zero mask returned is updated with the present bit and used as the MMIO SPTE mask. - a zero mask returned results in a mask with only bit 51 set (i.e. no present bit) as the MMIO SPTE mask, similar to the way 52-bit physical addressing is handled. If no callback is registered, the current method of setting the MMIO SPTE mask is used. Fixes: 28a1f3ac1d0c ("kvm: x86: Set highest physical address bits in non-present/reserved SPTEs") Signed-off-by: Tom Lendacky <thomas.lendacky@xxxxxxx> --- arch/x86/include/asm/kvm_host.h | 4 ++- arch/x86/kvm/mmu/mmu.c | 54 +++++++++++++++++++++------------ arch/x86/kvm/x86.c | 2 +- 3 files changed, 38 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b79cd6aa4075..0c666c10f1a2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1233,6 +1233,8 @@ struct kvm_x86_ops { bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); + + u64 (*get_reserved_mask)(void); }; struct kvm_arch_async_pf { @@ -1266,7 +1268,7 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) return -ENOTSUPP; } -int kvm_mmu_module_init(void); +int kvm_mmu_module_init(struct kvm_x86_ops *ops); void kvm_mmu_module_exit(void); void kvm_mmu_destroy(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6f92b40d798c..d419df7a4056 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6227,30 +6227,44 @@ 
static void mmu_destroy_caches(void) kmem_cache_destroy(mmu_page_header_cache); } -static void kvm_set_mmio_spte_mask(void) +static void kvm_set_mmio_spte_mask(struct kvm_x86_ops *ops) { u64 mask; - /* - * Set the reserved bits and the present bit of an paging-structure - * entry to generate page fault with PFER.RSV = 1. - */ + if (ops->get_reserved_mask) { + mask = ops->get_reserved_mask(); - /* - * Mask the uppermost physical address bit, which would be reserved as - * long as the supported physical address width is less than 52. - */ - mask = 1ull << 51; + /* + * If there are reserved bits available, add the present bit + * to the mask to generate a page fault with PFER.RSV = 1. + * If there are no reserved bits available, mask the uppermost + * physical address bit, but keep the present bit cleared. + */ + if (mask) + mask |= 1ull; + else + mask = 1ull << 51; + } else { + /* + * Set the reserved bits and the present bit of a + * paging-structure entry to generate page fault with + * PFER.RSV = 1. + */ - /* Set the present bit. */ - mask |= 1ull; + /* + * Mask the uppermost physical address bit, which would be + * reserved as long as the supported physical address width + * is less than 52. + */ + mask = 1ull << 51; - /* - * If reserved bit is not supported, clear the present bit to disable - * mmio page fault. - */ - if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52) - mask &= ~1ull; + /* + * If reserved bit is not supported, don't set the present bit + * to disable mmio page fault. 
+ */ + if (!IS_ENABLED(CONFIG_X86_64) || shadow_phys_bits != 52) + mask |= 1ull; + } kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK); } @@ -6301,7 +6315,7 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) return 0; } -int kvm_mmu_module_init(void) +int kvm_mmu_module_init(struct kvm_x86_ops *ops) { int ret = -ENOMEM; @@ -6320,7 +6334,7 @@ int kvm_mmu_module_init(void) kvm_mmu_reset_all_pte_masks(); - kvm_set_mmio_spte_mask(); + kvm_set_mmio_spte_mask(ops); pte_list_desc_cache = kmem_cache_create("pte_list_desc", sizeof(struct pte_list_desc), diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3ed167e039e5..311da4ed423d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7234,7 +7234,7 @@ int kvm_arch_init(void *opaque) goto out_free_x86_fpu_cache; } - r = kvm_mmu_module_init(); + r = kvm_mmu_module_init(ops); if (r) goto out_free_percpu; -- 2.17.1