On 12/18/19 1:45 PM, Tom Lendacky wrote:
> The KVM MMIO support uses bit 51 as the reserved bit to cause nested page
> faults when a guest performs MMIO. The AMD memory encryption support uses
> CPUID functions to define the encryption bit position. Given this, KVM
> can't assume that bit 51 will be safe all the time.
>
> Add a callback to return a reserved bit(s) mask that can be used for the
> MMIO pagetable entries. The callback is not responsible for setting the
> present bit.
>
> If a callback is registered:
>   - any non-zero mask returned is updated with the present bit and used
>     as the MMIO SPTE mask.
>   - a zero mask returned results in a mask with only bit 51 set (i.e. no
>     present bit) as the MMIO SPTE mask, similar to the way 52-bit physical
>     addressing is handled.
>
> If no callback is registered, the current method of setting the MMIO SPTE
> mask is used.
>
> Fixes: 28a1f3ac1d0c ("kvm: x86: Set highest physical address bits in non-present/reserved SPTEs")
> Signed-off-by: Tom Lendacky <thomas.lendacky@xxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h |  4 ++-
>  arch/x86/kvm/mmu/mmu.c          | 54 +++++++++++++++++++++------------
>  arch/x86/kvm/x86.c              |  2 +-
>  3 files changed, 38 insertions(+), 22 deletions(-)

This patch has some extra churn because kvm_x86_ops isn't set yet when the
call to kvm_set_mmio_spte_mask() is made. If it's not a problem to move
setting kvm_x86_ops just a bit earlier in kvm_arch_init(), some of the
churn can be avoided.

Thanks,
Tom

>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index b79cd6aa4075..0c666c10f1a2 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1233,6 +1233,8 @@ struct kvm_x86_ops {
>
>  	bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
>  	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
> +
> +	u64 (*get_reserved_mask)(void);
>  };
>
>  struct kvm_arch_async_pf {
> @@ -1266,7 +1268,7 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
>  	return -ENOTSUPP;
>  }
>
> -int kvm_mmu_module_init(void);
> +int kvm_mmu_module_init(struct kvm_x86_ops *ops);
>  void kvm_mmu_module_exit(void);
>
>  void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 6f92b40d798c..d419df7a4056 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -6227,30 +6227,44 @@ static void mmu_destroy_caches(void)
>  	kmem_cache_destroy(mmu_page_header_cache);
>  }
>
> -static void kvm_set_mmio_spte_mask(void)
> +static void kvm_set_mmio_spte_mask(struct kvm_x86_ops *ops)
>  {
>  	u64 mask;
>
> -	/*
> -	 * Set the reserved bits and the present bit of an paging-structure
> -	 * entry to generate page fault with PFER.RSV = 1.
> -	 */
> +	if (ops->get_reserved_mask) {
> +		mask = ops->get_reserved_mask();
>
> -	/*
> -	 * Mask the uppermost physical address bit, which would be reserved as
> -	 * long as the supported physical address width is less than 52.
> -	 */
> -	mask = 1ull << 51;
> +		/*
> +		 * If there are reserved bits available, add the present bit
> +		 * to the mask to generate a page fault with PFER.RSV = 1.
> +		 * If there are no reserved bits available, mask the uppermost
> +		 * physical address bit, but keep the present bit cleared.
> +		 */
> +		if (mask)
> +			mask |= 1ull;
> +		else
> +			mask = 1ull << 51;
> +	} else {
> +		/*
> +		 * Set the reserved bits and the present bit of a
> +		 * paging-structure entry to generate page fault with
> +		 * PFER.RSV = 1.
> +		 */
>
> -	/* Set the present bit. */
> -	mask |= 1ull;
> +		/*
> +		 * Mask the uppermost physical address bit, which would be
> +		 * reserved as long as the supported physical address width
> +		 * is less than 52.
> +		 */
> +		mask = 1ull << 51;
>
> -	/*
> -	 * If reserved bit is not supported, clear the present bit to disable
> -	 * mmio page fault.
> -	 */
> -	if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52)
> -		mask &= ~1ull;
> +		/*
> +		 * If reserved bit is not supported, don't set the present bit
> +		 * to disable mmio page fault.
> +		 */
> +		if (!IS_ENABLED(CONFIG_X86_64) || shadow_phys_bits != 52)
> +			mask |= 1ull;
> +	}
>
>  	kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
>  }
> @@ -6301,7 +6315,7 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
>  	return 0;
>  }
>
> -int kvm_mmu_module_init(void)
> +int kvm_mmu_module_init(struct kvm_x86_ops *ops)
>  {
>  	int ret = -ENOMEM;
>
> @@ -6320,7 +6334,7 @@ int kvm_mmu_module_init(void)
>
>  	kvm_mmu_reset_all_pte_masks();
>
> -	kvm_set_mmio_spte_mask();
> +	kvm_set_mmio_spte_mask(ops);
>
>  	pte_list_desc_cache = kmem_cache_create("pte_list_desc",
>  					    sizeof(struct pte_list_desc),
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 3ed167e039e5..311da4ed423d 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7234,7 +7234,7 @@ int kvm_arch_init(void *opaque)
>  		goto out_free_x86_fpu_cache;
>  	}
>
> -	r = kvm_mmu_module_init();
> +	r = kvm_mmu_module_init(ops);
>  	if (r)
>  		goto out_free_percpu;
>
>