Re: [PATCH v1 1/2] KVM: x86/mmu: Allow for overriding MMIO SPTE mask

Tom Lendacky <thomas.lendacky@xxxxxxx> · Wed, 18 Dec 2019 13:51:23 -0600

On 12/18/19 1:45 PM, Tom Lendacky wrote:
> The KVM MMIO support uses bit 51 as the reserved bit to cause nested page
> faults when a guest performs MMIO. The AMD memory encryption support uses
> CPUID functions to define the encryption bit position. Given this, KVM
> can't assume that bit 51 will be safe all the time.
> 
> Add a callback that returns a mask of reserved bit(s) that can be used for
> the MMIO page table entries. The callback is not responsible for setting
> the present bit.
> 
> If a callback is registered:
>   - any non-zero mask returned has the present bit OR'ed into it and is
>     used as the MMIO SPTE mask.
>   - a zero mask returned results in a mask with only bit 51 set (i.e. no
>     present bit) as the MMIO SPTE mask, similar to the way 52-bit physical
>     addressing is handled.
> 
> If no callback is registered, the current method of setting the MMIO SPTE
> mask is used.
> 
> Fixes: 28a1f3ac1d0c ("kvm: x86: Set highest physical address bits in non-present/reserved SPTEs")
> Signed-off-by: Tom Lendacky <thomas.lendacky@xxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h |  4 ++-
>  arch/x86/kvm/mmu/mmu.c          | 54 +++++++++++++++++++++------------
>  arch/x86/kvm/x86.c              |  2 +-
>  3 files changed, 38 insertions(+), 22 deletions(-)

This patch has some extra churn because kvm_x86_ops isn't set yet when the
call to kvm_set_mmio_spte_mask() is made. If it's not a problem to move
setting kvm_x86_ops just a bit earlier in kvm_arch_init(), some of the
churn can be avoided.

Thanks,
Tom

> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index b79cd6aa4075..0c666c10f1a2 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1233,6 +1233,8 @@ struct kvm_x86_ops {
>  
>  	bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
>  	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
> +
> +	u64 (*get_reserved_mask)(void);
>  };
>  
>  struct kvm_arch_async_pf {
> @@ -1266,7 +1268,7 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
>  		return -ENOTSUPP;
>  }
>  
> -int kvm_mmu_module_init(void);
> +int kvm_mmu_module_init(struct kvm_x86_ops *ops);
>  void kvm_mmu_module_exit(void);
>  
>  void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 6f92b40d798c..d419df7a4056 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -6227,30 +6227,44 @@ static void mmu_destroy_caches(void)
>  	kmem_cache_destroy(mmu_page_header_cache);
>  }
>  
> -static void kvm_set_mmio_spte_mask(void)
> +static void kvm_set_mmio_spte_mask(struct kvm_x86_ops *ops)
>  {
>  	u64 mask;
>  
> -	/*
> -	 * Set the reserved bits and the present bit of an paging-structure
> -	 * entry to generate page fault with PFER.RSV = 1.
> -	 */
> +	if (ops->get_reserved_mask) {
> +		mask = ops->get_reserved_mask();
>  
> -	/*
> -	 * Mask the uppermost physical address bit, which would be reserved as
> -	 * long as the supported physical address width is less than 52.
> -	 */
> -	mask = 1ull << 51;
> +		/*
> +		 * If there are reserved bits available, add the present bit
> +		 * to the mask to generate a page fault with PFER.RSV = 1.
> +		 * If there are no reserved bits available, mask the uppermost
> +		 * physical address bit, but keep the present bit cleared.
> +		 */
> +		if (mask)
> +			mask |= 1ull;
> +		else
> +			mask = 1ull << 51;
> +	} else {
> +		/*
> +		 * Set the reserved bits and the present bit of a
> +		 * paging-structure entry to generate page fault with
> +		 * PFER.RSV = 1.
> +		 */
>  
> -	/* Set the present bit. */
> -	mask |= 1ull;
> +		/*
> +		 * Mask the uppermost physical address bit, which would be
> +		 * reserved as long as the supported physical address width
> +		 * is less than 52.
> +		 */
> +		mask = 1ull << 51;
>  
> -	/*
> -	 * If reserved bit is not supported, clear the present bit to disable
> -	 * mmio page fault.
> -	 */
> -	if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52)
> -		mask &= ~1ull;
> +		/*
> +		 * If reserved bit is not supported, don't set the present bit
> +		 * to disable mmio page fault.
> +		 */
> +		if (!IS_ENABLED(CONFIG_X86_64) || shadow_phys_bits != 52)
> +			mask |= 1ull;
> +	}
>  
>  	kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
>  }
> @@ -6301,7 +6315,7 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
>  	return 0;
>  }
>  
> -int kvm_mmu_module_init(void)
> +int kvm_mmu_module_init(struct kvm_x86_ops *ops)
>  {
>  	int ret = -ENOMEM;
>  
> @@ -6320,7 +6334,7 @@ int kvm_mmu_module_init(void)
>  
>  	kvm_mmu_reset_all_pte_masks();
>  
> -	kvm_set_mmio_spte_mask();
> +	kvm_set_mmio_spte_mask(ops);
>  
>  	pte_list_desc_cache = kmem_cache_create("pte_list_desc",
>  					    sizeof(struct pte_list_desc),
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 3ed167e039e5..311da4ed423d 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7234,7 +7234,7 @@ int kvm_arch_init(void *opaque)
>  		goto out_free_x86_fpu_cache;
>  	}
>  
> -	r = kvm_mmu_module_init();
> +	r = kvm_mmu_module_init(ops);
>  	if (r)
>  		goto out_free_percpu;
>  
>