On Sun, Jun 11, 2023 at 11:25:12PM -0500, Michael Roth <michael.roth@xxxxxxx> wrote:
> This will be used to determine whether or not an #NPF should be serviced
> using a normal page vs. a guarded/gmem one.
> 
> Signed-off-by: Michael Roth <michael.roth@xxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h |  7 +++++++
>  arch/x86/kvm/mmu/mmu_internal.h | 35 ++++++++++++++++++++++++++++++++-
>  2 files changed, 41 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index b3bd24f2a390..c26f76641121 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1445,6 +1445,13 @@ struct kvm_arch {
>           */
>  #define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
>          struct kvm_mmu_memory_cache split_desc_cache;
> +
> +        /*
> +         * When set, used to determine whether a fault should be treated as
> +         * private in the context of protected VMs which use a separate gmem
> +         * pool to back private guest pages.
> +         */
> +        u64 mmu_private_fault_mask;
>  };
>  
>  struct kvm_vm_stat {
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index 780b91e1da9f..9b9e75aa43f4 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -252,6 +252,39 @@ struct kvm_page_fault {
>  
>  int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
>  
> +static bool kvm_mmu_fault_is_private(struct kvm *kvm, gpa_t gpa, u64 err)
> +{
> +        struct kvm_memory_slot *slot;
> +        bool private_fault = false;
> +        gfn_t gfn = gpa_to_gfn(gpa);
> +
> +        slot = gfn_to_memslot(kvm, gfn);
> +        if (!slot) {
> +                pr_debug("%s: no slot, GFN: 0x%llx\n", __func__, gfn);
> +                goto out;
> +        }
> +
> +        if (!kvm_slot_can_be_private(slot)) {
> +                pr_debug("%s: slot is not private, GFN: 0x%llx\n", __func__, gfn);
> +                goto out;
> +        }
> +
> +        if (kvm->arch.mmu_private_fault_mask) {
> +                private_fault = !!(err & kvm->arch.mmu_private_fault_mask);
> +                goto out;
> +        }

What's the convention of err?  Can we abstract it by introducing a new
bit, PFERR_PRIVATE_MASK?  The caller would set it based on the
arch-specific value, and the logic here would simply become

        .is_private = err & PFERR_PRIVATE_MASK;

(A rough sketch of this is at the end of the mail.)

> +
> +        /*
> +         * Handling below is for UPM self-tests and guests that treat userspace
> +         * as the authority on whether a fault should be private or not.
> +         */
> +        private_fault = kvm_mem_is_private(kvm, gpa >> PAGE_SHIFT);

This code path is sad.  It does an extra memslot lookup and an extra
xarray lookup, and without the mmu lock held the result can be changed
by another vcpu.  Let's find a better way.

> +
> +out:
> +        pr_debug("%s: GFN: 0x%llx, private: %d\n", __func__, gfn, private_fault);
> +        return private_fault;
> +}
> +
>  /*
>   * Return values of handle_mmio_page_fault(), mmu.page_fault(), fast_page_fault(),
>   * and of course kvm_mmu_do_page_fault().
> @@ -301,7 +334,7 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>                  .max_level = KVM_MAX_HUGEPAGE_LEVEL,
>                  .req_level = PG_LEVEL_4K,
>                  .goal_level = PG_LEVEL_4K,
> -                .is_private = kvm_mem_is_private(vcpu->kvm, cr2_or_gpa >> PAGE_SHIFT),
> +                .is_private = kvm_mmu_fault_is_private(vcpu->kvm, cr2_or_gpa, err),
>          };
>          int r;
> 
> -- 
> 2.25.1
> 

-- 
Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
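
For concreteness, a rough sketch of the PFERR_PRIVATE_MASK idea above.
The bit position and the arch_handle_guest_fault()/arch_fault_is_private()
names are placeholders for illustration only, not an existing interface:

/* Synthetic, software-defined error-code bit; the position is a placeholder. */
#define PFERR_PRIVATE_BIT       49
#define PFERR_PRIVATE_MASK      BIT_ULL(PFERR_PRIVATE_BIT)

/*
 * The arch fault handler (e.g. the SVM #NPF path) translates its
 * hardware-specific "this was a private access" indication into the
 * synthetic bit before calling into the common MMU code.
 */
static int arch_handle_guest_fault(struct kvm_vcpu *vcpu, gpa_t gpa,
                                   u64 error_code)
{
        /* Hypothetical arch hook, e.g. checking the encrypted bit of the #NPF error code. */
        if (arch_fault_is_private(vcpu, error_code))
                error_code |= PFERR_PRIVATE_MASK;

        return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
}

/*
 * kvm_mmu_do_page_fault() would then need neither the memslot lookup nor
 * the kvm_mem_is_private() fallback:
 *
 *        .is_private = !!(err & PFERR_PRIVATE_MASK),
 */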