У пн, 2023-09-11 у 11:16 +0900, David Stevens пише: > From: David Stevens <stevensd@xxxxxxxxxxxx> > > Migrate functions which need access to is_refcounted_page to > __kvm_follow_pfn. The functions which need this are __kvm_faultin_pfn > and reexecute_instruction. The former requires replacing the async > in/out parameter with FOLL_NOWAIT parameter and the KVM_PFN_ERR_NEEDS_IO > return value. Handling non-refcounted pages is complicated, so it will > be done in a followup. The latter is a straightforward refactor. > > APIC related callers do not need to migrate because KVM controls the > memslot, so it will always be regular memory. Prefetch related callers > do not need to be migrated because atomic gfn_to_pfn calls can never > make it to hva_to_pfn_remapped. > > Signed-off-by: David Stevens <stevensd@xxxxxxxxxxxx> > --- > arch/x86/kvm/mmu/mmu.c | 43 ++++++++++++++++++++++++++++++++---------- > arch/x86/kvm/x86.c | 12 ++++++++++-- > 2 files changed, 43 insertions(+), 12 deletions(-) > > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c > index e1d011c67cc6..e1eca26215e2 100644 > --- a/arch/x86/kvm/mmu/mmu.c > +++ b/arch/x86/kvm/mmu/mmu.c > @@ -4254,7 +4254,14 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) > static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) > { > struct kvm_memory_slot *slot = fault->slot; > - bool async; > + struct kvm_follow_pfn foll = { > + .slot = slot, > + .gfn = fault->gfn, > + .flags = fault->write ? 
FOLL_WRITE : 0, > + .try_map_writable = true, > + .guarded_by_mmu_notifier = true, > + .allow_non_refcounted_struct_page = false, > + }; > > /* > * Retry the page fault if the gfn hit a memslot that is being deleted > @@ -4283,12 +4290,20 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault > return RET_PF_EMULATE; > } > > - async = false; > - fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async, > - fault->write, &fault->map_writable, > - &fault->hva); > - if (!async) > - return RET_PF_CONTINUE; /* *pfn has correct page already */ > + foll.flags |= FOLL_NOWAIT; > + fault->pfn = __kvm_follow_pfn(&foll); > + > + if (!is_error_noslot_pfn(fault->pfn)) > + goto success; Unrelated, but I can't say I like the 'is_error_noslot_pfn()' name; I wish it were called something like is_valid_pfn(). > + > + /* > + * If __kvm_follow_pfn() failed because I/O is needed to fault in the > + * page, then either set up an asynchronous #PF to do the I/O, or if > + * doing an async #PF isn't possible, retry __kvm_follow_pfn() with > + * I/O allowed. All other failures are fatal, i.e. retrying won't help. > + */ > + if (fault->pfn != KVM_PFN_ERR_NEEDS_IO) > + return RET_PF_CONTINUE; > > if (!fault->prefetch && kvm_can_do_async_pf(vcpu)) { > trace_kvm_try_async_get_page(fault->addr, fault->gfn); > @@ -4306,9 +4321,17 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault > * to wait for IO. Note, gup always bails if it is unable to quickly > * get a page and a fatal signal, i.e. SIGKILL, is pending. 
> */ > - fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, true, NULL, > - fault->write, &fault->map_writable, > - &fault->hva); > + foll.flags |= FOLL_INTERRUPTIBLE; > + foll.flags &= ~FOLL_NOWAIT; > + fault->pfn = __kvm_follow_pfn(&foll); > + > + if (!is_error_noslot_pfn(fault->pfn)) > + goto success; > + > + return RET_PF_CONTINUE; > +success: > + fault->hva = foll.hva; > + fault->map_writable = foll.writable; > return RET_PF_CONTINUE; > } > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 6c9c81e82e65..2011a7e47296 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -8556,6 +8556,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > { > gpa_t gpa = cr2_or_gpa; > kvm_pfn_t pfn; > + struct kvm_follow_pfn foll; > > if (!(emulation_type & EMULTYPE_ALLOW_RETRY_PF)) > return false; > @@ -8585,7 +8586,13 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > * retry instruction -> write #PF -> emulation fail -> retry > * instruction -> ... > */ > - pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); > + foll = (struct kvm_follow_pfn) { > + .slot = gfn_to_memslot(vcpu->kvm, gpa_to_gfn(gpa)), > + .gfn = gpa_to_gfn(gpa), > + .flags = FOLL_WRITE, > + .allow_non_refcounted_struct_page = true, > + }; > + pfn = __kvm_follow_pfn(&foll); > > /* > * If the instruction failed on the error pfn, it can not be fixed, > @@ -8594,7 +8601,8 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, > if (is_error_noslot_pfn(pfn)) > return false; > > - kvm_release_pfn_clean(pfn); > + if (foll.is_refcounted_page) > + kvm_release_page_clean(pfn_to_page(pfn)); > > /* The instructions are well-emulated on direct mmu. */ > if (vcpu->arch.mmu->root_role.direct) { Overall this looks good, though I might have missed something. Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx> Best regards, Maxim Levitsky