On 1/31/25 18:02, Kevin Loughlin wrote:
> AMD CPUs currently execute WBINVD in the host when unregistering SEV
> guest memory or when deactivating SEV guests. Such cache maintenance is
> performed to prevent data corruption, wherein the encrypted (C=1)
> version of a dirty cache line might otherwise only be written back
> after the memory is written in a different context (e.g., C=0),
> yielding corruption. However, WBINVD is performance-costly, especially
> because it invalidates processor caches.
> 
> Strictly speaking, unless the SEV ASID is being recycled (meaning the
> SNP firmware requires the use of WBINVD prior to DF_FLUSH), the cache
> invalidation triggered by WBINVD is unnecessary; only the writeback is
> needed to prevent data corruption in the remaining scenarios.
> 
> To improve performance in these scenarios, use WBNOINVD when available
> instead of WBINVD. WBNOINVD still writes back all dirty lines
> (preventing host data corruption by SEV guests) but does *not*
> invalidate processor caches. Note that the implementation of wbnoinvd()
> ensures a fallback to WBINVD if WBNOINVD is unavailable.
> 
> In anticipation of forthcoming optimizations to limit WBNOINVD to only
> the physical CPUs that have executed SEV guests, place the call to
> wbnoinvd_on_all_cpus() in a wrapper function sev_writeback_caches().
> 
> Signed-off-by: Kevin Loughlin <kevinloughlin@xxxxxxxxxx>
> Reviewed-by: Mingwei Zhang <mizhang@xxxxxxxxxx>

Reviewed-by: Tom Lendacky <thomas.lendacky@xxxxxxx>

> ---
>  arch/x86/kvm/svm/sev.c | 41 +++++++++++++++++++++--------------------
>  1 file changed, 21 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index fe6cc763fd51..f10f1c53345e 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -116,6 +116,7 @@ static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid)
>  	 */
>  	down_write(&sev_deactivate_lock);
>  
> +	/* SNP firmware requires use of WBINVD for ASID recycling. */
>  	wbinvd_on_all_cpus();
>  
>  	if (sev_snp_enabled)
> @@ -710,6 +711,16 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
>  	}
>  }
>  
> +static inline void sev_writeback_caches(void)
> +{
> +	/*
> +	 * Ensure that all dirty guest tagged cache entries are written back
> +	 * before releasing the pages back to the system for use. CLFLUSH will
> +	 * not do this without SME_COHERENT, so issue a WBNOINVD.
> +	 */
> +	wbnoinvd_on_all_cpus();
> +}
> +
>  static unsigned long get_num_contig_pages(unsigned long idx,
>  				struct page **inpages, unsigned long npages)
>  {
> @@ -2773,12 +2784,7 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
>  		goto failed;
>  	}
>  
> -	/*
> -	 * Ensure that all guest tagged cache entries are flushed before
> -	 * releasing the pages back to the system for use. CLFLUSH will
> -	 * not do this, so issue a WBINVD.
> -	 */
> -	wbinvd_on_all_cpus();
> +	sev_writeback_caches();
>  
>  	__unregister_enc_region_locked(kvm, region);
>  
> @@ -2899,12 +2905,7 @@ void sev_vm_destroy(struct kvm *kvm)
>  		return;
>  	}
>  
> -	/*
> -	 * Ensure that all guest tagged cache entries are flushed before
> -	 * releasing the pages back to the system for use. CLFLUSH will
> -	 * not do this, so issue a WBINVD.
> -	 */
> -	wbinvd_on_all_cpus();
> +	sev_writeback_caches();
>  
>  	/*
>  	 * if userspace was terminated before unregistering the memory regions
> @@ -3126,16 +3127,16 @@ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
>  
>  	/*
>  	 * VM Page Flush takes a host virtual address and a guest ASID. Fall
> -	 * back to WBINVD if this faults so as not to make any problems worse
> +	 * back to WBNOINVD if this faults so as not to make any problems worse
>  	 * by leaving stale encrypted data in the cache.
>  	 */
>  	if (WARN_ON_ONCE(wrmsrl_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid)))
> -		goto do_wbinvd;
> +		goto do_sev_writeback_caches;
>  
>  	return;
>  
> -do_wbinvd:
> -	wbinvd_on_all_cpus();
> +do_sev_writeback_caches:
> +	sev_writeback_caches();
>  }
>  
>  void sev_guest_memory_reclaimed(struct kvm *kvm)
> @@ -3144,12 +3145,12 @@ void sev_guest_memory_reclaimed(struct kvm *kvm)
>  	 * With SNP+gmem, private/encrypted memory is unreachable via the
>  	 * hva-based mmu notifiers, so these events are only actually
>  	 * pertaining to shared pages where there is no need to perform
> -	 * the WBINVD to flush associated caches.
> +	 * the WBNOINVD to flush associated caches.
>  	 */
>  	if (!sev_guest(kvm) || sev_snp_guest(kvm))
>  		return;
>  
> -	wbinvd_on_all_cpus();
> +	sev_writeback_caches();
>  }
>  
>  void sev_free_vcpu(struct kvm_vcpu *vcpu)
> @@ -3858,7 +3859,7 @@ static int __sev_snp_update_protected_guest_state(struct kvm_vcpu *vcpu)
>  	 * guest-mapped page rather than the initial one allocated
>  	 * by KVM in svm->sev_es.vmsa. In theory, svm->sev_es.vmsa
>  	 * could be free'd and cleaned up here, but that involves
> -	 * cleanups like wbinvd_on_all_cpus() which would ideally
> +	 * cleanups like sev_writeback_caches() which would ideally
>  	 * be handled during teardown rather than guest boot.
>  	 * Deferring that also allows the existing logic for SEV-ES
>  	 * VMSAs to be re-used with minimal SNP-specific changes.
> @@ -4910,7 +4911,7 @@ void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)
>  
>  	/*
>  	 * SEV-ES avoids host/guest cache coherency issues through
> -	 * WBINVD hooks issued via MMU notifiers during run-time, and
> +	 * WBNOINVD hooks issued via MMU notifiers during run-time, and
>  	 * KVM's VM destroy path at shutdown. Those MMU notifier events
>  	 * don't cover gmem since there is no requirement to map pages
>  	 * to a HVA in order to use them for a running guest. While the
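
For anyone reading along who hasn't seen the earlier patch in the
series: the wbnoinvd() helper the commit message refers to is expected
to be a thin self-patching wrapper. A minimal sketch of the idea,
assuming the kernel's alternative() mechanism and the existing
X86_FEATURE_WBNOINVD flag (the actual helper is defined in a preceding
patch and may differ in detail):

	static __always_inline void wbnoinvd(void)
	{
		/*
		 * WBNOINVD is encoded as REP WBINVD (F3 0F 09). On CPUs
		 * without X86_FEATURE_WBNOINVD, the REP prefix is ignored
		 * or we patch in plain WBINVD, which also writes back all
		 * dirty cache lines but additionally invalidates caches.
		 */
		alternative("wbinvd", ".byte 0xf3,0x0f,0x09", X86_FEATURE_WBNOINVD);
	}

Either way, callers such as sev_writeback_caches() get the writeback
required for correctness, and the cache invalidation is skipped
whenever the hardware allows it.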