On 2/26/25 19:48, Sean Christopherson wrote:
> From: Zheyun Shen <szy0127@xxxxxxxxxxx>
>
> On AMD CPUs without ensuring cache consistency, each memory page
> reclamation in an SEV guest triggers a call to do WBNOINVD/WBINVD on all
> CPUs, thereby affecting the performance of other programs on the host.
>
> Typically, an AMD server may have 128 cores or more, while the SEV guest
> might only utilize 8 of these cores. Meanwhile, host can use qemu-affinity
> to bind these 8 vCPUs to specific physical CPUs.
>
> Therefore, keeping a record of the physical core numbers each time a vCPU
> runs can help avoid flushing the cache for all CPUs every time.
>
> Signed-off-by: Zheyun Shen <szy0127@xxxxxxxxxxx>
> Co-developed-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
>  arch/x86/kvm/svm/sev.c | 42 +++++++++++++++++++++++++++++++++++-------
>  arch/x86/kvm/svm/svm.h |  1 +
>  2 files changed, 36 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 4238af23ab1b..b7a4cb728fba 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -447,6 +447,8 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
>  	ret = sev_platform_init(&init_args);
>  	if (ret)
>  		goto e_free;
> +	if (!zalloc_cpumask_var(&sev->have_run_cpus, GFP_KERNEL_ACCOUNT))
> +		goto e_free;

Looks like there should be a "ret = -ENOMEM" before the goto.

Thanks,
Tom

>
>  	/* This needs to happen after SEV/SNP firmware initialization. */
>  	if (vm_type == KVM_X86_SNP_VM) {
> @@ -706,16 +708,31 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
>  	}
>  }
>
> -static void sev_writeback_caches(void)
> +static void sev_writeback_caches(struct kvm *kvm)
>  {
> +	/*
> +	 * Note, the caller is responsible for ensuring correctness if the mask
> +	 * can be modified, e.g. if a CPU could be doing VMRUN.
> +	 */
> +	if (cpumask_empty(to_kvm_sev_info(kvm)->have_run_cpus))
> +		return;
> +
>  	/*
>  	 * Ensure that all dirty guest tagged cache entries are written back
>  	 * before releasing the pages back to the system for use. CLFLUSH will
>  	 * not do this without SME_COHERENT, and flushing many cache lines
>  	 * individually is slower than blasting WBINVD for large VMs, so issue
> -	 * WBNOINVD (or WBINVD if the "no invalidate" variant is unsupported).
> +	 * WBNOINVD (or WBINVD if the "no invalidate" variant is unsupported)
> +	 * on CPUs that have done VMRUN, i.e. may have dirtied data using the
> +	 * VM's ASID.
> +	 *
> +	 * For simplicity, never remove CPUs from the bitmap.  Ideally, KVM
> +	 * would clear the mask when flushing caches, but doing so requires
> +	 * serializing multiple calls and having responding CPUs (to the IPI)
> +	 * mark themselves as still running if they are running (or about to
> +	 * run) a vCPU for the VM.
>  	 */
> -	wbnoinvd_on_all_cpus();
> +	wbnoinvd_on_many_cpus(to_kvm_sev_info(kvm)->have_run_cpus);
>  }
>
>  static unsigned long get_num_contig_pages(unsigned long idx,
> @@ -2766,7 +2783,7 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
>  		goto failed;
>  	}
>
> -	sev_writeback_caches();
> +	sev_writeback_caches(kvm);
>
>  	__unregister_enc_region_locked(kvm, region);
>
> @@ -2914,6 +2931,7 @@ void sev_vm_destroy(struct kvm *kvm)
>  	}
>
>  	sev_asid_free(sev);
> +	free_cpumask_var(sev->have_run_cpus);
>  }
>
>  void __init sev_set_cpu_caps(void)
> @@ -3127,7 +3145,7 @@ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
>  		return;
>
>  do_sev_writeback_caches:
> -	sev_writeback_caches();
> +	sev_writeback_caches(vcpu->kvm);
>  }
>
>  void sev_guest_memory_reclaimed(struct kvm *kvm)
> @@ -3140,7 +3158,7 @@ void sev_guest_memory_reclaimed(struct kvm *kvm)
>  	if (!sev_guest(kvm) || sev_snp_guest(kvm))
>  		return;
>
> -	sev_writeback_caches();
> +	sev_writeback_caches(kvm);
>  }
>
>  void sev_free_vcpu(struct kvm_vcpu *vcpu)
> @@ -3456,7 +3474,17 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
>  void pre_sev_run(struct vcpu_svm *svm, int cpu)
>  {
>  	struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
> -	unsigned int asid = sev_get_asid(svm->vcpu.kvm);
> +	struct kvm *kvm = svm->vcpu.kvm;
> +	unsigned int asid = sev_get_asid(kvm);
> +
> +	/*
> +	 * To optimize cache flushes when memory is reclaimed from an SEV VM,
> +	 * track physical CPUs that enter the guest for SEV VMs and thus can
> +	 * have encrypted, dirty data in the cache, and flush caches only for
> +	 * CPUs that have entered the guest.
> +	 */
> +	if (!cpumask_test_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus))
> +		cpumask_set_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus);
>
>  	/* Assign the asid allocated with this SEV guest */
>  	svm->asid = asid;
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 5b159f017055..6ad18ce5a754 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -112,6 +112,7 @@ struct kvm_sev_info {
>  	void *guest_req_buf;    /* Bounce buffer for SNP Guest Request input */
>  	void *guest_resp_buf;   /* Bounce buffer for SNP Guest Request output */
>  	struct mutex guest_req_mutex; /* Must acquire before using bounce buffers */
> +	cpumask_var_t have_run_cpus; /* CPUs that have done VMRUN for this VM. */
>  };
>
>  struct kvm_svm {
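
To illustrate the -ENOMEM point above, one way the hunk in __sev_guest_init()
could look (untested sketch, reusing the existing "ret" variable and "e_free"
label shown in the quoted context):

	ret = sev_platform_init(&init_args);
	if (ret)
		goto e_free;

	/* Report allocation failure instead of falling through with ret == 0. */
	if (!zalloc_cpumask_var(&sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
		ret = -ENOMEM;
		goto e_free;
	}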