On 2/26/25 19:48, Sean Christopherson wrote:
> From: Zheyun Shen <szy0127@xxxxxxxxxxx>
>
> On AMD CPUs without ensuring cache consistency, each memory page
> reclamation in an SEV guest triggers a call to do WBNOINVD/WBINVD on all
> CPUs, thereby affecting the performance of other programs on the host.
>
> Typically, an AMD server may have 128 cores or more, while the SEV guest
> might only utilize 8 of these cores. Meanwhile, host can use qemu-affinity
> to bind these 8 vCPUs to specific physical CPUs.
>
> Therefore, keeping a record of the physical core numbers each time a vCPU
> runs can help avoid flushing the cache for all CPUs every time.
>
> Signed-off-by: Zheyun Shen <szy0127@xxxxxxxxxxx>
> Co-developed-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> ---
>  arch/x86/kvm/svm/sev.c | 42 +++++++++++++++++++++++++++++++++++-------
>  arch/x86/kvm/svm/svm.h |  1 +
>  2 files changed, 36 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 4238af23ab1b..b7a4cb728fba 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -447,6 +447,8 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
>  	ret = sev_platform_init(&init_args);
>  	if (ret)
>  		goto e_free;
> +	if (!zalloc_cpumask_var(&sev->have_run_cpus, GFP_KERNEL_ACCOUNT))
> +		goto e_free;

Looks like there should be a "ret = -ENOMEM" before the goto.

Thanks,
Tom

>
>  	/* This needs to happen after SEV/SNP firmware initialization. */
>  	if (vm_type == KVM_X86_SNP_VM) {
> @@ -706,16 +708,31 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
>  	}
>  }
>
> -static void sev_writeback_caches(void)
> +static void sev_writeback_caches(struct kvm *kvm)
>  {
> +	/*
> +	 * Note, the caller is responsible for ensuring correctness if the mask
> +	 * can be modified, e.g. if a CPU could be doing VMRUN.
> +	 */
> +	if (cpumask_empty(to_kvm_sev_info(kvm)->have_run_cpus))
> +		return;
> +
>  	/*
>  	 * Ensure that all dirty guest tagged cache entries are written back
>  	 * before releasing the pages back to the system for use. CLFLUSH will
>  	 * not do this without SME_COHERENT, and flushing many cache lines
>  	 * individually is slower than blasting WBINVD for large VMs, so issue
> -	 * WBNOINVD (or WBINVD if the "no invalidate" variant is unsupported).
> +	 * WBNOINVD (or WBINVD if the "no invalidate" variant is unsupported)
> +	 * on CPUs that have done VMRUN, i.e. may have dirtied data using the
> +	 * VM's ASID.
> +	 *
> +	 * For simplicity, never remove CPUs from the bitmap.  Ideally, KVM
> +	 * would clear the mask when flushing caches, but doing so requires
> +	 * serializing multiple calls and having responding CPUs (to the IPI)
> +	 * mark themselves as still running if they are running (or about to
> +	 * run) a vCPU for the VM.
>  	 */
> -	wbnoinvd_on_all_cpus();
> +	wbnoinvd_on_many_cpus(to_kvm_sev_info(kvm)->have_run_cpus);
>  }
>
>  static unsigned long get_num_contig_pages(unsigned long idx,
> @@ -2766,7 +2783,7 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
>  		goto failed;
>  	}
>
> -	sev_writeback_caches();
> +	sev_writeback_caches(kvm);
>
>  	__unregister_enc_region_locked(kvm, region);
>
> @@ -2914,6 +2931,7 @@ void sev_vm_destroy(struct kvm *kvm)
>  	}
>
>  	sev_asid_free(sev);
> +	free_cpumask_var(sev->have_run_cpus);
>  }
>
>  void __init sev_set_cpu_caps(void)
> @@ -3127,7 +3145,7 @@ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
>  		return;
>
>  do_sev_writeback_caches:
> -	sev_writeback_caches();
> +	sev_writeback_caches(vcpu->kvm);
>  }
>
>  void sev_guest_memory_reclaimed(struct kvm *kvm)
> @@ -3140,7 +3158,7 @@ void sev_guest_memory_reclaimed(struct kvm *kvm)
>  	if (!sev_guest(kvm) || sev_snp_guest(kvm))
>  		return;
>
> -	sev_writeback_caches();
> +	sev_writeback_caches(kvm);
>  }
>
>  void sev_free_vcpu(struct kvm_vcpu *vcpu)
> @@ -3456,7 +3474,17 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
>  void pre_sev_run(struct vcpu_svm *svm, int cpu)
>  {
>  	struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
> -	unsigned int asid = sev_get_asid(svm->vcpu.kvm);
> +	struct kvm *kvm = svm->vcpu.kvm;
> +	unsigned int asid = sev_get_asid(kvm);
> +
> +	/*
> +	 * To optimize cache flushes when memory is reclaimed from an SEV VM,
> +	 * track physical CPUs that enter the guest for SEV VMs and thus can
> +	 * have encrypted, dirty data in the cache, and flush caches only for
> +	 * CPUs that have entered the guest.
> +	 */
> +	if (!cpumask_test_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus))
> +		cpumask_set_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus);
>
>  	/* Assign the asid allocated with this SEV guest */
>  	svm->asid = asid;
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 5b159f017055..6ad18ce5a754 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -112,6 +112,7 @@ struct kvm_sev_info {
>  	void *guest_req_buf;    /* Bounce buffer for SNP Guest Request input */
>  	void *guest_resp_buf;   /* Bounce buffer for SNP Guest Request output */
>  	struct mutex guest_req_mutex; /* Must acquire before using bounce buffers */
> +	cpumask_var_t have_run_cpus; /* CPUs that have done VMRUN for this VM. */
>  };
>
>  struct kvm_svm {
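
To illustrate the -ENOMEM point above, one way the hunk in __sev_guest_init()
could look (untested sketch, reusing the existing "ret" variable and "e_free"
label shown in the quoted context):

	ret = sev_platform_init(&init_args);
	if (ret)
		goto e_free;

	/* Report allocation failure instead of falling through with ret == 0. */
	if (!zalloc_cpumask_var(&sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
		ret = -ENOMEM;
		goto e_free;
	}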