On Mon, Dec 16, 2024 at 10:51:13AM -0800, Sean Christopherson wrote: > @@ -1547,6 +1542,11 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) > (!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm))) > kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull); > > + if (cpu_feature_enabled(X86_FEATURE_SRSO_MSR_FIX)) > + kvm_set_user_return_msr(zen4_bp_cfg_uret_slot, > + MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT, > + MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT); I think this needs to be: if (cpu_feature_enabled(X86_FEATURE_SRSO_MSR_FIX)) kvm_set_user_return_msr(zen4_bp_cfg_uret_slot, BIT_ULL(MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT), BIT_ULL(MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT)); I.e., value and mask are both the 4th bit: (1 << 4) > svm->guest_state_loaded = true; > } > > @@ -5313,6 +5313,14 @@ static __init int svm_hardware_setup(void) > > tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX); > > + if (cpu_feature_enabled(X86_FEATURE_SRSO_MSR_FIX)) { > + zen4_bp_cfg_uret_slot = kvm_add_user_return_msr(MSR_ZEN4_BP_CFG); > + if (WARN_ON_ONCE(zen4_bp_cfg_uret_slot) < 0) { > + r = -EIO; > + goto err; > + } And this needs to be: if (cpu_feature_enabled(X86_FEATURE_SRSO_MSR_FIX)) { zen4_bp_cfg_uret_slot = kvm_add_user_return_msr(MSR_ZEN4_BP_CFG); if (WARN_ON_ONCE(zen4_bp_cfg_uret_slot < 0)) { r = -EIO; goto err; } } Note the WARN_ON_ONCE bracketing. But I know you're doing this on purpose - to see if I'm paying attention and not taking your patch blindly :-P With that fixed, this approach still doesn't look sane to me: before I start the guest I have all SPEC_REDUCE bits correctly clear: # rdmsr -a 0xc001102e | uniq -c 128 420000 ... start a guest, shut it down cleanly, qemu exits properly... # rdmsr -a 0xc001102e | uniq -c 1 420010 6 420000 1 420010 3 420000 1 420010 3 420000 1 420010 1 420000 1 420010 6 420000 1 420010 1 420000 1 420010 6 420000 1 420010 5 420000 1 420010 18 420000 1 420010 5 420000 1 420010 6 420000 1 420010 3 420000 1 420010 3 420000 1 420010 1 420000 1 420010 6 420000 1 420010 1 420000 1 420010 6 420000 1 420010 5 420000 1 420010 18 420000 1 420010 5 420000 so SPEC_REDUCE remains set on some cores. Not good since I'm not running VMs anymore. # rmmod kvm_amd kvm # rdmsr -a 0xc001102e | uniq -c 128 420000 that looks more like it. Also, this user-return MSR toggling does show up higher in the profile: 4.31% qemu-system-x86 [kvm] [k] 0x000000000000d23f 2.44% qemu-system-x86 [kernel.kallsyms] [k] read_tsc 1.66% qemu-system-x86 [kernel.kallsyms] [k] native_write_msr 1.50% qemu-system-x86 [kernel.kallsyms] [k] native_write_msr_safe vs 1.01% qemu-system-x86 [kernel.kallsyms] [k] native_write_msr 0.81% qemu-system-x86 [kernel.kallsyms] [k] native_write_msr_safe so it really is noticeable. So I wanna say, let's do the below and be done with it. My expectation is that this won't be needed in the future anymore either so it'll be a noop on most machines... --- @@ -609,6 +609,9 @@ static void svm_disable_virtualization_cpu(void) kvm_cpu_svm_disable(); amd_pmu_disable_virt(); + + if (cpu_feature_enabled(X86_FEATURE_SRSO_MSR_FIX)) + msr_clear_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT); } static int svm_enable_virtualization_cpu(void) @@ -686,6 +689,9 @@ static int svm_enable_virtualization_cpu(void) rdmsr(MSR_TSC_AUX, sev_es_host_save_area(sd)->tsc_aux, msr_hi); } + if (cpu_feature_enabled(X86_FEATURE_SRSO_MSR_FIX)) + msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT); + return 0; } -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette