From: Nikunj A Dadhania <nikunj@xxxxxxx> Malicious guests can cause bus locks to degrade the performance of the system. Non-WB (write-back) and misaligned locked RMW (read-modify-write) instructions are referred to as "bus locks" and require system-wide synchronization among all processors to guarantee atomicity. Bus locks may incur significant performance penalties for all processors in the system. The Bus Lock Threshold feature proves beneficial for hypervisors seeking to restrict guests' ability to initiate numerous bus locks, thereby preventing system slowdowns that affect all tenants. Support for the buslock threshold is indicated via CPUID function 0x8000000A_EDX[29]. VMCB intercept bit VMCB Offset Bits Function 14h 5 Intercept bus lock operations (occurs after guest instruction finishes) Bus lock threshold VMCB Offset Bits Function 120h 15:0 Bus lock counter Use the KVM capability KVM_CAP_X86_BUS_LOCK_EXIT to enable the feature. When the bus lock threshold counter reaches zero, KVM will exit to user space by setting KVM_RUN_X86_BUS_LOCK in vcpu->run->flags in the bus_lock_exit() handler, indicating that a bus lock has been detected in the guest. More details about the Bus Lock Threshold feature can be found in AMD APM [1]. [1]: AMD64 Architecture Programmer's Manual Pub. 24593, April 2024, Vol 2, 15.14.5 Bus Lock Threshold. https://bugzilla.kernel.org/attachment.cgi?id=306250 [Manali: - Added exit reason string for SVM_EXIT_BUS_LOCK. - Moved enablement and disablement of bus lock intercept support to svm_vcpu_after_set_cpuid(). - Massaged commit message. - Misc cleanups.
] Signed-off-by: Nikunj A Dadhania <nikunj@xxxxxxx> Co-developed-by: Manali Shukla <manali.shukla@xxxxxxx> Signed-off-by: Manali Shukla <manali.shukla@xxxxxxx> --- arch/x86/include/asm/svm.h | 5 +++- arch/x86/include/uapi/asm/svm.h | 2 ++ arch/x86/kvm/svm/svm.c | 43 +++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.h | 1 + 4 files changed, 50 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 728c98175b9c..538b7d60b05c 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -116,6 +116,7 @@ enum { INTERCEPT_INVPCID, INTERCEPT_MCOMMIT, INTERCEPT_TLBSYNC, + INTERCEPT_BUSLOCK, }; @@ -158,7 +159,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u64 avic_physical_id; /* Offset 0xf8 */ u8 reserved_7[8]; u64 vmsa_pa; /* Used for an SEV-ES guest */ - u8 reserved_8[720]; + u8 reserved_8[16]; + u16 bus_lock_counter; /* Offset 0x120 */ + u8 reserved_9[702]; /* * Offset 0x3e0, 32 bytes reserved * for use by hypervisor/software. 
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index 80e1df482337..dcce3ca367e9 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -95,6 +95,7 @@ #define SVM_EXIT_CR14_WRITE_TRAP 0x09e #define SVM_EXIT_CR15_WRITE_TRAP 0x09f #define SVM_EXIT_INVPCID 0x0a2 +#define SVM_EXIT_BUS_LOCK 0x0a5 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 @@ -223,6 +224,7 @@ { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ + { SVM_EXIT_BUS_LOCK, "buslock" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7d396f5fa010..9f1d51384eac 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -191,6 +191,9 @@ module_param(pause_filter_count_shrink, ushort, 0444); static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX; module_param(pause_filter_count_max, ushort, 0444); +static unsigned short bus_lock_counter = KVM_SVM_DEFAULT_BUS_LOCK_COUNTER; +module_param(bus_lock_counter, ushort, 0644); + /* * Use nested page tables by default. Note, NPT may get forced off by * svm_hardware_setup() if it's unsupported by hardware or the host kernel. 
@@ -3231,6 +3234,19 @@ static int invpcid_interception(struct kvm_vcpu *vcpu) return kvm_handle_invpcid(vcpu, type, gva); } +static int bus_lock_exit(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + vcpu->run->exit_reason = KVM_EXIT_X86_BUS_LOCK; + vcpu->run->flags |= KVM_RUN_X86_BUS_LOCK; + + /* Reload the counter again */ + svm->vmcb->control.bus_lock_counter = bus_lock_counter; + + return 0; +} + static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = { [SVM_EXIT_READ_CR0] = cr_interception, [SVM_EXIT_READ_CR3] = cr_interception, @@ -3298,6 +3314,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = { [SVM_EXIT_CR4_WRITE_TRAP] = cr_trap, [SVM_EXIT_CR8_WRITE_TRAP] = cr_trap, [SVM_EXIT_INVPCID] = invpcid_interception, + [SVM_EXIT_BUS_LOCK] = bus_lock_exit, [SVM_EXIT_NPF] = npf_interception, [SVM_EXIT_RSM] = rsm_interception, [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, @@ -4356,6 +4373,27 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) set_msr_interception(vcpu, svm->msrpm, MSR_IA32_FLUSH_CMD, 0, !!guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D)); + if (cpu_feature_enabled(X86_FEATURE_BUS_LOCK_THRESHOLD) && + vcpu->kvm->arch.bus_lock_detection_enabled) { + svm_set_intercept(svm, INTERCEPT_BUSLOCK); + + /* + * The CPU decrements the bus lock counter every time a bus lock + * is detected. Once the counter reaches zero a VMEXIT_BUSLOCK + * is generated. A value of zero for bus lock counter means a + * VMEXIT_BUSLOCK at every bus lock detection. + * + * Currently, default value for bus_lock_counter is set to 10. + * So, the VMEXIT_BUSLOCK is generated after every 10 bus locks + * detected. 
+ */ + svm->vmcb->control.bus_lock_counter = bus_lock_counter; + pr_debug("Setting buslock counter to %u\n", bus_lock_counter); + } else { + svm_clr_intercept(svm, INTERCEPT_BUSLOCK); + svm->vmcb->control.bus_lock_counter = 0; + } + if (sev_guest(vcpu->kvm)) sev_vcpu_after_set_cpuid(svm); @@ -5149,6 +5187,11 @@ static __init void svm_set_cpu_caps(void) kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK); } + if (cpu_feature_enabled(X86_FEATURE_BUS_LOCK_THRESHOLD)) { + pr_info("Bus Lock Threashold supported\n"); + kvm_caps.has_bus_lock_exit = true; + } + /* CPUID 0x80000008 */ if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) || boot_cpu_has(X86_FEATURE_AMD_SSBD)) diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index d80a4c6b5a38..2a77232105da 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -58,6 +58,7 @@ void kvm_spurious_fault(void); #define KVM_VMX_DEFAULT_PLE_WINDOW_MAX UINT_MAX #define KVM_SVM_DEFAULT_PLE_WINDOW_MAX USHRT_MAX #define KVM_SVM_DEFAULT_PLE_WINDOW 3000 +#define KVM_SVM_DEFAULT_BUS_LOCK_COUNTER 10 static inline unsigned int __grow_ple_window(unsigned int val, unsigned int base, unsigned int modifier, unsigned int max) -- 2.34.1