On 9/4/23 04:53, Manali Shukla wrote:
From: Santosh Shukla <santosh.shukla@xxxxxxx>
IBS virtualization (VIBS) [1] feature allows the guest to collect IBS
samples without exiting the guest.
There are 2 parts to this feature
- Virtualizing the IBS register state.
- Ensuring the IBS interrupt is handled in the guest without exiting
to the hypervisor.
IBS virtualization requires the use of AVIC or NMI virtualization for
delivery of a virtualized interrupt from IBS hardware in the guest.
Without the virtualized interrupt delivery, the IBS interrupt
occurring in the guest will not be delivered to either the guest or
the hypervisor. When AVIC is enabled, IBS LVT entry (Extended
Interrupt 0 LVT) message type should be programmed to INTR or NMI.
So, when the sampled interval for the data collection for IBS fetch/op
block is over, VIBS hardware is going to generate a Virtual NMI, but
the source of Virtual NMI is different in both AVIC enabled/disabled
case.
1) when AVIC is enabled, Virtual NMI is generated via AVIC using
extended LVT (EXTLVT).
2) When AVIC is disabled, Virtual NMI is directly generated from
hardware.
Since IBS registers falls under swap type C [2], only the guest state is
saved and restored automatically by the hardware. Host state needs to be
saved and restored manually by the hypervisor. Note that, saving and
restoring of host IBS state happens only when IBS is active on host. to
avoid unnecessary rdmsrs/wrmsrs. Hypervisor needs to disable host IBS
before VMRUN and re-enable it after VMEXIT [1].
The IBS virtualization feature for non SEV-ES guests is not enabled in
this patch. Later patches enable VIBS for non SEV-ES guests.
[1]: https://bugzilla.kernel.org/attachment.cgi?id=304653
AMD64 Architecture Programmer’s Manual, Vol 2, Section 15.38
Instruction-Based Sampling Virtualization.
[2]: https://bugzilla.kernel.org/attachment.cgi?id=304653
AMD64 Architecture Programmer’s Manual, Vol 2, Appendix B Layout
of VMCB, Table B-3 Swap Types.
Signed-off-by: Santosh Shukla <santosh.shukla@xxxxxxx>
Co-developed-by: Manali Shukla <manali.shukla@xxxxxxx>
Signed-off-by: Manali Shukla <manali.shukla@xxxxxxx>
---
arch/x86/kvm/svm/svm.c | 172 ++++++++++++++++++++++++++++++++++++++++-
arch/x86/kvm/svm/svm.h | 4 +-
2 files changed, 173 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 20fe83eb32ee..6f566ed93f4c 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -139,6 +139,22 @@ static const struct svm_direct_access_msrs {
{ .index = X2APIC_MSR(APIC_TMICT), .always = false },
{ .index = X2APIC_MSR(APIC_TMCCT), .always = false },
{ .index = X2APIC_MSR(APIC_TDCR), .always = false },
+ { .index = MSR_AMD64_IBSFETCHCTL, .always = false },
+ { .index = MSR_AMD64_IBSFETCHLINAD, .always = false },
+ { .index = MSR_AMD64_IBSOPCTL, .always = false },
+ { .index = MSR_AMD64_IBSOPRIP, .always = false },
+ { .index = MSR_AMD64_IBSOPDATA, .always = false },
+ { .index = MSR_AMD64_IBSOPDATA2, .always = false },
+ { .index = MSR_AMD64_IBSOPDATA3, .always = false },
+ { .index = MSR_AMD64_IBSDCLINAD, .always = false },
+ { .index = MSR_AMD64_IBSBRTARGET, .always = false },
+ { .index = MSR_AMD64_ICIBSEXTDCTL, .always = false },
+ { .index = X2APIC_MSR(APIC_EFEAT), .always = false },
+ { .index = X2APIC_MSR(APIC_ECTRL), .always = false },
+ { .index = X2APIC_MSR(APIC_EILVTn(0)), .always = false },
+ { .index = X2APIC_MSR(APIC_EILVTn(1)), .always = false },
+ { .index = X2APIC_MSR(APIC_EILVTn(2)), .always = false },
+ { .index = X2APIC_MSR(APIC_EILVTn(3)), .always = false },
Why not keep these X2APIC_MSR registers with the other X2APIC_MSR registers?
{ .index = MSR_INVALID, .always = false },
};
@@ -217,6 +233,10 @@ module_param(vgif, int, 0444);
static int lbrv = true;
module_param(lbrv, int, 0444);
+/* enable/disable IBS virtualization */
+static int vibs;
+module_param(vibs, int, 0444);
+
static int tsc_scaling = true;
module_param(tsc_scaling, int, 0444);
@@ -1050,6 +1070,20 @@ void disable_nmi_singlestep(struct vcpu_svm *svm)
}
}
+void svm_ibs_msr_interception(struct vcpu_svm *svm, bool intercept)
+{
+ set_msr_interception(&svm->vcpu, svm->msrpm, MSR_AMD64_IBSFETCHCTL, !intercept, !intercept);
+ set_msr_interception(&svm->vcpu, svm->msrpm, MSR_AMD64_IBSFETCHLINAD, !intercept, !intercept);
+ set_msr_interception(&svm->vcpu, svm->msrpm, MSR_AMD64_IBSOPCTL, !intercept, !intercept);
+ set_msr_interception(&svm->vcpu, svm->msrpm, MSR_AMD64_IBSOPRIP, !intercept, !intercept);
+ set_msr_interception(&svm->vcpu, svm->msrpm, MSR_AMD64_IBSOPDATA, !intercept, !intercept);
+ set_msr_interception(&svm->vcpu, svm->msrpm, MSR_AMD64_IBSOPDATA2, !intercept, !intercept);
+ set_msr_interception(&svm->vcpu, svm->msrpm, MSR_AMD64_IBSOPDATA3, !intercept, !intercept);
+ set_msr_interception(&svm->vcpu, svm->msrpm, MSR_AMD64_IBSDCLINAD, !intercept, !intercept);
+ set_msr_interception(&svm->vcpu, svm->msrpm, MSR_AMD64_IBSBRTARGET, !intercept, !intercept);
+ set_msr_interception(&svm->vcpu, svm->msrpm, MSR_AMD64_ICIBSEXTDCTL, !intercept, !intercept);
+}
+
static void grow_ple_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1207,6 +1241,29 @@ static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu)
/* No need to intercept these MSRs */
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 1, 1);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 1, 1);
+
+ /*
+ * If hardware supports VIBS then no need to intercept IBS MSRS
+ * when VIBS is enabled in guest.
+ */
+ if (vibs) {
+ if (guest_cpuid_has(&svm->vcpu, X86_FEATURE_IBS)) {
+ svm_ibs_msr_interception(svm, false);
+ svm->ibs_enabled = true;
+
+ /*
+ * In order to enable VIBS, AVIC/VNMI must be enabled to handle the
+ * interrupt generated by IBS driver. When AVIC is enabled, once
+ * data collection for IBS fetch/op block for sampled interval
+ * provided is done, hardware signals VNMI which is generated via
+ * AVIC which uses extended LVT registers. That is why extended LVT
+ * registers are initialized at guest startup.
+ */
+ kvm_apic_init_eilvt_regs(vcpu);
+ } else {
+ svm->ibs_enabled = false;
Can svm->ibs_enabled have previously been true? If so, then you would need
to reset the msr interception. If not, then this can be simplified to
if (vibs && guest_cpuid_has(&svm->vcpu, X86_FEATURE_IBS)) {
...
}
without an else path.
+ }
+ }
}
}
@@ -2888,6 +2945,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_AMD64_DE_CFG:
msr_info->data = svm->msr_decfg;
break;
+
+ case MSR_AMD64_IBSCTL:
+ rdmsrl(MSR_AMD64_IBSCTL, msr_info->data);
+ break;
+
default:
return kvm_get_msr_common(vcpu, msr_info);
}
@@ -4038,19 +4100,111 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
return EXIT_FASTPATH_NONE;
}
+/*
+ * Since the IBS state is swap type C, the hypervisor is responsible for saving
+ * its own IBS state before VMRUN.
+ */
+static void svm_save_host_ibs_msrs(struct vmcb_save_area *hostsa)
+{
A comment here (and in the restore function) about how IBSFETCHCTL and
IBSOPCTL are saved/restored as part of the calls to amd_vibs_window()
would be nice to have.
+ rdmsrl(MSR_AMD64_IBSFETCHLINAD, hostsa->ibs_fetch_linear_addr);
+ rdmsrl(MSR_AMD64_IBSOPRIP, hostsa->ibs_op_rip);
+ rdmsrl(MSR_AMD64_IBSOPDATA, hostsa->ibs_op_data);
+ rdmsrl(MSR_AMD64_IBSOPDATA2, hostsa->ibs_op_data2);
+ rdmsrl(MSR_AMD64_IBSOPDATA3, hostsa->ibs_op_data3);
+ rdmsrl(MSR_AMD64_IBSDCLINAD, hostsa->ibs_dc_linear_addr);
+ rdmsrl(MSR_AMD64_IBSBRTARGET, hostsa->ibs_br_target);
+ rdmsrl(MSR_AMD64_ICIBSEXTDCTL, hostsa->ibs_fetch_extd_ctl);
+}
+
+/*
+ * Since the IBS state is swap type C, the hypervisor is responsible for
+ * restoring its own IBS state after VMEXIT.
+ */
+static void svm_restore_host_ibs_msrs(struct vmcb_save_area *hostsa)
+{
+ wrmsrl(MSR_AMD64_IBSFETCHLINAD, hostsa->ibs_fetch_linear_addr);
+ wrmsrl(MSR_AMD64_IBSOPRIP, hostsa->ibs_op_rip);
+ wrmsrl(MSR_AMD64_IBSOPDATA, hostsa->ibs_op_data);
+ wrmsrl(MSR_AMD64_IBSOPDATA2, hostsa->ibs_op_data2);
+ wrmsrl(MSR_AMD64_IBSOPDATA3, hostsa->ibs_op_data3);
+ wrmsrl(MSR_AMD64_IBSDCLINAD, hostsa->ibs_dc_linear_addr);
+ wrmsrl(MSR_AMD64_IBSBRTARGET, hostsa->ibs_br_target);
+ wrmsrl(MSR_AMD64_ICIBSEXTDCTL, hostsa->ibs_fetch_extd_ctl);
+}
+
+/*
+ * Host states are categorized into three swap types based on how it is
+ * handled by hardware during a switch.
+ * Below enum represent host states which are categorized as Swap type C
+ *
+ * C: VMRUN: Host state _NOT_ saved in host save area
+ * VMEXIT: Host state initializard to default values.
+ *
+ * Swap type C state is not loaded by VMEXIT and is not saved by VMRUN.
+ * It needs to be saved/restored manually.
+ */
+enum {
+ SWAP_TYPE_C_IBS = 0,
+ SWAP_TYPE_C_MAX
+};
+
+/*
+ * Since IBS state is swap type C, hypervisor needs to disable IBS, then save
+ * IBS MSRs before VMRUN and re-enable it, then restore IBS MSRs after VMEXIT.
+ * This order is important, if not followed, software ends up reading inaccurate
+ * IBS registers.
+ */
+static noinstr u32 svm_save_swap_type_c(struct kvm_vcpu *vcpu)
+{
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
+ struct vmcb_save_area *hostsa;
+ u32 restore_mask = 0;
+
+ hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400);
+
+ if (to_svm(vcpu)->ibs_enabled) {
+ bool en = amd_vibs_window(WINDOW_START, &hostsa->ibs_fetch_ctl, &hostsa->ibs_op_ctl);
+
+ if (en) {
Why not just: if (amd_vibs_window(WINDOW_START, ...)) {
no need to define "en" just to use it once.
+ svm_save_host_ibs_msrs(hostsa);
+ restore_mask |= 1 << SWAP_TYPE_C_IBS;
+ }
+ }
+ return restore_mask;
+}
+
+static noinstr void svm_restore_swap_type_c(struct kvm_vcpu *vcpu, u32 restore_mask)
+{
+ struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
+ struct vmcb_save_area *hostsa;
+
+ hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400);
+
+ if (restore_mask & (1 << SWAP_TYPE_C_IBS)) {
+ svm_restore_host_ibs_msrs(hostsa);
+ amd_vibs_window(WINDOW_STOPPING, &hostsa->ibs_fetch_ctl, &hostsa->ibs_op_ctl);
+ }
+}
+
static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ u32 restore_mask;
guest_state_enter_irqoff();
amd_clear_divider();
- if (sev_es_guest(vcpu->kvm))
+ if (sev_es_guest(vcpu->kvm)) {
__svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted);
- else
+ } else {
+ restore_mask = svm_save_swap_type_c(vcpu);
__svm_vcpu_run(svm, spec_ctrl_intercepted);
+ if (restore_mask)
+ svm_restore_swap_type_c(vcpu, restore_mask);
Unconditionally calling svm_restore_swap_type_c() and having the if check
in that function would make this a bit cleaner.
+ }
+
guest_state_exit_irqoff();
}
@@ -4137,6 +4291,13 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
/* Any pending NMI will happen here */
+ /*
+ * Disable the IBS window since any pending IBS NMIs will have been
+ * handled.
+ */
+ if (svm->ibs_enabled)
+ amd_vibs_window(WINDOW_STOPPED, NULL, NULL);
+
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_after_interrupt(vcpu);
@@ -5225,6 +5386,13 @@ static __init int svm_hardware_setup(void)
pr_info("LBR virtualization supported\n");
}
+ if (vibs) {
+ if ((vnmi || avic) && boot_cpu_has(X86_FEATURE_VIBS))
+ pr_info("IBS virtualization supported\n");
+ else
+ vibs = false;
+ }
How about:
vibs = boot_cpu_has(X86_FEATURE_VIBS)) && (vnmi || avic);
if (vibs)
pr_info(...);
+
if (!enable_pmu)
pr_info("PMU virtualization is disabled\n");
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index c7eb82a78127..c2a02629a1d1 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -30,7 +30,7 @@
#define IOPM_SIZE PAGE_SIZE * 3
#define MSRPM_SIZE PAGE_SIZE * 2
-#define MAX_DIRECT_ACCESS_MSRS 46
+#define MAX_DIRECT_ACCESS_MSRS 62
#define MSRPM_OFFSETS 32
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
@@ -260,6 +260,7 @@ struct vcpu_svm {
unsigned long soft_int_old_rip;
unsigned long soft_int_next_rip;
bool soft_int_injected;
+ bool ibs_enabled;
u32 ldr_reg;
u32 dfr_reg;
@@ -732,6 +733,7 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm);
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
void sev_es_unmap_ghcb(struct vcpu_svm *svm);
+void svm_ibs_msr_interception(struct vcpu_svm *svm, bool intercept);
This doesn't seem necessary and isn't part of the sev.c file anyway.
Thanks,
Tom
/* vmenter.S */