On 9/21/2023 4:29 PM, Xiong Zhang wrote:
To emulate Freeze_LBR_On_PMI feature, KVM PMU disables guest LBR at PMI
injection. When guest LBR is disabled, vLBR event may be released at two
vCPU sched-in later. once vLBR event is released, KVM will create a new
vLBR event when guest access LBR MSRs again. First this adds overhead
at vLBR event release and re-creation. Second this may changes guest
LBR contend as vLBR event creation may cause host LBR drvier reset
hw LBR facility.
This commit avoids the vLBR release for Freeze_LBR_On_PMI emulation.
It changes boolean lbr_desc->in_use into enum lbr_desc->state, so
it could express more LBR states, KVM sets lbr_desc->state as
FREEZE_ON_PMI at vPMI injection, so that vLBR release function could
check this state to avoid vLBR release. When guest enables LBR
at the end of the guest PMI handler, KVM will set lbr_desc->state
as IN_USE.
Signed-off-by: Xiong Zhang <xiong.y.zhang@xxxxxxxxx>
---
arch/x86/kvm/vmx/pmu_intel.c | 8 +++++---
arch/x86/kvm/vmx/vmx.c | 2 +-
arch/x86/kvm/vmx/vmx.h | 8 +++++++-
3 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 76d7bd8e4fc6..c8d46c3d1ab6 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -628,7 +628,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
lbr_desc->records.nr = 0;
lbr_desc->event = NULL;
lbr_desc->msr_passthrough = false;
- lbr_desc->in_use = FALSE;
+ lbr_desc->state = LBR_STATE_PREDICT_FREE;
}
static void intel_pmu_reset(struct kvm_vcpu *vcpu)
@@ -671,6 +671,7 @@ static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
data &= ~DEBUGCTLMSR_LBR;
vmcs_write64(GUEST_IA32_DEBUGCTL, data);
+ vcpu_to_lbr_desc(vcpu)->state = LBR_STATE_FREEZE_ON_PMI;
}
}
@@ -765,9 +766,10 @@ static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)) {
- if (!lbr_desc->in_use)
+ if (lbr_desc->state == LBR_STATE_PREDICT_FREE)
intel_pmu_release_guest_lbr_event(vcpu);
- lbr_desc->in_use = false;
+ else if (lbr_desc->state == LBR_STATE_IN_USE)
+ lbr_desc->state = LBR_STATE_PREDICT_FREE;
}
}
I'm concerning whether there is a risk that the vLBR event can't be
released forever. Base on the logic, the state must be set to
LBR_STATE_IN_USE firstly and then could be freed in next cycle. Assuming
the guest PMI handler doesn't enable LBR again as some reason, the LBR
state would stay in LBR_STATE_FREEZE_ON_PMI state and the vLBR event
would not be released and LBR stack would not be reset. when guest
enables LBR in next time, guest may see the stale LBR data. I think we
may add a backup mechanism to ensure the vLBR event can be freed if vLBR
is disabled after several schedule slots.
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 4056e19266b5..565df8eeb78b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2242,7 +2242,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (intel_pmu_lbr_is_enabled(vcpu) && (data & DEBUGCTLMSR_LBR)) {
struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
- lbr_desc->in_use = true;
+ lbr_desc->state = LBR_STATE_IN_USE;
if (!lbr_desc->event)
intel_pmu_create_guest_lbr_event(vcpu);
}
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 547edeb52d09..0cb68a319fc8 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -93,6 +93,12 @@ union vmx_exit_reason {
u32 full;
};
+enum lbr_state {
+ LBR_STATE_PREDICT_FREE = 0,
+ LBR_STATE_IN_USE = 1,
+ LBR_STATE_FREEZE_ON_PMI = 2
+};
+
struct lbr_desc {
/* Basic info about guest LBR records. */
struct x86_pmu_lbr records;
@@ -108,7 +114,7 @@ struct lbr_desc {
/* True if LBRs are marked as not intercepted in the MSR bitmap */
bool msr_passthrough;
- bool in_use;
+ enum lbr_state state;
};
/*