> -----Original Message----- > From: Wei Wang [mailto:wei.w.wang@xxxxxxxxx] > Sent: Thursday, September 20, 2018 6:06 PM > To: linux-kernel@xxxxxxxxxxxxxxx; kvm@xxxxxxxxxxxxxxx; pbonzini@xxxxxxxxxx; > ak@xxxxxxxxxxxxxxx > Cc: kan.liang@xxxxxxxxx; peterz@xxxxxxxxxxxxx; mingo@xxxxxxxxxx; > rkrcmar@xxxxxxxxxx; like.xu@xxxxxxxxx; wei.w.wang@xxxxxxxxx; > jannh@xxxxxxxxxx; Gonglei (Arei) <arei.gonglei@xxxxxxxxxx> > Subject: [PATCH v3 5/5] KVM/x86/lbr: lazy save the guest lbr stack > > When the vCPU is scheduled in: > - if the lbr feature was used in the last vCPU time slice, set the lbr > stack to be interceptible, so that the host can capture whether the > lbr feature will be used in this time slice; > - if the lbr feature wasn't used in the last vCPU time slice, disable > the vCPU support of the guest lbr switching. > > Upon the first access to one of the lbr related MSRs (since the vCPU was > scheduled in): > - record that the guest has used the lbr; > - create a host perf event to help save/restore the guest lbr stack if > the guest uses the user callstack mode lbr stack; > - pass the stack through to the guest. 
> > Suggested-by: Andi Kleen <ak@xxxxxxxxxxxxxxx> > Signed-off-by: Like Xu <like.xu@xxxxxxxxx> > Signed-off-by: Wei Wang <wei.w.wang@xxxxxxxxx> > Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> > Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx> > --- > arch/x86/events/intel/lbr.c | 16 +++++ > arch/x86/include/asm/kvm_host.h | 4 ++ > arch/x86/include/asm/perf_event.h | 6 ++ > arch/x86/kvm/pmu.h | 5 ++ > arch/x86/kvm/vmx.c | 137 > ++++++++++++++++++++++++++++++++++++++ > 5 files changed, 168 insertions(+) > > diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c > index 915fcc3..a260015 100644 > --- a/arch/x86/events/intel/lbr.c > +++ b/arch/x86/events/intel/lbr.c > @@ -64,6 +64,7 @@ static const enum { > #define LBR_NO_INFO (1ULL << LBR_NO_INFO_BIT) > > #define LBR_PLM (LBR_KERNEL | LBR_USER) > +#define LBR_USER_CALLSTACK (LBR_CALL_STACK | LBR_USER) > > #define LBR_SEL_MASK 0x3ff /* valid bits in LBR_SELECT */ > #define LBR_NOT_SUPP -1 /* LBR filter not supported */ > @@ -1283,6 +1284,21 @@ void intel_pmu_lbr_init_knl(void) > } > > /** > + * lbr_select_user_callstack - check if the user callstack mode is set > + * > + * @lbr_select: the lbr select msr > + * > + * Returns: true if the msr is configured to the user callstack mode. > + * Otherwise, false. 
> + * > + */ > +bool lbr_select_user_callstack(u64 lbr_select) > +{ > + return !!(lbr_select & LBR_USER_CALLSTACK); > +} > +EXPORT_SYMBOL_GPL(lbr_select_user_callstack); > + > +/** > * perf_get_lbr_stack - get the lbr stack related MSRs > * > * @stack: the caller's memory to get the lbr stack > diff --git a/arch/x86/include/asm/kvm_host.h > b/arch/x86/include/asm/kvm_host.h > index fdcac01..41b4d29 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -730,6 +730,10 @@ struct kvm_vcpu_arch { > > /* Flush the L1 Data cache for L1TF mitigation on VMENTER */ > bool l1tf_flush_l1d; > + /* Indicate if the guest is using lbr with the user callstack mode */ > + bool lbr_user_callstack; > + /* Indicate if the lbr msrs were accessed in this vCPU time slice */ > + bool lbr_used; > }; > > struct kvm_lpage_info { > diff --git a/arch/x86/include/asm/perf_event.h > b/arch/x86/include/asm/perf_event.h > index e893a69..2d7ae55 100644 > --- a/arch/x86/include/asm/perf_event.h > +++ b/arch/x86/include/asm/perf_event.h > @@ -277,6 +277,7 @@ struct perf_lbr_stack { > unsigned long info; > }; > > +extern bool lbr_select_user_callstack(u64 msr_lbr_select); > extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr); > extern int perf_get_lbr_stack(struct perf_lbr_stack *stack); > extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); > @@ -288,6 +289,11 @@ static inline struct perf_guest_switch_msr > *perf_guest_get_msrs(int *nr) > return NULL; > } > > +static bool lbr_select_user_callstack(u64 msr_lbr_select) > +{ > + return false; > +} > + > static inline int perf_get_lbr_stack(struct perf_lbr_stack *stack) > { > return -1; > diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h > index e872aed..94f0624 100644 > --- a/arch/x86/kvm/pmu.h > +++ b/arch/x86/kvm/pmu.h > @@ -102,6 +102,11 @@ static inline struct kvm_pmc *get_fixed_pmc(struct > kvm_pmu *pmu, u32 msr) > return NULL; > } > > +static inline bool 
intel_pmu_save_guest_lbr_enabled(struct kvm_vcpu *vcpu) > +{ > + return !!vcpu_to_pmu(vcpu)->guest_lbr_event; > +} > + > void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel); > void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx); > void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx); > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 92705b5..ae20563 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -1282,6 +1282,9 @@ static bool > nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, > static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); > static void __always_inline vmx_disable_intercept_for_msr(unsigned long > *msr_bitmap, > u32 msr, int type); > +static void > +__always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, u32 > msr, > + int type, bool value); > > static DEFINE_PER_CPU(struct vmcs *, vmxarea); > static DEFINE_PER_CPU(struct vmcs *, current_vmcs); > @@ -4056,6 +4059,120 @@ static int vmx_get_msr_feature(struct > kvm_msr_entry *msr) > return 0; > } > > +static void vmx_set_intercept_for_lbr_msrs(struct kvm_vcpu *vcpu, bool set) > +{ > + unsigned long *msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; > + struct perf_lbr_stack *stack = &vcpu->kvm->arch.lbr_stack; > + int nr = stack->nr; > + int i; > + > + vmx_set_intercept_for_msr(msr_bitmap, stack->tos, MSR_TYPE_RW, > set); > + for (i = 0; i < nr; i++) { > + vmx_set_intercept_for_msr(msr_bitmap, stack->from + i, > + MSR_TYPE_RW, set); > + vmx_set_intercept_for_msr(msr_bitmap, stack->to + i, > + MSR_TYPE_RW, set); > + if (stack->info) > + vmx_set_intercept_for_msr(msr_bitmap, stack->info + i, > + MSR_TYPE_RW, set); > + } > +} > + > +static inline bool msr_is_lbr_stack(struct kvm_vcpu *vcpu, u32 index) > +{ > + struct perf_lbr_stack *stack = &vcpu->kvm->arch.lbr_stack; > + int nr = stack->nr; > + > + return !!(index == stack->tos || > + (index >= stack->from && index < stack->from + nr) || > + (index >= stack->to && index < 
stack->to + nr) || > + (index >= stack->info && index < stack->info)); > +} > + > +static bool guest_get_lbr_msr(struct kvm_vcpu *vcpu, struct msr_data > *msr_info) > +{ > + u32 index = msr_info->index; > + bool ret = false; > + > + switch (index) { > + case MSR_IA32_DEBUGCTLMSR: > + msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL); > + ret = true; > + break; > + case MSR_LBR_SELECT: > + ret = true; > + rdmsrl(index, msr_info->data); > + break; > + default: > + if (msr_is_lbr_stack(vcpu, index)) { > + ret = true; > + rdmsrl(index, msr_info->data); > + } > + } > + > + return ret; > +} > + > +static bool guest_set_lbr_msr(struct kvm_vcpu *vcpu, struct msr_data > *msr_info) > +{ > + u32 index = msr_info->index; > + u64 data = msr_info->data; > + bool ret = false; > + > + switch (index) { > + case MSR_IA32_DEBUGCTLMSR: > + ret = true; > + /* > + * Currently, only FREEZE_LBRS_ON_PMI and DEBUGCTLMSR_LBR > are > + * supported. > + */ > + data &= (DEBUGCTLMSR_FREEZE_LBRS_ON_PMI | > DEBUGCTLMSR_LBR); > + vmcs_write64(GUEST_IA32_DEBUGCTL, msr_info->data); > + break; > + case MSR_LBR_SELECT: > + ret = true; > + if (lbr_select_user_callstack(data)) > + vcpu->arch.lbr_user_callstack = true; > + else > + vcpu->arch.lbr_user_callstack = false; > + wrmsrl(index, msr_info->data); > + break; > + default: > + if (msr_is_lbr_stack(vcpu, index)) { > + ret = true; > + wrmsrl(index, msr_info->data); > + } > + } > + > + return ret; > +} > + > +static bool guest_access_lbr_msr(struct kvm_vcpu *vcpu, > + struct msr_data *msr_info, > + bool set) > +{ > + bool ret = false; > + > + if (!vcpu->kvm->arch.guest_lbr_enabled) > + return false; > + > + if (set) > + ret = guest_set_lbr_msr(vcpu, msr_info); > + else > + ret = guest_get_lbr_msr(vcpu, msr_info); > + > + if (ret) { > + vcpu->arch.lbr_used = true; > + vmx_set_intercept_for_lbr_msrs(vcpu, false); You could use if (!vcpu->arch.lbr_used) as the condition for these assignments; they only need to be done once per vCPU time slice. Thanks, -Gonglei