Add x86_perf_mask_perf_counters to reserve counters from the host perf
subsystem. The masked counters will not be assigned to any host perf
events. This can be used by the hypervisor to reserve perf counters for
a guest to use.

This function is currently supported on Intel CPUs only, but it is placed
in the x86 perf core because counter assignment is implemented there, and
because the PMU (defined in the x86 perf core) needs to be disabled and
re-enabled when a counter to be masked happens to be in use by the host.

Signed-off-by: Wei Wang <wei.w.wang@xxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx>
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
---
 arch/x86/events/core.c            | 37 +++++++++++++++++++++++++++++++++++++
 arch/x86/include/asm/perf_event.h |  1 +
 2 files changed, 38 insertions(+)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 106911b..e73135a 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -716,6 +716,7 @@ struct perf_sched {
 static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints,
 			    int num, int wmin, int wmax, int gpmax)
 {
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	int idx;
 
 	memset(sched, 0, sizeof(*sched));
@@ -723,6 +724,9 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
 	sched->max_weight	= wmax;
 	sched->max_gp		= gpmax;
 	sched->constraints	= constraints;
+#ifdef CONFIG_CPU_SUP_INTEL
+	sched->state.used[0]	= cpuc->intel_ctrl_guest_mask;
+#endif
 
 	for (idx = 0; idx < num; idx++) {
 		if (constraints[idx]->weight == wmin)
@@ -2386,6 +2390,39 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
 	}
 }
 
+#ifdef CONFIG_CPU_SUP_INTEL
+/**
+ * x86_perf_mask_perf_counters - mask perf counters
+ * @mask: the bitmask of counters to reserve
+ *
+ * Mask perf counters so that they are no longer available to the perf core.
+ * If a counter to be masked has already been assigned, it is taken back and
+ * the perf core re-assigns usable counters to its events.
+ *
+ * This can be used by a component outside the perf core to reserve counters.
+ * For example, a hypervisor uses it to reserve counters for a guest to use,
+ * and later returns the counters via another call with the related bits cleared.
+ */
+void x86_perf_mask_perf_counters(u64 mask)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	/*
+	 * If a counter happens to be used by a host event, take it back
+	 * first, and then restart the pmu after marking that counter as
+	 * reserved.
+	 */
+	if (mask & cpuc->intel_ctrl_host_mask) {
+		perf_pmu_disable(&pmu);
+		cpuc->intel_ctrl_guest_mask = mask;
+		perf_pmu_enable(&pmu);
+	} else {
+		cpuc->intel_ctrl_guest_mask = mask;
+	}
+}
+EXPORT_SYMBOL_GPL(x86_perf_mask_perf_counters);
+#endif
+
 static inline int
 valid_user_frame(const void __user *fp, unsigned long size)
 {
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8bdf749..5b4463e 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -297,6 +297,7 @@ static inline void perf_check_microcode(void) { }
 
 #ifdef CONFIG_CPU_SUP_INTEL
  extern void intel_pt_handle_vmx(int on);
+ extern void x86_perf_mask_perf_counters(u64 mask);
 #endif
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
--
2.7.4
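
For context, the sketch below shows one way a hypervisor module could
consume the new interface. It is only an illustration of the intended
usage, not part of this patch: the helper names (guest_pmu_reserve,
guest_pmu_release), the chosen counter mask, and the per-CPU dispatch via
on_each_cpu() are assumptions; only x86_perf_mask_perf_counters() itself
is introduced above. Because the mask is tracked in the per-CPU
cpu_hw_events, the call has to run on every CPU whose counters should be
reserved.

/* Illustrative only -- helper names and the mask value are hypothetical. */
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/smp.h>
#include <asm/perf_event.h>

/* Reserve general-purpose counters 0 and 1 for the guest. */
#define GUEST_PMC_MASK	(BIT_ULL(0) | BIT_ULL(1))

static void __guest_pmu_set_mask(void *mask)
{
	/* Runs on the local CPU; the reserved mask is per-CPU state. */
	x86_perf_mask_perf_counters(*(u64 *)mask);
}

/* Take the counters away from the host perf core on all CPUs. */
static void guest_pmu_reserve(void)
{
	u64 mask = GUEST_PMC_MASK;

	on_each_cpu(__guest_pmu_set_mask, &mask, 1);
}

/* Hand the counters back to the host perf core on all CPUs. */
static void guest_pmu_release(void)
{
	u64 mask = 0;

	on_each_cpu(__guest_pmu_set_mask, &mask, 1);
}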