From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>

Only KVM knows the exact time when a guest is entering/exiting. Expose
two interfaces to KVM to switch the ownership of the PMU resources.

Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
Tested-by: Yongwei Ma <yongwei.ma@xxxxxxxxx>
Signed-off-by: Mingwei Zhang <mizhang@xxxxxxxxxx>
---
 include/linux/perf_event.h |  4 +++
 kernel/events/core.c       | 54 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 81a5f8399cb8..75773f9890cc 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1738,6 +1738,8 @@ extern int perf_event_period(struct perf_event *event, u64 value);
 extern u64 perf_event_pause(struct perf_event *event, bool reset);
 int perf_get_mediated_pmu(void);
 void perf_put_mediated_pmu(void);
+void perf_guest_enter(void);
+void perf_guest_exit(void);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
@@ -1831,6 +1833,8 @@ static inline int perf_get_mediated_pmu(void)
 }
 
 static inline void perf_put_mediated_pmu(void) { }
+static inline void perf_guest_enter(void) { }
+static inline void perf_guest_exit(void) { }
 #endif
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 57648736e43e..57ff737b922b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5941,6 +5941,60 @@ void perf_put_mediated_pmu(void)
 }
 EXPORT_SYMBOL_GPL(perf_put_mediated_pmu);
 
+/* When entering a guest, schedule out all exclude_guest events. */
+void perf_guest_enter(void)
+{
+	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
+
+	lockdep_assert_irqs_disabled();
+
+	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+
+	if (WARN_ON_ONCE(__this_cpu_read(perf_in_guest)))
+		goto unlock;
+
+	perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
+	ctx_sched_out(&cpuctx->ctx, EVENT_GUEST);
+	perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
+	if (cpuctx->task_ctx) {
+		perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
+		task_ctx_sched_out(cpuctx->task_ctx, EVENT_GUEST);
+		perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
+	}
+
+	__this_cpu_write(perf_in_guest, true);
+
+unlock:
+	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+}
+EXPORT_SYMBOL_GPL(perf_guest_enter);
+
+void perf_guest_exit(void)
+{
+	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
+
+	lockdep_assert_irqs_disabled();
+
+	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+
+	if (WARN_ON_ONCE(!__this_cpu_read(perf_in_guest)))
+		goto unlock;
+
+	perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
+	ctx_sched_in(&cpuctx->ctx, EVENT_GUEST);
+	perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
+	if (cpuctx->task_ctx) {
+		perf_ctx_disable(cpuctx->task_ctx, EVENT_GUEST);
+		ctx_sched_in(cpuctx->task_ctx, EVENT_GUEST);
+		perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
+	}
+
+	__this_cpu_write(perf_in_guest, false);
+unlock:
+	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
+}
+EXPORT_SYMBOL_GPL(perf_guest_exit);
+
 /*
  * Holding the top-level event's child_mutex means that any
  * descendant process that has inherited this event will block
-- 
2.46.0.rc1.232.g9752f9e123-goog
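
For context, a minimal caller sketch of how a KVM-style hypervisor might
bracket its VM entry/exit path with the two new interfaces. This is not
part of the patch: kvm_mediated_vcpu_run() and enter_guest() are
hypothetical stand-in names, not actual KVM symbols or proposed call
sites.

/*
 * Illustrative sketch only. Hands PMU ownership to the guest for the
 * duration of one VM entry. Both perf interfaces lockdep-assert that
 * interrupts are disabled, so the calls sit inside an irq-off region.
 */
static void kvm_mediated_vcpu_run(struct kvm_vcpu *vcpu)
{
	local_irq_disable();

	/* Host gives up the PMU: exclude_guest events are scheduled out. */
	perf_guest_enter();

	enter_guest(vcpu);	/* hypothetical: the actual VM entry */

	/* Guest has exited: exclude_guest events are scheduled back in. */
	perf_guest_exit();

	local_irq_enable();
}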