On 8/1/2024 10:28 AM, Mingwei Zhang wrote:
> From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
>
> There will be a dedicated interrupt vector for guests on some platforms,
> e.g., Intel. Add an interface to switch the interrupt vector while
> entering/exiting a guest.
>
> When the PMI switches to a new guest vector, the guest_lvtpc value needs
> to be reflected in HW; e.g., when the guest clears the PMI mask bit, the
> HW PMI mask bit should be cleared as well, so that PMIs can keep being
> generated for the guest. Therefore a guest_lvtpc parameter is added to
> perf_guest_enter() and switch_interrupt().
>
> In switch_interrupt(), the target pmu with the PASSTHROUGH cap has to be
> found. Since only one passthrough pmu is supported, keep the
> implementation simple by tracking the pmu in a global variable.
>
> Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
>
> [Simplify the commit by removing the srcu lock/unlock since only one pmu
> is supported.]
>
> Signed-off-by: Mingwei Zhang <mizhang@xxxxxxxxxx>
> ---
>  include/linux/perf_event.h |  9 +++++++--
>  kernel/events/core.c       | 36 ++++++++++++++++++++++++++++++++++--
>  2 files changed, 41 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 75773f9890cc..aeb08f78f539 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -541,6 +541,11 @@ struct pmu {
>  	 * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
>  	 */
>  	int (*check_period)		(struct perf_event *event, u64 value); /* optional */
> +
> +	/*
> +	 * Switch the interrupt vectors, e.g., guest enter/exit.
> +	 */
> +	void (*switch_interrupt)	(bool enter, u32 guest_lvtpc); /* optional */
>  };
>
>  enum perf_addr_filter_action_t {
> @@ -1738,7 +1743,7 @@ extern int perf_event_period(struct perf_event *event, u64 value);
>  extern u64 perf_event_pause(struct perf_event *event, bool reset);
>  int perf_get_mediated_pmu(void);
>  void perf_put_mediated_pmu(void);
> -void perf_guest_enter(void);
> +void perf_guest_enter(u32 guest_lvtpc);
>  void perf_guest_exit(void);
>  #else /* !CONFIG_PERF_EVENTS: */
>  static inline void *
> @@ -1833,7 +1838,7 @@ static inline int perf_get_mediated_pmu(void)
>  }
>
>  static inline void perf_put_mediated_pmu(void)		{ }
> -static inline void perf_guest_enter(void)		{ }
> +static inline void perf_guest_enter(u32 guest_lvtpc)	{ }
>  static inline void perf_guest_exit(void)		{ }
>  #endif
>
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 57ff737b922b..047ca5748ee2 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -422,6 +422,7 @@ static inline bool is_include_guest_event(struct perf_event *event)
>
>  static LIST_HEAD(pmus);
>  static DEFINE_MUTEX(pmus_lock);
> +static struct pmu *passthru_pmu;
>  static struct srcu_struct pmus_srcu;
>  static cpumask_var_t perf_online_mask;
>  static struct kmem_cache *perf_event_cache;
> @@ -5941,8 +5942,21 @@ void perf_put_mediated_pmu(void)
>  }
>  EXPORT_SYMBOL_GPL(perf_put_mediated_pmu);
>
> +static void perf_switch_interrupt(bool enter, u32 guest_lvtpc)
> +{
> +	/* Mediated passthrough PMU should have PASSTHROUGH_VPMU cap. */
> +	if (!passthru_pmu)
> +		return;
> +
> +	if (passthru_pmu->switch_interrupt &&
> +	    try_module_get(passthru_pmu->module)) {
> +		passthru_pmu->switch_interrupt(enter, guest_lvtpc);
> +		module_put(passthru_pmu->module);
> +	}
> +}
> +
>  /* When entering a guest, schedule out all exclude_guest events. */
> -void perf_guest_enter(void)
> +void perf_guest_enter(u32 guest_lvtpc)
>  {
>  	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
>
> @@ -5962,6 +5976,8 @@ void perf_guest_enter(void)
>  		perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
>  	}
>
> +	perf_switch_interrupt(true, guest_lvtpc);
> +
>  	__this_cpu_write(perf_in_guest, true);
>
>  unlock:
> @@ -5980,6 +5996,8 @@ void perf_guest_exit(void)
>  	if (WARN_ON_ONCE(!__this_cpu_read(perf_in_guest)))
>  		goto unlock;
>
> +	perf_switch_interrupt(false, 0);
> +
>  	perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
>  	ctx_sched_in(&cpuctx->ctx, EVENT_GUEST);
>  	perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
> @@ -11842,7 +11860,21 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
>  	if (!pmu->event_idx)
>  		pmu->event_idx = perf_event_idx_default;
>
> -	list_add_rcu(&pmu->entry, &pmus);
> +	/*
> +	 * Initialize passthru_pmu with the core pmu that has
> +	 * PERF_PMU_CAP_PASSTHROUGH_VPMU capability.
> +	 */
> +	if (pmu->capabilities & PERF_PMU_CAP_PASSTHROUGH_VPMU) {
> +		if (!passthru_pmu)
> +			passthru_pmu = pmu;
> +
> +		if (WARN_ONCE(passthru_pmu != pmu, "Only one passthrough PMU is supported\n")) {
> +			ret = -EINVAL;
> +			goto free_dev;
> +		}
> +	}

Our intention is to virtualize the IBS PMUs (Op and Fetch) using the same
framework. However, if the IBS PMUs also set the
PERF_PMU_CAP_PASSTHROUGH_VPMU capability, IBS PMU registration fails at
this point, because the core PMU is already registered with
PERF_PMU_CAP_PASSTHROUGH_VPMU. Rough sketches of what we have in mind
follow at the end of this reply.

> +
> +	list_add_tail_rcu(&pmu->entry, &pmus);
>  	atomic_set(&pmu->exclusive_cnt, 0);
>  	ret = 0;
>  unlock:
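For reference, this is roughly how we picture the driver side of the new
hook on x86. It is an untested sketch, not code from this series: the
callback name is made up, and it assumes KVM composes guest_lvtpc as a
complete APIC_LVTPC image (vector plus mask bit) before calling
perf_guest_enter().

#include <linux/perf_event.h>
#include <asm/apic.h>

/*
 * Sketch only: mirror the guest's LVTPC image into the hardware LVT
 * entry on guest entry, and restore the host's NMI delivery on exit.
 */
static void x86_core_switch_interrupt(bool enter, u32 guest_lvtpc)
{
	if (enter)
		apic_write(APIC_LVTPC, guest_lvtpc);
	else
		apic_write(APIC_LVTPC, APIC_DM_NMI);
}

An IBS flavour of the same callback would have to switch the extended LVT
entry that IBS interrupts are delivered on rather than LVTPC, but the
shape is the same, which is why we would like the IBS Op and Fetch PMUs
to be able to provide ->switch_interrupt() as well.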
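One possible way to lift the single-PMU restriction, ignoring the
locking/SRCU details for now, would be to keep the passthrough PMUs on
their own list instead of in a single global pointer. Again only a rough
sketch: "passthru_entry" would be a new list_head in struct pmu and does
not exist in this series.

#include <linux/module.h>
#include <linux/perf_event.h>
#include <linux/rculist.h>

/* All PMUs that advertise PERF_PMU_CAP_PASSTHROUGH_VPMU. */
static LIST_HEAD(passthru_pmus);

/* Called from perf_pmu_register() in place of the single-PMU check. */
static void perf_register_passthru_pmu(struct pmu *pmu)
{
	if (pmu->capabilities & PERF_PMU_CAP_PASSTHROUGH_VPMU)
		list_add_tail_rcu(&pmu->passthru_entry, &passthru_pmus);
}

/* Fan the vector switch out to core, IBS Op and IBS Fetch alike. */
static void perf_switch_interrupt(bool enter, u32 guest_lvtpc)
{
	struct pmu *pmu;

	list_for_each_entry_rcu(pmu, &passthru_pmus, passthru_entry) {
		if (pmu->switch_interrupt && try_module_get(pmu->module)) {
			pmu->switch_interrupt(enter, guest_lvtpc);
			module_put(pmu->module);
		}
	}
}

That would keep perf_guest_enter()/perf_guest_exit() as the single entry
point for the vector switch while letting the IBS PMUs register with the
same capability.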