Re: [RFC PATCH v3 14/58] perf: Add switch_interrupt() interface

On 8/1/2024 10:28 AM, Mingwei Zhang wrote:
> From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
> 
> There will be a dedicated interrupt vector for guests on some platforms,
> e.g., Intel. Add an interface to switch the interrupt vector while
> entering/exiting a guest.
> 
> When the PMI is switched to a new guest vector, the guest_lvtpc value
> needs to be reflected in hardware. E.g., when the guest clears its PMI
> mask bit, the hardware PMI mask bit should be cleared as well, so that
> PMIs can continue to be generated for the guest. Therefore a
> guest_lvtpc parameter is added to perf_guest_enter() and
> switch_interrupt().
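
For context, the caller side might look roughly like the sketch below,
assuming a KVM vCPU entry path that reads the guest's LVTPC image from
the emulated local APIC (vcpu_mediated_pmu_enter() is a made-up name,
not something taken from this series):

static void vcpu_mediated_pmu_enter(struct kvm_vcpu *vcpu)
{
        /* Mirror the vector and mask bit the guest programmed. */
        u32 guest_lvtpc = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVTPC);

        perf_guest_enter(guest_lvtpc);
}
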
> 
> In switch_interrupt(), the target pmu with the PASSTHROUGH cap must be
> found. Since only one passthrough pmu is supported, keep the
> implementation simple by tracking that pmu in a global variable.
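
On the pmu driver side, the new callback could be implemented roughly
as in the sketch below, assuming the x86 core PMU, the usual local APIC
accessors, and a dedicated guest PMI vector (KVM_GUEST_PMI_VECTOR here
is a placeholder, not necessarily the name used in the series):

static void x86_pmu_switch_interrupt(bool enter, u32 guest_lvtpc)
{
        if (enter) {
                /*
                 * Point LVTPC at the dedicated guest vector and mirror
                 * the mask bit the guest programmed, so unmasking in
                 * the guest also unmasks the hardware entry.
                 */
                apic_write(APIC_LVTPC, APIC_DM_FIXED | KVM_GUEST_PMI_VECTOR |
                           (guest_lvtpc & APIC_LVT_MASKED));
        } else {
                /* Restore the host's NMI-based PMI delivery. */
                apic_write(APIC_LVTPC, APIC_DM_NMI);
        }
}

Such a pmu would also set PERF_PMU_CAP_PASSTHROUGH_VPMU in its
capabilities so that perf_pmu_register() records it as the passthrough
pmu used by perf_switch_interrupt().
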
> 
> Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
> 
> [Simplify the commit by removing the srcu lock/unlock since only one
> pmu is supported.]
> 
> Signed-off-by: Mingwei Zhang <mizhang@xxxxxxxxxx>
> ---
>  include/linux/perf_event.h |  9 +++++++--
>  kernel/events/core.c       | 36 ++++++++++++++++++++++++++++++++++--
>  2 files changed, 41 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 75773f9890cc..aeb08f78f539 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -541,6 +541,11 @@ struct pmu {
>  	 * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
>  	 */
>  	int (*check_period)		(struct perf_event *event, u64 value); /* optional */
> +
> +	/*
> +	 * Switch the interrupt vectors, e.g., guest enter/exit.
> +	 */
> +	void (*switch_interrupt)	(bool enter, u32 guest_lvtpc); /* optional */
>  };
>  
>  enum perf_addr_filter_action_t {
> @@ -1738,7 +1743,7 @@ extern int perf_event_period(struct perf_event *event, u64 value);
>  extern u64 perf_event_pause(struct perf_event *event, bool reset);
>  int perf_get_mediated_pmu(void);
>  void perf_put_mediated_pmu(void);
> -void perf_guest_enter(void);
> +void perf_guest_enter(u32 guest_lvtpc);
>  void perf_guest_exit(void);
>  #else /* !CONFIG_PERF_EVENTS: */
>  static inline void *
> @@ -1833,7 +1838,7 @@ static inline int perf_get_mediated_pmu(void)
>  }
>  
>  static inline void perf_put_mediated_pmu(void)			{ }
> -static inline void perf_guest_enter(void)			{ }
> +static inline void perf_guest_enter(u32 guest_lvtpc)		{ }
>  static inline void perf_guest_exit(void)			{ }
>  #endif
>  
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 57ff737b922b..047ca5748ee2 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -422,6 +422,7 @@ static inline bool is_include_guest_event(struct perf_event *event)
>  
>  static LIST_HEAD(pmus);
>  static DEFINE_MUTEX(pmus_lock);
> +static struct pmu *passthru_pmu;
>  static struct srcu_struct pmus_srcu;
>  static cpumask_var_t perf_online_mask;
>  static struct kmem_cache *perf_event_cache;
> @@ -5941,8 +5942,21 @@ void perf_put_mediated_pmu(void)
>  }
>  EXPORT_SYMBOL_GPL(perf_put_mediated_pmu);
>  
> +static void perf_switch_interrupt(bool enter, u32 guest_lvtpc)
> +{
> +	/* Mediated passthrough PMU should have PASSTHROUGH_VPMU cap. */
> +	if (!passthru_pmu)
> +		return;
> +
> +	if (passthru_pmu->switch_interrupt &&
> +	    try_module_get(passthru_pmu->module)) {
> +		passthru_pmu->switch_interrupt(enter, guest_lvtpc);
> +		module_put(passthru_pmu->module);
> +	}
> +}
> +
>  /* When entering a guest, schedule out all exclude_guest events. */
> -void perf_guest_enter(void)
> +void perf_guest_enter(u32 guest_lvtpc)
>  {
>  	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
>  
> @@ -5962,6 +5976,8 @@ void perf_guest_enter(void)
>  		perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
>  	}
>  
> +	perf_switch_interrupt(true, guest_lvtpc);
> +
>  	__this_cpu_write(perf_in_guest, true);
>  
>  unlock:
> @@ -5980,6 +5996,8 @@ void perf_guest_exit(void)
>  	if (WARN_ON_ONCE(!__this_cpu_read(perf_in_guest)))
>  		goto unlock;
>  
> +	perf_switch_interrupt(false, 0);
> +
>  	perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
>  	ctx_sched_in(&cpuctx->ctx, EVENT_GUEST);
>  	perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
> @@ -11842,7 +11860,21 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
>  	if (!pmu->event_idx)
>  		pmu->event_idx = perf_event_idx_default;
>  
> -	list_add_rcu(&pmu->entry, &pmus);
> +	/*
> +	 * Initialize passthru_pmu with the core pmu that has
> +	 * PERF_PMU_CAP_PASSTHROUGH_VPMU capability.
> +	 */
> +	if (pmu->capabilities & PERF_PMU_CAP_PASSTHROUGH_VPMU) {
> +		if (!passthru_pmu)
> +			passthru_pmu = pmu;
> +
> +		if (WARN_ONCE(passthru_pmu != pmu, "Only one passthrough PMU is supported\n")) {
> +			ret = -EINVAL;
> +			goto free_dev;
> +		}
> +	}


Our intention is to virtualize the IBS PMUs (Op and Fetch) using the same framework.
However, if the IBS PMUs also advertise the PERF_PMU_CAP_PASSTHROUGH_VPMU capability,
IBS PMU registration fails at this point, because the core PMU has already been
registered with PERF_PMU_CAP_PASSTHROUGH_VPMU.
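
One possible direction (purely illustrative; locking, SRCU and error
handling elided, and the passthru_entry member is hypothetical) would be
to track a list of passthrough-capable pmus instead of a single global
and let perf_switch_interrupt() walk it:

static LIST_HEAD(passthru_pmus);

static void perf_switch_interrupt(bool enter, u32 guest_lvtpc)
{
        struct pmu *pmu;

        /* Notify every pmu registered with PERF_PMU_CAP_PASSTHROUGH_VPMU. */
        list_for_each_entry(pmu, &passthru_pmus, passthru_entry) {
                if (pmu->switch_interrupt && try_module_get(pmu->module)) {
                        pmu->switch_interrupt(enter, guest_lvtpc);
                        module_put(pmu->module);
                }
        }
}

perf_pmu_register() would then add each such pmu to the list rather than
failing when a second one shows up.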

> +
> +	list_add_tail_rcu(&pmu->entry, &pmus);
>  	atomic_set(&pmu->exclusive_cnt, 0);
>  	ret = 0;
>  unlock:




