On 9/19/2024 6:30 PM, Liang, Kan wrote:
>
>
> On 2024-09-19 2:02 a.m., Manali Shukla wrote:
>> On 8/1/2024 10:28 AM, Mingwei Zhang wrote:
>>> From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
>>>
>>> There will be a dedicated interrupt vector for guests on some platforms,
>>> e.g., Intel. Add an interface to switch the interrupt vector while
>>> entering/exiting a guest.
>>>
>>> When the PMI is switched to a new guest vector, the guest_lvtpc value
>>> needs to be reflected in HW as well; e.g., if the guest clears the PMI
>>> mask bit, the HW PMI mask bit should also be cleared so that PMIs can
>>> keep being generated for the guest. A guest_lvtpc parameter is
>>> therefore added to perf_guest_enter() and switch_interrupt().
>>>
>>> At switch_interrupt() time, the target pmu with the PASSTHROUGH cap
>>> must be found. Since only one passthrough pmu is supported, keep the
>>> implementation simple by tracking that pmu as a global variable.
>>>
>>> Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
>>>
>>> [Simplify the commit by removing the srcu lock/unlock since only one
>>> pmu is supported.]
>>>
>>> Signed-off-by: Mingwei Zhang <mizhang@xxxxxxxxxx>
>>> ---
>>>  include/linux/perf_event.h |  9 +++++++--
>>>  kernel/events/core.c       | 36 ++++++++++++++++++++++++++++++++++--
>>>  2 files changed, 41 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
>>> index 75773f9890cc..aeb08f78f539 100644
>>> --- a/include/linux/perf_event.h
>>> +++ b/include/linux/perf_event.h
>>> @@ -541,6 +541,11 @@ struct pmu {
>>>  	 * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
>>>  	 */
>>>  	int (*check_period)		(struct perf_event *event, u64 value); /* optional */
>>> +
>>> +	/*
>>> +	 * Switch the interrupt vectors, e.g., guest enter/exit.
>>> +	 */
>>> +	void (*switch_interrupt)	(bool enter, u32 guest_lvtpc); /* optional */
>>>  };
>>>
>>>  enum perf_addr_filter_action_t {
>>> @@ -1738,7 +1743,7 @@ extern int perf_event_period(struct perf_event *event, u64 value);
>>>  extern u64 perf_event_pause(struct perf_event *event, bool reset);
>>>  int perf_get_mediated_pmu(void);
>>>  void perf_put_mediated_pmu(void);
>>> -void perf_guest_enter(void);
>>> +void perf_guest_enter(u32 guest_lvtpc);
>>>  void perf_guest_exit(void);
>>>  #else /* !CONFIG_PERF_EVENTS: */
>>>  static inline void *
>>> @@ -1833,7 +1838,7 @@ static inline int perf_get_mediated_pmu(void)
>>>  }
>>>
>>>  static inline void perf_put_mediated_pmu(void) { }
>>> -static inline void perf_guest_enter(void) { }
>>> +static inline void perf_guest_enter(u32 guest_lvtpc) { }
>>>  static inline void perf_guest_exit(void) { }
>>>  #endif
>>>
>>> diff --git a/kernel/events/core.c b/kernel/events/core.c
>>> index 57ff737b922b..047ca5748ee2 100644
>>> --- a/kernel/events/core.c
>>> +++ b/kernel/events/core.c
>>> @@ -422,6 +422,7 @@ static inline bool is_include_guest_event(struct perf_event *event)
>>>
>>>  static LIST_HEAD(pmus);
>>>  static DEFINE_MUTEX(pmus_lock);
>>> +static struct pmu *passthru_pmu;
>>>  static struct srcu_struct pmus_srcu;
>>>  static cpumask_var_t perf_online_mask;
>>>  static struct kmem_cache *perf_event_cache;
>>> @@ -5941,8 +5942,21 @@ void perf_put_mediated_pmu(void)
>>>  }
>>>  EXPORT_SYMBOL_GPL(perf_put_mediated_pmu);
>>>
>>> +static void perf_switch_interrupt(bool enter, u32 guest_lvtpc)
>>> +{
>>> +	/* Mediated passthrough PMU should have PASSTHROUGH_VPMU cap. */
>>> +	if (!passthru_pmu)
>>> +		return;
>>> +
>>> +	if (passthru_pmu->switch_interrupt &&
>>> +	    try_module_get(passthru_pmu->module)) {
>>> +		passthru_pmu->switch_interrupt(enter, guest_lvtpc);
>>> +		module_put(passthru_pmu->module);
>>> +	}
>>> +}
>>> +
>>>  /* When entering a guest, schedule out all exclude_guest events. */
>>> -void perf_guest_enter(void)
>>> +void perf_guest_enter(u32 guest_lvtpc)
>>>  {
>>>  	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
>>>
>>> @@ -5962,6 +5976,8 @@ void perf_guest_enter(void)
>>>  		perf_ctx_enable(cpuctx->task_ctx, EVENT_GUEST);
>>>  	}
>>>
>>> +	perf_switch_interrupt(true, guest_lvtpc);
>>> +
>>>  	__this_cpu_write(perf_in_guest, true);
>>>
>>>  unlock:
>>> @@ -5980,6 +5996,8 @@ void perf_guest_exit(void)
>>>  	if (WARN_ON_ONCE(!__this_cpu_read(perf_in_guest)))
>>>  		goto unlock;
>>>
>>> +	perf_switch_interrupt(false, 0);
>>> +
>>>  	perf_ctx_disable(&cpuctx->ctx, EVENT_GUEST);
>>>  	ctx_sched_in(&cpuctx->ctx, EVENT_GUEST);
>>>  	perf_ctx_enable(&cpuctx->ctx, EVENT_GUEST);
>>> @@ -11842,7 +11860,21 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
>>>  	if (!pmu->event_idx)
>>>  		pmu->event_idx = perf_event_idx_default;
>>>
>>> -	list_add_rcu(&pmu->entry, &pmus);
>>> +	/*
>>> +	 * Initialize passthru_pmu with the core pmu that has
>>> +	 * PERF_PMU_CAP_PASSTHROUGH_VPMU capability.
>>> +	 */
>>> +	if (pmu->capabilities & PERF_PMU_CAP_PASSTHROUGH_VPMU) {
>>> +		if (!passthru_pmu)
>>> +			passthru_pmu = pmu;
>>> +
>>> +		if (WARN_ONCE(passthru_pmu != pmu, "Only one passthrough PMU is supported\n")) {
>>> +			ret = -EINVAL;
>>> +			goto free_dev;
>>> +		}
>>> +	}
>>
>>
>> Our intention is to virtualize the IBS PMUs (Op and Fetch) using the same
>> framework. However, if the IBS PMUs also use the PERF_PMU_CAP_PASSTHROUGH_VPMU
>> capability, IBS PMU registration fails at this point because the Core PMU is
>> already registered with PERF_PMU_CAP_PASSTHROUGH_VPMU.
>>
>
> The original implementation doesn't limit the number of PMUs with
> PERF_PMU_CAP_PASSTHROUGH_VPMU, but at that time we could not find a
> case of more than one PMU with the flag. After several debates, the
> patch was simplified to support only one PMU with the flag.
> It should not be hard to change it back.
>
> Thanks,
> Kan
>

Yes, we have a use case for the mediated passthrough vPMU framework for
IBS virtualization, so we will need it.

- Manali

>>> +
>>> +	list_add_tail_rcu(&pmu->entry, &pmus);
>>>  	atomic_set(&pmu->exclusive_cnt, 0);
>>>  	ret = 0;
>>>  unlock:
>>
>>
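For reference, a minimal, untested sketch of what lifting the one-PMU limit
might look like. The passthru_pmus list and the pmu->passthru_entry member are
hypothetical names used only for illustration, not part of the posted patch,
and the srcu read-side protection that the simplified version dropped would
need to come back:

/* Hypothetical: every PMU with PERF_PMU_CAP_PASSTHROUGH_VPMU sits on this list. */
static LIST_HEAD(passthru_pmus);	/* writers hold pmus_lock */

static void perf_switch_interrupt(bool enter, u32 guest_lvtpc)
{
	struct pmu *pmu;
	int idx;

	/* Walk all passthrough PMUs (core, IBS Op, IBS Fetch, ...). */
	idx = srcu_read_lock(&pmus_srcu);
	list_for_each_entry_rcu(pmu, &passthru_pmus, passthru_entry) {
		if (pmu->switch_interrupt && try_module_get(pmu->module)) {
			pmu->switch_interrupt(enter, guest_lvtpc);
			module_put(pmu->module);
		}
	}
	srcu_read_unlock(&pmus_srcu, idx);
}

/* In perf_pmu_register(), instead of WARN_ONCE() + -EINVAL on a second PMU: */
	if (pmu->capabilities & PERF_PMU_CAP_PASSTHROUGH_VPMU)
		list_add_tail_rcu(&pmu->passthru_entry, &passthru_pmus);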