Re: [RFC PATCH v3 09/58] perf: Add a EVENT_GUEST flag

Peter Zijlstra <peterz@xxxxxxxxxxxxx> · Mon, 14 Oct 2024 13:14:16 +0200

On Thu, Aug 01, 2024 at 04:58:18AM +0000, Mingwei Zhang wrote:

> @@ -3334,9 +3401,15 @@ ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type)
>  	 * would only update time for the pinned events.
>  	 */
>  	if (is_active & EVENT_TIME) {
> +		bool stop;
> +
> +		/* vPMU should not stop time */
> +		stop = !(event_type & EVENT_GUEST) &&
> +		       ctx == &cpuctx->ctx;
> +
>  		/* update (and stop) ctx time */
>  		update_context_time(ctx);
> -		update_cgrp_time_from_cpuctx(cpuctx, ctx == &cpuctx->ctx);
> +		update_cgrp_time_from_cpuctx(cpuctx, stop);
>  		/*
>  		 * CPU-release for the below ->is_active store,
>  		 * see __load_acquire() in perf_event_time_now()
> @@ -3354,7 +3427,18 @@ ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type)
>  			cpuctx->task_ctx = NULL;
>  	}
>  
> -	is_active ^= ctx->is_active; /* changed bits */
> +	if (event_type & EVENT_GUEST) {
> +		/*
> +		 * Schedule out all !exclude_guest events of PMU
> +		 * with PERF_PMU_CAP_PASSTHROUGH_VPMU.
> +		 */

I thought the premise was that if we have !exclude_guest events, we'll
fail the creation of vPMU, and if we have vPMU we'll fail the creation
of !exclude_guest events.

As such, they're mutually exclusive, and the above comment doesn't make
sense, if we have a vPMU, there are no !exclude_guest events, IOW
schedule out the entire context.

> +		is_active = EVENT_ALL;
> +		__update_context_guest_time(ctx, false);
> +		perf_cgroup_set_timestamp(cpuctx, true);
> +		barrier();
> +	} else {
> +		is_active ^= ctx->is_active; /* changed bits */
> +	}
>  
>  	list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
>  		if (perf_skip_pmu_ctx(pmu_ctx, event_type))

> @@ -3864,13 +3953,22 @@ static int merge_sched_in(struct perf_event *event, void *data)
>  	if (!event_filter_match(event))
>  		return 0;
>  
> -	if (group_can_go_on(event, *can_add_hw)) {
> +	/*
> +	 * Don't schedule in any exclude_guest events of PMU with
> +	 * PERF_PMU_CAP_PASSTHROUGH_VPMU, while a guest is running.
> +	 */

More confusion; if we have vPMU there should not be !exclude_guest
events, right? So the above then becomes 'Don't schedule in any events'.

> +	if (__this_cpu_read(perf_in_guest) && event->attr.exclude_guest &&
> +	    event->pmu->capabilities & PERF_PMU_CAP_PASSTHROUGH_VPMU &&
> +	    !(msd->event_type & EVENT_GUEST))
> +		return 0;
> +
> +	if (group_can_go_on(event, msd->can_add_hw)) {
>  		if (!group_sched_in(event, ctx))
>  			list_add_tail(&event->active_list, get_event_list(event));

> @@ -3945,7 +4049,23 @@ ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type)
>  			WARN_ON_ONCE(cpuctx->task_ctx != ctx);
>  	}
>  
> -	is_active ^= ctx->is_active; /* changed bits */
> +	if (event_type & EVENT_GUEST) {
> +		/*
> +		 * Schedule in all !exclude_guest events of PMU
> +		 * with PERF_PMU_CAP_PASSTHROUGH_VPMU.
> +		 */

Idem.

> +		is_active = EVENT_ALL;
> +
> +		/*
> +		 * Update ctx time to set the new start time for
> +		 * the exclude_guest events.
> +		 */
> +		update_context_time(ctx);
> +		update_cgrp_time_from_cpuctx(cpuctx, false);
> +		barrier();
> +	} else {
> +		is_active ^= ctx->is_active; /* changed bits */
> +	}