On Mon, May 06, 2024 at 05:29:32AM +0000, Mingwei Zhang wrote:
> @@ -5791,6 +5801,100 @@ void perf_put_mediated_pmu(void)
>  }
>  EXPORT_SYMBOL_GPL(perf_put_mediated_pmu);
>
> +static void perf_sched_out_exclude_guest(struct perf_event_context *ctx)
> +{
> +	struct perf_event_pmu_context *pmu_ctx;
> +
> +	update_context_time(ctx);
> +	list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
> +		struct perf_event *event, *tmp;
> +		struct pmu *pmu = pmu_ctx->pmu;
> +
> +		if (!(pmu->capabilities & PERF_PMU_CAP_PASSTHROUGH_VPMU))
> +			continue;
> +
> +		perf_pmu_disable(pmu);
> +
> +		/*
> +		 * All active events must be exclude_guest events.
> +		 * See perf_get_mediated_pmu().
> +		 * Unconditionally remove all active events.
> +		 */
> +		list_for_each_entry_safe(event, tmp, &pmu_ctx->pinned_active, active_list)
> +			group_sched_out(event, pmu_ctx->ctx);
> +
> +		list_for_each_entry_safe(event, tmp, &pmu_ctx->flexible_active, active_list)
> +			group_sched_out(event, pmu_ctx->ctx);
> +
> +		pmu_ctx->rotate_necessary = 0;
> +
> +		perf_pmu_enable(pmu);
> +	}
> +}
> +
> +/* When entering a guest, schedule out all exclude_guest events. */
> +void perf_guest_enter(void)
> +{
> +	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
> +
> +	lockdep_assert_irqs_disabled();
> +
> +	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
> +
> +	if (WARN_ON_ONCE(__this_cpu_read(perf_in_guest))) {
> +		perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
> +		return;
> +	}
> +
> +	perf_sched_out_exclude_guest(&cpuctx->ctx);
> +	if (cpuctx->task_ctx)
> +		perf_sched_out_exclude_guest(cpuctx->task_ctx);
> +
> +	__this_cpu_write(perf_in_guest, true);
> +
> +	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
> +}
> +
> +static void perf_sched_in_exclude_guest(struct perf_event_context *ctx)
> +{
> +	struct perf_event_pmu_context *pmu_ctx;
> +
> +	update_context_time(ctx);
> +	list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
> +		struct pmu *pmu = pmu_ctx->pmu;
> +
> +		if (!(pmu->capabilities & PERF_PMU_CAP_PASSTHROUGH_VPMU))
> +			continue;
> +
> +		perf_pmu_disable(pmu);
> +		pmu_groups_sched_in(ctx, &ctx->pinned_groups, pmu);
> +		pmu_groups_sched_in(ctx, &ctx->flexible_groups, pmu);
> +		perf_pmu_enable(pmu);
> +	}
> +}
> +
> +void perf_guest_exit(void)
> +{
> +	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
> +
> +	lockdep_assert_irqs_disabled();
> +
> +	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
> +
> +	if (WARN_ON_ONCE(!__this_cpu_read(perf_in_guest))) {
> +		perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
> +		return;
> +	}
> +
> +	__this_cpu_write(perf_in_guest, false);
> +
> +	perf_sched_in_exclude_guest(&cpuctx->ctx);
> +	if (cpuctx->task_ctx)
> +		perf_sched_in_exclude_guest(cpuctx->task_ctx);
> +
> +	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
> +}

Bah, this is a ton of copy-paste from the normal scheduling code with
random changes. Why?

Why can't this use ctx_sched_{in,out}()? Surely the whole
CAP_PASSTHROUGH thing is but a flag away.
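
Something along these lines, say. This is only a rough sketch, not a
patch: the EVENT_GUEST bit does not exist today, and the assumption is
that teaching ctx_sched_out()/ctx_sched_in() about such a bit would
restrict both paths to PMUs with PERF_PMU_CAP_PASSTHROUGH_VPMU, so the
guest enter/exit hooks shrink to:

	/*
	 * Sketch only. EVENT_GUEST is a hypothetical event_type_t bit;
	 * the assumed semantics are that ctx_sched_out()/ctx_sched_in()
	 * then only touch PMUs with PERF_PMU_CAP_PASSTHROUGH_VPMU.
	 */
	void perf_guest_enter(void)
	{
		struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);

		lockdep_assert_irqs_disabled();

		perf_ctx_lock(cpuctx, cpuctx->task_ctx);

		if (WARN_ON_ONCE(__this_cpu_read(perf_in_guest)))
			goto unlock;

		/* Only exclude_guest events can be active on these PMUs. */
		ctx_sched_out(&cpuctx->ctx, EVENT_ALL | EVENT_GUEST);
		if (cpuctx->task_ctx)
			ctx_sched_out(cpuctx->task_ctx, EVENT_ALL | EVENT_GUEST);

		__this_cpu_write(perf_in_guest, true);

	unlock:
		perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
	}

	void perf_guest_exit(void)
	{
		struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);

		lockdep_assert_irqs_disabled();

		perf_ctx_lock(cpuctx, cpuctx->task_ctx);

		if (WARN_ON_ONCE(!__this_cpu_read(perf_in_guest)))
			goto unlock;

		__this_cpu_write(perf_in_guest, false);

		/* Reschedule the events we kicked out on guest entry. */
		ctx_sched_in(&cpuctx->ctx, EVENT_ALL | EVENT_GUEST);
		if (cpuctx->task_ctx)
			ctx_sched_in(cpuctx->task_ctx, EVENT_ALL | EVENT_GUEST);

	unlock:
		perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
	}

That way the time accounting, the pinned/flexible ordering and the
rotate_necessary handling all stay in the one place that already gets
them right, instead of being open-coded again here.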