On Thu, Aug 01, 2024 at 04:58:18AM +0000, Mingwei Zhang wrote: > @@ -3334,9 +3401,15 @@ ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type) > * would only update time for the pinned events. > */ > if (is_active & EVENT_TIME) { > + bool stop; > + > + /* vPMU should not stop time */ > + stop = !(event_type & EVENT_GUEST) && > + ctx == &cpuctx->ctx; > + > /* update (and stop) ctx time */ > update_context_time(ctx); > - update_cgrp_time_from_cpuctx(cpuctx, ctx == &cpuctx->ctx); > + update_cgrp_time_from_cpuctx(cpuctx, stop); > /* > * CPU-release for the below ->is_active store, > * see __load_acquire() in perf_event_time_now() > @@ -3354,7 +3427,18 @@ ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type) > cpuctx->task_ctx = NULL; > } > > - is_active ^= ctx->is_active; /* changed bits */ > + if (event_type & EVENT_GUEST) { > + /* > + * Schedule out all !exclude_guest events of PMU > + * with PERF_PMU_CAP_PASSTHROUGH_VPMU. > + */ I thought the premise was that if we have !exclude_guest events, we'll fail the creation of vPMU, and if we have vPMU we'll fail the creation of !exclude_guest events. As such, they're mutually exclusive, and the above comment doesn't make sense, if we have a vPMU, there are no !exclude_guest events, IOW schedule out the entire context. > + is_active = EVENT_ALL; > + __update_context_guest_time(ctx, false); > + perf_cgroup_set_timestamp(cpuctx, true); > + barrier(); > + } else { > + is_active ^= ctx->is_active; /* changed bits */ > + } > > list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) { > if (perf_skip_pmu_ctx(pmu_ctx, event_type)) > @@ -3864,13 +3953,22 @@ static int merge_sched_in(struct perf_event *event, void *data) > if (!event_filter_match(event)) > return 0; > > - if (group_can_go_on(event, *can_add_hw)) { > + /* > + * Don't schedule in any exclude_guest events of PMU with > + * PERF_PMU_CAP_PASSTHROUGH_VPMU, while a guest is running. > + */ More confusion; if we have vPMU there should not be !exclude_guest events, right? So the above then becomes 'Don't schedule in any events'. > + if (__this_cpu_read(perf_in_guest) && event->attr.exclude_guest && > + event->pmu->capabilities & PERF_PMU_CAP_PASSTHROUGH_VPMU && > + !(msd->event_type & EVENT_GUEST)) > + return 0; > + > + if (group_can_go_on(event, msd->can_add_hw)) { > if (!group_sched_in(event, ctx)) > list_add_tail(&event->active_list, get_event_list(event)); > @@ -3945,7 +4049,23 @@ ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type) > WARN_ON_ONCE(cpuctx->task_ctx != ctx); > } > > - is_active ^= ctx->is_active; /* changed bits */ > + if (event_type & EVENT_GUEST) { > + /* > + * Schedule in all !exclude_guest events of PMU > + * with PERF_PMU_CAP_PASSTHROUGH_VPMU. > + */ Idem. > + is_active = EVENT_ALL; > + > + /* > + * Update ctx time to set the new start time for > + * the exclude_guest events. > + */ > + update_context_time(ctx); > + update_cgrp_time_from_cpuctx(cpuctx, false); > + barrier(); > + } else { > + is_active ^= ctx->is_active; /* changed bits */ > + }