On 2024-10-11 2:42 p.m., Peter Zijlstra wrote:
>
> Can you rework this one along these lines?

Sure. I will probably also add macros to replace the magic numbers 0
and 1. For example,

#define T_TOTAL		0
#define T_GUEST		1

Thanks,
Kan

> ---
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -975,6 +975,7 @@ struct perf_event_context {
>  	 * Context clock, runs when context enabled.
>  	 */
>  	struct perf_time_ctx	time;
> +	struct perf_time_ctx	timeguest;
>  
>  	/*
>  	 * These fields let us detect when two contexts have both
> @@ -1066,6 +1067,7 @@ struct bpf_perf_event_data_kern {
>   */
>  struct perf_cgroup_info {
>  	struct perf_time_ctx	time;
> +	struct perf_time_ctx	timeguest;
>  	int			active;
>  };
>  
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -782,12 +782,44 @@ static inline int is_cgroup_event(struct
>  	return event->cgrp != NULL;
>  }
>  
> +static_assert(offsetof(struct perf_event_context, timeguest) -
> +	      offsetof(struct perf_event_context, time) ==
> +	      sizeof(struct perf_time_ctx));
> +
> +static_assert(offsetof(struct perf_cgroup_info, timeguest) -
> +	      offsetof(struct perf_cgroup_info, time) ==
> +	      sizeof(struct perf_time_ctx));
> +
> +static inline u64 __perf_event_time_ctx(struct perf_event *event,
> +					struct perf_time_ctx *times)
> +{
> +	u64 time = times[0].time;
> +	if (event->attr.exclude_guest)
> +		time -= times[1].time;
> +	return time;
> +}
> +
> +static inline u64 __perf_event_time_ctx_now(struct perf_event *event,
> +					    struct perf_time_ctx *times,
> +					    u64 now)
> +{
> +	if (event->attr.exclude_guest) {
> +		/*
> +		 * (now + times[0].offset) - (now + times[1].offset) :=
> +		 * times[0].offset - times[1].offset
> +		 */
> +		return READ_ONCE(times[0].offset) - READ_ONCE(times[1].offset);
> +	}
> +
> +	return now + READ_ONCE(times[0].offset);
> +}
> +
>  static inline u64 perf_cgroup_event_time(struct perf_event *event)
>  {
>  	struct perf_cgroup_info *t;
>  
>  	t = per_cpu_ptr(event->cgrp->info, event->cpu);
> -	return t->time.time;
> +	return __perf_event_time_ctx(event, &t->time);
>  }
>  
>  static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now)
> @@ -796,12 +828,11 @@ static inline u64 perf_cgroup_event_time
>  
>  	t = per_cpu_ptr(event->cgrp->info, event->cpu);
>  	if (!__load_acquire(&t->active))
> -		return t->time.time;
> -	now += READ_ONCE(t->time.offset);
> -	return now;
> +		return __perf_event_time_ctx(event, &t->time);
> +	return __perf_event_time_ctx_now(event, &t->time, now);
>  }
>  
> -static inline void update_perf_time_ctx(struct perf_time_ctx *time, u64 now, bool adv)
> +static inline void __update_perf_time_ctx(struct perf_time_ctx *time, u64 now, bool adv)
>  {
>  	if (adv)
>  		time->time += now - time->stamp;
> @@ -819,6 +850,13 @@ static inline void update_perf_time_ctx(
>  	WRITE_ONCE(time->offset, time->time - time->stamp);
>  }
>  
> +static inline void update_perf_time_ctx(struct perf_time_ctx *time, u64 now, bool adv)
> +{
> +	__update_perf_time_ctx(time + 0, now, adv);
> +	if (__this_cpu_read(perf_in_guest))
> +		__update_perf_time_ctx(time + 1, now, adv);
> +}
> +
>  static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, bool final)
>  {
>  	struct perf_cgroup *cgrp = cpuctx->cgrp;
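
For example, with the macros applied, the helpers from the sketch above
would read something like this (rough and untested; only the raw 0/1
indices change):

#define T_TOTAL		0
#define T_GUEST		1

static inline u64 __perf_event_time_ctx(struct perf_event *event,
					struct perf_time_ctx *times)
{
	/* Total time, minus guest-only time for exclude_guest events. */
	u64 time = times[T_TOTAL].time;

	if (event->attr.exclude_guest)
		time -= times[T_GUEST].time;

	return time;
}

static inline u64 __perf_event_time_ctx_now(struct perf_event *event,
					    struct perf_time_ctx *times,
					    u64 now)
{
	if (event->attr.exclude_guest) {
		/*
		 * (now + times[T_TOTAL].offset) - (now + times[T_GUEST].offset) :=
		 * times[T_TOTAL].offset - times[T_GUEST].offset
		 */
		return READ_ONCE(times[T_TOTAL].offset) -
		       READ_ONCE(times[T_GUEST].offset);
	}

	return now + READ_ONCE(times[T_TOTAL].offset);
}

static inline void update_perf_time_ctx(struct perf_time_ctx *time, u64 now, bool adv)
{
	__update_perf_time_ctx(time + T_TOTAL, now, adv);
	/* The guest clock only advances while a guest is running. */
	if (__this_cpu_read(perf_in_guest))
		__update_perf_time_ctx(time + T_GUEST, now, adv);
}

Since the T_GUEST clock only accumulates while perf_in_guest is set,
T_TOTAL minus T_GUEST naturally yields the host-only time that an
exclude_guest event should report.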
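
On the two static_asserts: they are needed because the helpers index
&foo->time as a two-element array, so timeguest must be laid out
immediately after time in both structures. A standalone illustration
(the perf_time_ctx fields below are reconstructed from the update code
above, so treat them as a sketch):

struct perf_time_ctx {
	u64	time;	/* accumulated time */
	u64	stamp;	/* timestamp of the last update */
	u64	offset;	/* snapshot of time - stamp for lockless readers */
};

struct perf_cgroup_info {
	struct perf_time_ctx	time;		/* times[T_TOTAL] */
	struct perf_time_ctx	timeguest;	/* times[T_GUEST] */
	int			active;
};

/*
 * __perf_event_time_ctx(event, &t->time) then resolves as:
 *	times[T_TOTAL]	-> t->time
 *	times[T_GUEST]	-> t->timeguest
 * which only holds when timeguest immediately follows time; that is
 * exactly what the offsetof()/sizeof() static_asserts enforce.
 */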