On Sat, Jul 09, 2022, Aaron Lewis wrote:
> diff --git a/arch/x86/include/asm/kvm-x86-pmu-ops.h b/arch/x86/include/asm/kvm-x86-pmu-ops.h
> index fdfd8e06fee6..016713b583bf 100644
> --- a/arch/x86/include/asm/kvm-x86-pmu-ops.h
> +++ b/arch/x86/include/asm/kvm-x86-pmu-ops.h
> @@ -24,6 +24,7 @@ KVM_X86_PMU_OP(set_msr)
>  KVM_X86_PMU_OP(refresh)
>  KVM_X86_PMU_OP(init)
>  KVM_X86_PMU_OP(reset)
> +KVM_X86_PMU_OP(get_event_mask)
>  KVM_X86_PMU_OP_OPTIONAL(deliver_pmi)
>  KVM_X86_PMU_OP_OPTIONAL(cleanup)
>
> diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
> index 21614807a2cb..2964f3f15fb5 100644
> --- a/arch/x86/include/uapi/asm/kvm.h
> +++ b/arch/x86/include/uapi/asm/kvm.h
> @@ -522,6 +522,14 @@ struct kvm_pmu_event_filter {
>  #define KVM_PMU_EVENT_ALLOW 0
>  #define KVM_PMU_EVENT_DENY 1
>
> +#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS (1u << 0)

Is this a "flag" or a "type"?  The usage in code isn't really clear, e.g. there's
both

        if (filter->flags & KVM_PMU_EVENT_FLAG_MASKED_EVENTS)

and

        if (flag == KVM_PMU_EVENT_FLAG_MASKED_EVENTS)

> +#define KVM_PMU_EVENT_ENCODE_MASKED_EVENT(select, mask, match, invert) \
> +        (((select) & 0xfful) | (((select) & 0xf00ul) << 24) | \
> +         (((mask) & 0xfful) << 24) | \
> +         (((match) & 0xfful) << 8) | \
> +         (((invert) & 0x1ul) << 23))

Please add a comment showing the actual layout, this is extremely dense to parse.
Alternatively, and probably my preference, would be to define this as a union.
Or maybe both?  E.g. so that userspace can do:

        filter[i].raw = KVM_PMU_EVENT_ENCODE_MASKED_EVENT(...);

        struct kvm_pmu_event_filter_entry {
                union {
                        __u64 raw;
                        struct {
                                __u64 select_lo:8;
                                __u64 match:8;
                                __u64 rsvd1:7;
                                __u64 invert:1;
                                __u64 mask:8;
                                __u64 select_hi:4;
                                __u64 rsvd2:28;
                        };
                }
        }

And that begs the question of whether or not KVM should check those "rsvd" fields.
IIUC, this layout directly correlates to hardware, and hardware defines many of
the "rsvd1" bits.  Are those just a don't care?
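For example, the requested layout comment could look something like this (just a
sketch derived from the encoding macro above, reusing the union's field names):

        /*
         * Masked event layout:
         *
         *   bits  0-7:   select_lo  (event select, low 8 bits)
         *   bits  8-15:  match      (unit mask value to match)
         *   bits 16-22:  reserved
         *   bit  23:     invert
         *   bits 24-31:  mask       (applied to the guest's unit mask)
         *   bits 32-35:  select_hi  (event select, high 4 bits)
         *   bits 36-63:  reserved
         */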
>  /* for KVM_{GET,SET,HAS}_DEVICE_ATTR */
>  #define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */
>  #define   KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index 3f868fed9114..99c02bbb8f32 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -197,14 +197,106 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
>          return true;
>  }
>
> -static int cmp_u64(const void *pa, const void *pb)
> +static inline u64 get_event(u64 eventsel)
> +{
> +        return eventsel & AMD64_EVENTSEL_EVENT;
> +}
> +
> +static inline u8 get_unit_mask(u64 eventsel)
> +{
> +        return (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
> +}
> +
> +static inline u8 get_counter_mask(u64 eventsel)
>  {
> -        u64 a = *(u64 *)pa;
> -        u64 b = *(u64 *)pb;
> +        return (eventsel & ARCH_PERFMON_EVENTSEL_CMASK) >> 24;
> +}
> +
> +static inline bool get_invert_comparison(u64 eventsel)
> +{
> +        return !!(eventsel & ARCH_PERFMON_EVENTSEL_INV);
> +}
>
> +static inline int cmp_safe64(u64 a, u64 b)
> +{
>          return (a > b) - (a < b);
>  }
>
> +static int cmp_eventsel_event(const void *pa, const void *pb)
> +{
> +        return cmp_safe64(*(u64 *)pa & AMD64_EVENTSEL_EVENT,
> +                          *(u64 *)pb & AMD64_EVENTSEL_EVENT);

Why is common x86 code referencing AMD64 masks?  If "AMD64" here just means the
x86-64 / AMD64 architecture, i.e. is common to AMD and Intel, a comment to call
that out would be helpful.

> +}
> +
> +static int cmp_u64(const void *pa, const void *pb)
> +{
> +        return cmp_safe64(*(u64 *)pa,
> +                          *(u64 *)pb);
> +}
> +
> +static inline bool is_match(u64 masked_event, u64 eventsel)

Maybe "is_filter_entry_match"?

> +{
> +        u8 mask = get_counter_mask(masked_event);
> +        u8 match = get_unit_mask(masked_event);
> +        u8 unit_mask = get_unit_mask(eventsel);
> +
> +        return (unit_mask & mask) == match;
> +}
> +
> +static inline bool is_inverted(u64 masked_event)
> +{
> +        return get_invert_comparison(masked_event);
> +}

This is a pointless wrapper.  I suspect you added it to keep line lengths short,
but there are better ways to accomplish that.  More below.

> +
> +static bool is_filtered(struct kvm_pmu_event_filter *filter, u64 eventsel,
> +                        bool invert)
> +{
> +        u64 key = get_event(eventsel);
> +        u64 *event, *evt;
> +
> +        event = bsearch(&key, filter->events, filter->nevents, sizeof(u64),
> +                        cmp_eventsel_event);
> +
> +        if (event) {
> +                /* Walk the masked events backward looking for a match. */

This isn't a very helpful comment.  It's obvious enough from the code that this
walks backwards looking for a match, while the next one walks forward.  But it's
not immediately obvious _why_ that's the behavior.  I eventually realized the
code is looking for _any_ match, but a comment above this function documenting
that would be very helpful.

And doesn't this approach yield somewhat arbitrary behavior?  If there are
multiple filter entries for the same event select, which one "wins"?

> +                for (evt = event; evt >= filter->events &&
> +                     get_event(*evt) == get_event(eventsel); evt--)

The outer for-loop needs curly braces.  See "3) Placing Braces and Spaces" in
Documentation/process/coding-style.rst.

The "get_event() == get_event()" loop conditions are also hard to follow, and I
have a strong dislike for arithmetic on pointers that aren't a single byte.  I
don't think it's entirely avoidable, but it could be contained (see below).

> +                        if (is_inverted(*evt) == invert && is_match(*evt, eventsel))

The is_inverted() helper can be avoided by handling the inversion check in
is_match(), e.g.

        if (is_match(*event, eventsel, invert))

and the pointer arithmetic can be contained by iterating over indices instead,
e.g.

        if (entry) {
                pivot = <compute index of found entry>

                /*
                 * comment about how the key works and wanting to find any
                 * matching filter entry.
                 */
                for (i = pivot; i > 0; i--) {
                        entry = &filter->events[i];
                        if (get_event(*entry) != get_event(eventsel))
                                break;

                        if (is_match(*entry, eventsel, invert))
                                return true;
                }

                for (i = pivot + 1; i < filter->nevents; i++) {
                        entry = &filter->events[i];
                        if (get_event(*entry) != get_event(eventsel))
                                break;

                        if (is_match(*entry, eventsel, invert))
                                return true;
                }
        }
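where is_match() simply folds in the inversion check, e.g. (untested, just
merging the two helpers from this patch):

        static bool is_match(u64 masked_event, u64 eventsel, bool invert)
        {
                u8 mask = get_counter_mask(masked_event);
                u8 match = get_unit_mask(masked_event);
                u8 unit_mask = get_unit_mask(eventsel);

                return get_invert_comparison(masked_event) == invert &&
                       (unit_mask & mask) == match;
        }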
> +                                return true;
> +
> +                /* Walk the masked events forward looking for a match. */
> +                for (evt = event + 1;
> +                     evt < (filter->events + filter->nevents) &&
> +                     get_event(*evt) == get_event(eventsel); evt++)
> +                        if (is_inverted(*evt) == invert && is_match(*evt, eventsel))
> +                                return true;
> +        }
> +
> +        return false;
> +}
> +
> +static bool allowed_by_masked_events(struct kvm_pmu_event_filter *filter,
> +                                     u64 eventsel)
> +{
> +        if (is_filtered(filter, eventsel, /*invert=*/false))

Outer if-statement needs curly braces.  But even better would be to do:

        if (is_filter(...) && !is_filter(...))
                return filter->action == KVM_PMU_EVENT_ALLOW;

That said, I have zero idea what the intended logic is, i.e. this needs a
verbose comment.  Maybe if I was actually familiar with PMU event magic it
would be obvious, but I won't be the last person that reads this code with only
a passing understanding of PMU events.  Specifically, having an inverted flag
_and_ an inverted bit in the event mask is confusing.

> +                if (!is_filtered(filter, eventsel, /*invert=*/true))
> +                        return filter->action == KVM_PMU_EVENT_ALLOW;
> +
> +        return filter->action == KVM_PMU_EVENT_DENY;
> +}
> +
> +static bool allowed_by_default_events(struct kvm_pmu_event_filter *filter,
> +                                      u64 eventsel)
> +{
> +        u64 key = eventsel & AMD64_RAW_EVENT_MASK_NB;

Again, a comment would be helpful.  What is a "default" event?  Why does that
use a "raw" mask as the key?

But the "default" part can be avoided entirely, see below.

> +
> +        if (bsearch(&key, filter->events, filter->nevents,
> +                    sizeof(u64), cmp_u64))

Let this poke out, it's only 82 chars.  Though I would say add a helper to do
the search.  That will also make it easier to use a unionized bitfield, e.g.

        static __always_inline u64 *find_filter_entry(struct kvm_pmu_event_filter *filter,
                                                      u64 key, cmp_func_t cmp_fn)
        {
                struct kvm_pmu_event_filter_entry *entry;

                entry = bsearch(&key, filter->events, filter->nevents,
                                sizeof(filter->events[0]), cmp_fn);
                if (!entry)
                        return NULL;

                return &entry->raw;
        }

> +                return filter->action == KVM_PMU_EVENT_ALLOW;
> +
> +        return filter->action == KVM_PMU_EVENT_DENY;
> +}
> +
>  void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
>  {
>          u64 config;
> @@ -226,14 +318,11 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
>
>          filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
>          if (filter) {
> -                __u64 key = eventsel & AMD64_RAW_EVENT_MASK_NB;
> -
> -                if (bsearch(&key, filter->events, filter->nevents,
> -                            sizeof(__u64), cmp_u64))
> -                        allow_event = filter->action == KVM_PMU_EVENT_ALLOW;
> -                else
> -                        allow_event = filter->action == KVM_PMU_EVENT_DENY;
> +                allow_event = (filter->flags & KVM_PMU_EVENT_FLAG_MASKED_EVENTS) ?
> +                        allowed_by_masked_events(filter, eventsel) :
> +                        allowed_by_default_events(filter, eventsel);

Using a ternary operator isn't buying much.  E.g. IMO this is more readable:

        if (filter->flags & KVM_PMU_EVENT_FLAG_MASKED_EVENTS)
                allow_event = allowed_by_masked_events(filter, eventsel);
        else
                allow_event = allowed_by_default_events(filter, eventsel);

But again, better to avoid this entirely.

>          }
> +
>          if (!allow_event)

Checking "allow_event" outside of the "if (filter)" is unnecessary and confusing.

>                  return;

Rather than have separate helpers for "masked" versus "default", just have a
single "is_event_allowed", e.g.

        static bool is_event_allowed(struct kvm_pmu_event_filter *filter, u64 eventsel)
        {
                u64 key;

                if (filter->flags & KVM_PMU_EVENT_FLAG_MASKED_EVENTS) {
                        if (is_filtered(filter, eventsel, false) &&
                            !is_filtered(filter, eventsel, true))
                                return filter->action == KVM_PMU_EVENT_ALLOW;
                        goto not_filtered;
                }

                key = eventsel & AMD64_RAW_EVENT_MASK_NB;
                if (find_filter_entry(filter, key, cmp_u64))
                        return filter->action == KVM_PMU_EVENT_ALLOW;

        not_filtered:
                return filter->action == KVM_PMU_EVENT_DENY;
        }

And then this is succinct and readable:

        filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
        if (filter && !is_event_allowed(filter, eventsel))
                return;
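As an aside, I'd expect consuming the new uapi from userspace to boil down to
something like this (hypothetical values, untested; "vm_fd" being the VM file
descriptor):

        struct kvm_pmu_event_filter *filter;
        int nevents = 1;

        filter = calloc(1, sizeof(*filter) + nevents * sizeof(__u64));
        filter->action = KVM_PMU_EVENT_ALLOW;
        filter->flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS;
        filter->nevents = nevents;

        /* Hypothetical entry: event select 0xc0, any unit mask (mask == match == 0). */
        filter->events[0] = KVM_PMU_EVENT_ENCODE_MASKED_EVENT(0xc0, 0, 0, 0);

        ioctl(vm_fd, KVM_SET_PMU_EVENT_FILTER, filter);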