On Thu, Sep 10, 2020 at 10:32:23AM +0200, peterz@xxxxxxxxxxxxx wrote:

> > @@ -363,7 +363,14 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
> >  static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
> >  					 struct hw_perf_event *hwc, u64 config)
> >  {
> > -	wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
> > +	u64 _config = (hwc->config | config) & ~perf_ibs->enable_mask;
> > +
> > +	/* On Fam17h, the periodic fetch counter is set when IbsFetchEn is changed from 0 to 1 */
> > +	if (perf_ibs == &perf_ibs_fetch && boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
> > +		wrmsrl(hwc->config_base, _config);
>
> A better option would be to use hwc->flags, you're loading from that
> line already, so it's guaranteed hot and then you only have a single
> branch. Or stick it in perf_ibs near enable_mask, same difference.

I fixed it for you.

---

struct perf_ibs {
	struct pmu                 pmu;                         /*     0   296 */
	/* --- cacheline 4 boundary (256 bytes) was 40 bytes ago --- */
	unsigned int               msr;                         /*   296     4 */

	/* XXX 4 bytes hole, try to pack */

	u64                        config_mask;                 /*   304     8 */
	u64                        cnt_mask;                    /*   312     8 */
	/* --- cacheline 5 boundary (320 bytes) --- */
	u64                        enable_mask;                 /*   320     8 */
	u64                        valid_mask;                  /*   328     8 */
	u64                        max_period;                  /*   336     8 */
	long unsigned int          offset_mask[1];              /*   344     8 */
	int                        offset_max;                  /*   352     4 */
	unsigned int               fetch_count_reset_broken:1;  /*   356: 0   4 */

	/* XXX 31 bits hole, try to pack */

	struct cpu_perf_ibs *      pcpu;                        /*   360     8 */
	struct attribute * *       format_attrs;                /*   368     8 */
	struct attribute_group     format_group;                /*   376    40 */
	/* --- cacheline 6 boundary (384 bytes) was 32 bytes ago --- */
	const struct attribute_group  * attr_groups[2];         /*   416    16 */
	u64                        (*get_count)(u64);           /*   432     8 */

	/* size: 440, cachelines: 7, members: 15 */
	/* sum members: 432, holes: 1, sum holes: 4 */
	/* sum bitfield members: 1 bits, bit holes: 1, sum bit holes: 31 bits */
	/* last cacheline: 56 bytes */
};

--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -89,6 +89,7 @@ struct perf_ibs {
 	u64				max_period;
 	unsigned long			offset_mask[1];
 	int				offset_max;
+	unsigned int			fetch_count_reset_broken : 1;
 	struct cpu_perf_ibs __percpu	*pcpu;
 
 	struct attribute		**format_attrs;
@@ -370,7 +371,13 @@ perf_ibs_event_update(struct perf_ibs *p
 static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
 					 struct hw_perf_event *hwc, u64 config)
 {
-	wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
+	u64 _config = (hwc->config | config) & ~perf_ibs->enable_mask;
+
+	if (perf_ibs->fetch_count_reset_broken)
+		wrmsrl(hwc->config_base, _config);
+
+	_config |= perf_ibs->enable_mask;
+	wrmsrl(hwc->config_base, _config);
 }
 
 /*
@@ -756,6 +763,13 @@ static __init void perf_event_ibs_init(v
 {
 	struct attribute **attr = ibs_op_format_attrs;
 
+	/*
+	 * Some chips fail to reset the fetch count when it is written; instead
+	 * they need a 0-1 transition of IbsFetchEn.
+	 */
+	if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
+		perf_ibs_fetch.fetch_count_reset_broken = 1;
+
 	perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
 
 	if (ibs_caps & IBS_CAPS_OPCNT) {
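
For comparison, a minimal sketch of the hwc->flags variant mentioned
above. This is illustrative only: the flag name
PERF_X86_EVENT_IBS_FETCH_CNT_BROKEN and its bit value are invented here
(they are not existing PERF_X86_EVENT_* definitions, and a real patch
would have to pick a free bit), and the helper would be called from
perf_ibs_init() so the flag is set only on ibs_fetch events.

/* Hypothetical flag; bit value chosen arbitrarily for this sketch. */
#define PERF_X86_EVENT_IBS_FETCH_CNT_BROKEN	0x40000

static void perf_ibs_set_quirk_flags(struct perf_ibs *perf_ibs,
				     struct perf_event *event)
{
	/* Decide once per event, so the enable path tests a single bit. */
	if (perf_ibs == &perf_ibs_fetch &&
	    boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
		event->hw.flags |= PERF_X86_EVENT_IBS_FETCH_CNT_BROKEN;
}

static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
					 struct hw_perf_event *hwc, u64 config)
{
	u64 _config = (hwc->config | config) & ~perf_ibs->enable_mask;

	/* hwc is already loaded for ->config, so ->flags is cache-hot. */
	if (hwc->flags & PERF_X86_EVENT_IBS_FETCH_CNT_BROKEN)
		wrmsrl(hwc->config_base, _config);	/* IbsFetchEn = 0 */

	wrmsrl(hwc->config_base, _config | perf_ibs->enable_mask); /* 0 -> 1 */
}

Either way the family check runs once at init time and the fast path is
a single branch on an already-hot line; the perf_ibs variant in the
patch above just avoids touching per-event state.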