I'm into an issue when I compile kvm-unit-tests with a new-ish Clang version. It results in a failure similar to this: Serial contents after VMM exited: SeaBIOS (version 1.8.2-20160510_123855-google) Total RAM Size = 0x0000000100000000 = 4096 MiB CPU Mhz=2000 CPUs found: 1 Max CPUs supported: 1 Booting from ROM... enabling apic paging enabled cr0 = 80010011 cr3 = bfefc000 cr4 = 20 PMU version: 4 GP counters: 3 GP counter width: 48 Mask length: 7 Fixed counters: 3 Fixed counter width: 48 ---8<--- PASS: all counters FAIL: overflow: cntr-0 PASS: overflow: status-0 PASS: overflow: status clear-0 PASS: overflow: irq-0 FAIL: overflow: cntr-1 PASS: overflow: status-1 PASS: overflow: status clear-1 PASS: overflow: irq-1 FAIL: overflow: cntr-2 PASS: overflow: status-2 PASS: overflow: status clear-2 PASS: overflow: irq-2 FAIL: overflow: cntr-3 PASS: overflow: status-3 PASS: overflow: status clear-3 PASS: overflow: irq-3 ---8<--- It turns out that newer Clangs are much more aggressive at inlining than GCC. I could replicate this failure with GCC with the patch below[1] (the patch probably isn't minimal). If I add the "noinline" attribute "measure()" in the patch below, the test passes. Is there a subtle assumption being made by the test that breaks with aggressive inlining? If so, is adding the "noinline" attribute to "measure()" the correct fix, or should the test be made more robust? -bw [1] diff --git a/x86/pmu.c b/x86/pmu.c index a46bdbf4788c..4295e0c83aa0 100644 --- a/x86/pmu.c +++ b/x86/pmu.c @@ -104,7 +104,7 @@ static int num_counters; char *buf; -static inline void loop(void) +static __always_inline void loop(void) { unsigned long tmp, tmp2, tmp3; @@ -144,7 +144,7 @@ static int event_to_global_idx(pmu_counter_t *cnt) (MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX)); } -static struct pmu_event* get_counter_event(pmu_counter_t *cnt) +static __always_inline struct pmu_event* get_counter_event(pmu_counter_t *cnt) { if (is_gp(cnt)) { int i; @@ -158,7 +158,7 @@ static struct pmu_event* get_counter_event(pmu_counter_t *cnt) return (void*)0; } -static void global_enable(pmu_counter_t *cnt) +static __always_inline void global_enable(pmu_counter_t *cnt) { cnt->idx = event_to_global_idx(cnt); @@ -166,14 +166,14 @@ static void global_enable(pmu_counter_t *cnt) (1ull << cnt->idx)); } -static void global_disable(pmu_counter_t *cnt) +static __always_inline void global_disable(pmu_counter_t *cnt) { wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) & ~(1ull << cnt->idx)); } -static void start_event(pmu_counter_t *evt) +static __always_inline void start_event(pmu_counter_t *evt) { wrmsr(evt->ctr, evt->count); if (is_gp(evt)) @@ -197,7 +197,7 @@ static void start_event(pmu_counter_t *evt) apic_write(APIC_LVTPC, PC_VECTOR); } -static void stop_event(pmu_counter_t *evt) +static __always_inline void stop_event(pmu_counter_t *evt) { global_disable(evt); if (is_gp(evt)) @@ -211,7 +211,7 @@ static void stop_event(pmu_counter_t *evt) evt->count = rdmsr(evt->ctr); } -static void measure(pmu_counter_t *evt, int count) +static __always_inline void measure(pmu_counter_t *evt, int count) { int i; for (i = 0; i < count; i++)