Hi Daniel, On 21/01/2016 19:08, Daniel Bristot de Oliveira wrote: > Use the MSR/SMI counter on Intel's processor to detect/count SMIs. It is > based on turbostat's implementation. > > SMI counting is enabled via --smi argument. When enabled, and additional > field is added on both regular and verbose ouput. > > On the regular output, a SMI column shows how many SMIs occurred on > each CPU during cyclictest's execution. For example: > > policy: fifo: loadavg: 0.09 0.05 0.02 1/194 2288 > > T: 0 ( 2285) P:80 I:1000 C: 9975 Min: 2 Act: 3 Avg: 4 Max: 20831 SMI: 2 > T: 1 ( 2286) P:80 I:1500 C: 6650 Min: 2 Act: 2 Avg: 5 Max: 19910 SMI: 2 > T: 2 ( 2287) P:80 I:2000 C: 4987 Min: 2 Act: 2 Avg: 6 Max: 20811 SMI: 2 > T: 3 ( 2288) P:80 I:2500 C: 3990 Min: 2 Act: 3 Avg: 7 Max: 20322 SMI: 2 > > On verbose output, the last column shows how many SMIs occurred > on each loop. For example: > > [ CPU | Loop | Lat | SMIs ] > 0: 2156: 2 0 > 0: 2157: 2 0 > 0: 2158: 2 0 > 0: 2159: 20981 2 > 1: 1433: 2 0 > 1: 1434: 2 0 > 1: 1435: 2 0 > 1: 1436: 3 0 > 1: 1437: 2 0 > 1: 1438: 2 0 > 1: 1439: 20249 2 > > Signed-off-by: Daniel Bristot de Oliveira <bristot@xxxxxxxxxx> > --- > src/cyclictest/cyclictest.c | 229 ++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 219 insertions(+), 10 deletions(-) > > diff --git a/src/cyclictest/cyclictest.c b/src/cyclictest/cyclictest.c > index f5a67dc..df3db2a 100644 > --- a/src/cyclictest/cyclictest.c > +++ b/src/cyclictest/cyclictest.c > @@ -111,6 +111,13 @@ extern int clock_nanosleep(clockid_t __clock_id, int __flags, > #define KVARNAMELEN 32 > #define KVALUELEN 32 > > +#if (defined(__i386__) || defined(__x86_64__)) > +#define ARCH_HAS_SMI_COUNTER > +#endif > + > +#define MSR_SMI_COUNT 0x00000034 > +#define MSR_SMI_COUNT_MASK 0xFFFFFFFF > + > int enable_events; > > static char *policyname(int policy); > @@ -143,6 +150,7 @@ struct thread_param { > int cpu; > int node; > int tnum; > + int msr_fd; > }; > > /* Struct for statistics */ > @@ -154,6 +162,7 
@@ struct thread_stat { > long act; > double avg; > long *values; > + long *smis; > long *hist_array; > long *outliers; > pthread_t thread; > @@ -164,6 +173,7 @@ struct thread_stat { > long cycleofmax; > long hist_overflow; > long num_outliers; > + unsigned long smi_count; > }; > > static pthread_mutex_t trigger_lock = PTHREAD_MUTEX_INITIALIZER; > @@ -212,6 +222,12 @@ static pthread_t fifo_threadid; > static int laptop = 0; > static int use_histfile = 0; > > +#ifdef ARCH_HAS_SMI_COUNTER > +static int smi = 0; > +#else > +#define smi 0 > +#endif > + > static pthread_cond_t refresh_on_max_cond = PTHREAD_COND_INITIALIZER; > static pthread_mutex_t refresh_on_max_lock = PTHREAD_MUTEX_INITIALIZER; > > @@ -772,6 +788,125 @@ try_again: > return err; > } > > +#ifdef ARCH_HAS_SMI_COUNTER > +static int open_msr_file(int cpu) > +{ > + int fd; > + char pathname[32]; > + > + /* SMI needs thread affinity */ > + sprintf(pathname, "/dev/cpu/%d/msr", cpu); > + fd = open(pathname, O_RDONLY); > + if (fd < 0) > + warn("%s open failed, try chown or chmod +r " > + "/dev/cpu/*/msr, or run as root\n", pathname); > + > + return fd; > +} > + > +static int get_msr(int fd, off_t offset, unsigned long long *msr) > +{ > + ssize_t retval; > + > + retval = pread(fd, msr, sizeof *msr, offset); > + > + if (retval != sizeof *msr) > + return 1; > + > + return 0; > +} > + > +static int get_smi_counter(int fd, unsigned long *counter) > +{ > + int retval; > + unsigned long long msr; > + > + retval = get_msr(fd, MSR_SMI_COUNT, &msr); > + if (retval) > + return retval; > + > + *counter = (unsigned long) (msr & MSR_SMI_COUNT_MASK); > + > + return 0; > +} > + > +#include <cpuid.h> > + > +/* Based on turbostat's check */ > +static int has_smi_counter(void) > +{ > + unsigned int ebx, ecx, edx, max_level; > + unsigned int fms, family, model; > + > + fms = family = model = ebx = ecx = edx = 0; > + > + __get_cpuid(0, &max_level, &ebx, &ecx, &edx); > + > + /* check genuine intel */ > + if (!(ebx == 0x756e6547 && 
edx == 0x49656e69 && ecx == 0x6c65746e)) > + return 0; > + > + __get_cpuid(1, &fms, &ebx, &ecx, &edx); > + family = (fms >> 8) & 0xf; > + > + if (family != 6) > + return 0; > + > + /* no MSR */ > + if (!(edx & (1 << 5))) > + return 0; > + > + model = (((fms >> 16) & 0xf) << 4) + ((fms >> 4) & 0xf); > + > + switch (model) { > + case 0x1A: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */ > + case 0x1E: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ > + case 0x1F: /* Core i7 and i5 Processor - Nehalem */ > + case 0x25: /* Westmere Client - Clarkdale, Arrandale */ > + case 0x2C: /* Westmere EP - Gulftown */ > + case 0x2E: /* Nehalem-EX Xeon - Beckton */ > + case 0x2F: /* Westmere-EX Xeon - Eagleton */ > + case 0x2A: /* SNB */ > + case 0x2D: /* SNB Xeon */ > + case 0x3A: /* IVB */ > + case 0x3E: /* IVB Xeon */ > + case 0x3C: /* HSW */ > + case 0x3F: /* HSX */ > + case 0x45: /* HSW */ > + case 0x46: /* HSW */ > + case 0x3D: /* BDW */ > + case 0x47: /* BDW */ > + case 0x4F: /* BDX */ Sorry for digging out this old patch. Are you sure that 0x4F has MSR_SMI_COUNT? I'm currently sitting in front of such a box (Xeon E5-2683), and I doubt the value in this MSR is correct for this model; it's way too high… I checked Intel's SDM v4: MSR refman (as of Jul'17), and it affirms that 0x4f doesn't have an MSR_SMI_COUNT. Compared to Intel's SDM, this list of supported models somehow seems too long. Thanks Ralf