+Jacob Pan >-----Original Message----- >From: linux-rt-users-owner@xxxxxxxxxxxxxxx <linux-rt-users- >owner@xxxxxxxxxxxxxxx> On Behalf Of Ralf Ramsauer >Sent: Wednesday, September 19, 2018 9:16 AM >To: John Kacur <jkacur@xxxxxxxxxx>; Jan Kiszka <jan.kiszka@xxxxxxxxxxx>; >Brown, Len <len.brown@xxxxxxxxx> >Cc: Daniel Bristot de Oliveira <daniel@xxxxxxxxxx>; Daniel Bristot de Oliveira ><bristot@xxxxxxxxxx>; Clark Williams <williams@xxxxxxxxxx>; linux-rt-users ><linux-rt-users@xxxxxxxxxxxxxxx>; Carsten Emde <C.Emde@xxxxxxxxx> >Subject: Re: [PATCH 1/2] cyclictest: SMI count/detection via MSR/SMI counter > >On 9/19/18 5:25 PM, John Kacur wrote: >> >> >> On Tue, 4 Sep 2018, Jan Kiszka wrote: >> >>> On 2018-09-04 18:56, Daniel Bristot de Oliveira wrote: >>>> >>>> >>>> On 09/04/2018 06:25 PM, Ralf Ramsauer wrote: >>>>> >>>>> Hi Daniel, >>>>> >>>>> On 21/01/2016 19:08, Daniel Bristot de Oliveira wrote: >>>>>> Use the MSR/SMI counter on Intel's processor to detect/count SMIs. >>>>>> It is based on turbostat's implementation. >>>>>> >>>>>> SMI counting is enabled via --smi argument. When enabled, and >>>>>> additional field is added on both regular and verbose ouput. >>>>>> >>>>>> On the regular output, a SMI column shows how many SMIs occurred >>>>>> on each CPU during cyclictest's execution. For example: >>>>>> >>>>>> policy: fifo: loadavg: 0.09 0.05 0.02 1/194 2288 >>>>>> >>>>>> T: 0 ( 2285) P:80 I:1000 C: 9975 Min: 2 Act: 3 Avg: 4 Max: >>>>>> T: 20831 SMI: 2 >>>>>> T: 1 ( 2286) P:80 I:1500 C: 6650 Min: 2 Act: 2 Avg: 5 Max: >>>>>> T: 19910 SMI: 2 >>>>>> T: 2 ( 2287) P:80 I:2000 C: 4987 Min: 2 Act: 2 Avg: 6 Max: >>>>>> T: 20811 SMI: 2 >>>>>> T: 3 ( 2288) P:80 I:2500 C: 3990 Min: 2 Act: 3 Avg: 7 Max: >>>>>> T: 20322 SMI: 2 >>>>>> >>>>>> On verbose output, the last column shows how many SMIs occurred on >>>>>> each loop. 
For example: >>>>>> >>>>>> [ CPU | Loop | Lat | SMIs ] >>>>>> 0: 2156: 2 0 >>>>>> 0: 2157: 2 0 >>>>>> 0: 2158: 2 0 >>>>>> 0: 2159: 20981 2 >>>>>> 1: 1433: 2 0 >>>>>> 1: 1434: 2 0 >>>>>> 1: 1435: 2 0 >>>>>> 1: 1436: 3 0 >>>>>> 1: 1437: 2 0 >>>>>> 1: 1438: 2 0 >>>>>> 1: 1439: 20249 2 >>>>>> >>>>>> Signed-off-by: Daniel Bristot de Oliveira <bristot@xxxxxxxxxx> >>>>>> --- >>>>>> src/cyclictest/cyclictest.c | 229 >>>>>> ++++++++++++++++++++++++++++++++++++++++++-- >>>>>> 1 file changed, 219 insertions(+), 10 deletions(-) >>>>>> >>>>>> diff --git a/src/cyclictest/cyclictest.c >>>>>> b/src/cyclictest/cyclictest.c index f5a67dc..df3db2a 100644 >>>>>> --- a/src/cyclictest/cyclictest.c >>>>>> +++ b/src/cyclictest/cyclictest.c >>>>>> @@ -111,6 +111,13 @@ extern int clock_nanosleep(clockid_t >>>>>> __clock_id, int __flags, >>>>>> #define KVARNAMELEN 32 >>>>>> #define KVALUELEN 32 >>>>>> >>>>>> +#if (defined(__i386__) || defined(__x86_64__)) #define >>>>>> +ARCH_HAS_SMI_COUNTER #endif >>>>>> + >>>>>> +#define MSR_SMI_COUNT 0x00000034 >>>>>> +#define MSR_SMI_COUNT_MASK 0xFFFFFFFF >>>>>> + >>>>>> int enable_events; >>>>>> >>>>>> static char *policyname(int policy); @@ -143,6 +150,7 @@ struct >>>>>> thread_param { >>>>>> int cpu; >>>>>> int node; >>>>>> int tnum; >>>>>> + int msr_fd; >>>>>> }; >>>>>> >>>>>> /* Struct for statistics */ >>>>>> @@ -154,6 +162,7 @@ struct thread_stat { >>>>>> long act; >>>>>> double avg; >>>>>> long *values; >>>>>> + long *smis; >>>>>> long *hist_array; >>>>>> long *outliers; >>>>>> pthread_t thread; >>>>>> @@ -164,6 +173,7 @@ struct thread_stat { >>>>>> long cycleofmax; >>>>>> long hist_overflow; >>>>>> long num_outliers; >>>>>> + unsigned long smi_count; >>>>>> }; >>>>>> >>>>>> static pthread_mutex_t trigger_lock = PTHREAD_MUTEX_INITIALIZER; >>>>>> @@ -212,6 +222,12 @@ static pthread_t fifo_threadid; >>>>>> static int laptop = 0; >>>>>> static int use_histfile = 0; >>>>>> >>>>>> +#ifdef ARCH_HAS_SMI_COUNTER >>>>>> +static int smi = 0; >>>>>> +#else 
>>>>>> +#define smi 0 >>>>>> +#endif >>>>>> + >>>>>> static pthread_cond_t refresh_on_max_cond = >PTHREAD_COND_INITIALIZER; >>>>>> static pthread_mutex_t refresh_on_max_lock = >>>>>> PTHREAD_MUTEX_INITIALIZER; >>>>>> >>>>>> @@ -772,6 +788,125 @@ try_again: >>>>>> return err; >>>>>> } >>>>>> >>>>>> +#ifdef ARCH_HAS_SMI_COUNTER >>>>>> +static int open_msr_file(int cpu) { >>>>>> + int fd; >>>>>> + char pathname[32]; >>>>>> + >>>>>> + /* SMI needs thread affinity */ >>>>>> + sprintf(pathname, "/dev/cpu/%d/msr", cpu); >>>>>> + fd = open(pathname, O_RDONLY); >>>>>> + if (fd < 0) >>>>>> + warn("%s open failed, try chown or chmod +r " >>>>>> + "/dev/cpu/*/msr, or run as root\n", pathname); >>>>>> + >>>>>> + return fd; >>>>>> +} >>>>>> + >>>>>> +static int get_msr(int fd, off_t offset, unsigned long long *msr) >>>>>> +{ >>>>>> + ssize_t retval; >>>>>> + >>>>>> + retval = pread(fd, msr, sizeof *msr, offset); >>>>>> + >>>>>> + if (retval != sizeof *msr) >>>>>> + return 1; >>>>>> + >>>>>> + return 0; >>>>>> +} >>>>>> + >>>>>> +static int get_smi_counter(int fd, unsigned long *counter) { >>>>>> + int retval; >>>>>> + unsigned long long msr; >>>>>> + >>>>>> + retval = get_msr(fd, MSR_SMI_COUNT, &msr); >>>>>> + if (retval) >>>>>> + return retval; >>>>>> + >>>>>> + *counter = (unsigned long) (msr & MSR_SMI_COUNT_MASK); >>>>>> + >>>>>> + return 0; >>>>>> +} >>>>>> + >>>>>> +#include <cpuid.h> >>>>>> + >>>>>> +/* Based on turbostat's check */ >>>>>> +static int has_smi_counter(void) >>>>>> +{ >>>>>> + unsigned int ebx, ecx, edx, max_level; >>>>>> + unsigned int fms, family, model; >>>>>> + >>>>>> + fms = family = model = ebx = ecx = edx = 0; >>>>>> + >>>>>> + __get_cpuid(0, &max_level, &ebx, &ecx, &edx); >>>>>> + >>>>>> + /* check genuine intel */ >>>>>> + if (!(ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)) >>>>>> + return 0; >>>>>> + >>>>>> + __get_cpuid(1, &fms, &ebx, &ecx, &edx); >>>>>> + family = (fms >> 8) & 0xf; >>>>>> + >>>>>> + if (family != 6) >>>>>> + 
return 0; >>>>>> + >>>>>> + /* no MSR */ >>>>>> + if (!(edx & (1 << 5))) >>>>>> + return 0; >>>>>> + >>>>>> + model = (((fms >> 16) & 0xf) << 4) + ((fms >> 4) & 0xf); >>>>>> + >>>>>> + switch (model) { >>>>>> + case 0x1A: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown >>>>>> NHM-EP */ >>>>>> + case 0x1E: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, >>>>>> Jasper Forest */ >>>>>> + case 0x1F: /* Core i7 and i5 Processor - Nehalem */ >>>>>> + case 0x25: /* Westmere Client - Clarkdale, Arrandale */ >>>>>> + case 0x2C: /* Westmere EP - Gulftown */ >>>>>> + case 0x2E: /* Nehalem-EX Xeon - Beckton */ >>>>>> + case 0x2F: /* Westmere-EX Xeon - Eagleton */ >>>>>> + case 0x2A: /* SNB */ >>>>>> + case 0x2D: /* SNB Xeon */ >>>>>> + case 0x3A: /* IVB */ >>>>>> + case 0x3E: /* IVB Xeon */ >>>>>> + case 0x3C: /* HSW */ >>>>>> + case 0x3F: /* HSX */ >>>>>> + case 0x45: /* HSW */ >>>>>> + case 0x46: /* HSW */ >>>>>> + case 0x3D: /* BDW */ >>>>>> + case 0x47: /* BDW */ >>>>>> + case 0x4F: /* BDX */ >>>>> >>>>> sorry for digging out this old patch. >>>>> >>>>> Are you sure that 0x4F has MSR_SMI_COUNT? I'm currently sitting in >>>>> front of such a box (Xeon E5-2683), and I doubt the value in this >>>>> MSR is correct for this model, it's way too high… >>>> >>>> This implementation is based on turbostat.c (which is part of the kernel): >>>> >>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/power/x86/turbostat/turbostat.c#n3139 >>>> >>>> So, other than doubting, do you have a list of CPU models that >>>> support it, saying this cpumodel is not supported? >>>> >>>> If so, report to turbostat.c developer and put me in Cc:. >>> >>> Our list is based on Intel SDM, Volume 4 (MSRs). In section 2.23, >>> there is an MSR index which associates MSRs with Family_Model IDs. >>> But it may not be complete either, we already found at least one >>> inconsistency (Xeon Phi is 06_57h and 06_85h, but only the former was listed >there).
>>> >>> Jan >>> >> >> You guys are discussing a very old patch here, just want to make sure >> I didn't miss any updates, new patches? >> >> John >> > >Hi John, > >No, not yet. Summary: the list of models above contains some that >don't support MSR_SMI_COUNT. According to Daniel, this list was c&p'ed from >Linux's turbostat. Maybe I missed something, so I'm still waiting for a response >from the turbostat maintainer (*ping* :) ). >Otherwise I'll follow up with a patch for cyclictest. > >Thanks > Ralf