2016-10-26 22:13 GMT+08:00 Radim Krčmář <rkrcmar@xxxxxxxxxx>: > 2016-10-26 14:35+0800, Wanpeng Li: >> 2016-10-25 19:55 GMT+08:00 Radim Krčmář <rkrcmar@xxxxxxxxxx>: >>> 2016-10-17 16:28+0800, Wanpeng Li: >> [...] >>>> +static void apic_timer_isr(isr_regs_t *regs) >>>> +{ >>>> + u64 now = rdtsc(); >>>> + ++tdt_count; >>>> + >>>> + if (table_idx < TABLE_SIZE && tdt_count > 1) >>>> + table[table_idx++] = now - exptime; >>>> + >>>> + if (breakmax && tdt_count > 1 && (now - exptime) > breakmax) { >>>> + hitmax = 1; >>>> + apic_write(APIC_EOI, 0); >>>> + return; >>>> + } >>>> + >>>> + exptime = now+delta; >>> >>> Two problems on this line: >>> >>> 1) with periodic timer, the delta is added to last expiration time, not >>> to now(). >>> What you are measuring now is the stability of the period -- very >>> important as well, but if we used now() only once, you could also >>> measure the latency. >>> >>> 2) delta is in nanoseconds while exptime is in cycles. Type error. >> >> There needs a method to convert the value which is loaded to >> initial-counter register to TSC. > > Yes, you need to know the conversion between APIC timer frequency and > TSC frequency. APIC timer in KVM uses nanoseconds, but TSC is variable. > >>> >>>> + >>>> + if (mode == APIC_LVT_TIMER_ONE_SHOT) >>>> + /* Set "Initial Counter Register", which starts the timer */ >>>> + apic_write(APIC_TMICT, delta); >>> >>> This is not deadline, so it would be better to read now() for exptime >>> right before the apic_write, so as little time as possible passes in >>> between. >> >> Do you mean something like this? >> >> static void apic_timer_isr(isr_regs_t *regs) >> { >> uint64_t now = rdtsc(); >> >> if (breakmax && (now - exptime) > breakmax) { > > Note that you must have now and exptime in the same unit to have > meaningful operations on. (especially the "exptime += delta" would bad > without conversion). > >> hitmax = 1; >> apic_write(APIC_EOI, 0); >> return; >> } >> >> exptime += delta; > > > The exptime for oneshot is 'exptime = now + delta', only periodic can do > exptime += delta, so I'd rather read the time again just before writing > the delta > >> if (mode == APIC_LVT_TIMER_ONE_SHOT) { > time_t now = now(); >> /* Set "Initial Counter Register", which starts the timer */ >> apic_write(APIC_TMICT, delta); > > exptime = now + delta; > } > >> if (table_idx < TABLE_SIZE) >> table[table_idx++] = now - exptime; >> >> apic_write(APIC_EOI, 0); >> } >> >> static void test_apic_timer(int mode) >> { >> handle_irq(APIC_LVT_TIMER_VECTOR, apic_timer_isr); >> irq_enable(); >> >> /* Periodic mode */ >> apic_write(APIC_LVTT, mode | APIC_LVT_TIMER_VECTOR); >> /* Divider == 1 */ >> apic_write(APIC_TDCR, 0x0000000b); >> exptime = rdtsc() + delta; > > Yep, if we convert rdtsc() to the same unit that delta uses. I just hardcode a tsc_ghz which is obtained from my host(it is calibrated to 3292.376MHz) for temporal testing. volatile uint64_t exptime, delta, delta_cycles; u32 tsc_ghz; int mode; #define TABLE_SIZE 100000 u64 table[TABLE_SIZE]; volatile int table_idx = 0; volatile int hitmax = 0; int breakmax = 0; #define APIC_LVT_TIMER_ONE_SHOT (0) #define APIC_LVT_TIMER_VECTOR (0xee) static void apic_timer_isr(isr_regs_t *regs) { uint64_t now = rdtsc(); if (breakmax && (now - exptime) > breakmax) { hitmax = 1; apic_write(APIC_EOI, 0); return; } if (table_idx < TABLE_SIZE) table[table_idx++] = now - exptime; if (mode == APIC_LVT_TIMER_ONE_SHOT) { now = rdtsc(); /* Set "Initial Counter Register", which starts the timer */ apic_write(APIC_TMICT, delta); exptime = now + delta_cycles; } else exptime += delta_cycles; apic_write(APIC_EOI, 0); } static void test_apic_timer(int mode) { handle_irq(APIC_LVT_TIMER_VECTOR, apic_timer_isr); irq_enable(); /* Periodic mode */ apic_write(APIC_LVTT, mode | APIC_LVT_TIMER_VECTOR); /* Divider == 1 */ apic_write(APIC_TDCR, 0x0000000b); exptime = rdtsc() + delta_cycles; apic_write(APIC_TMICT, delta); } int main(int argc, char **argv) { int i, size; u64 count = 0; setup_vm(); smp_init(); setup_idt(); test_lapic_existence(); mask_pic_interrupts(); delta = argc <= 2 ? 0x100000 : atol(argv[2]); /* Divider == 1*/ tsc_ghz = 3.292376; delta_cycles = delta * tsc_ghz; Oneshot mode, 100 thousands times set apic timer: vanilla kernel pause averge latency = 320629 patched kernel pause averge latency = 316319 However, periodic mode testcase is still not correct, I dump 10 times periodic mode latency, they increase step by step: APIC Timer Periodic Mode latency: 444015 latency: 905484 latency: 1076304 latency: 1356855 latency: 1602117 latency: 1907229 latency: 2153487 latency: 2549946 latency: 2820834 latency: 3135330 average latency = 1795160 In addition, I try to utilize cpuid 0x15 to get tsc frequency, however, there is no such leaf on my haswell desktop(Intel(R) Core(TM) i5-4590 CPU @ 3.30GHz): Leaf Subleaf EAX EBX ECX EDX 00000000 00000000: 0000000d .... 756e6547 Genu 6c65746e ntel 49656e69 ineI 00000001 00000000: 000306c3 .... 00100800 .... 7ffafbff .... bfebfbff .... 00000002 00000000: 76036301 .c.v 00f0b6ff .... 00000000 .... 00c10000 .... 00000003 00000000: 00000000 .... 00000000 .... 00000000 .... 00000000 .... 00000004 00000000: 1c004121 !A.. 01c0003f ?... 0000003f ?... 00000000 .... 00000004 00000001: 1c004122 "A.. 01c0003f ?... 0000003f ?... 00000000 .... 00000004 00000002: 1c004143 CA.. 01c0003f ?... 000001ff .... 00000000 .... 00000004 00000003: 1c03c163 c... 02c0003f ?... 00001fff .... 00000006 .... 00000005 00000000: 00000040 @... 00000040 @... 00000003 .... 00042120 !.. 00000006 00000000: 00000077 w... 00000002 .... 00000009 .... 00000000 .... 00000007 00000000: 00000000 .... 000027ab .'.. 00000000 .... 00000000 .... 00000008 00000000: 00000000 .... 00000000 .... 00000000 .... 00000000 .... 00000009 00000000: 00000000 .... 00000000 .... 00000000 .... 00000000 .... 0000000a 00000000: 07300803 ..0. 00000000 .... 00000000 .... 00000603 .... 0000000b 00000000: 00000001 .... 00000001 .... 00000100 .... 00000000 .... 0000000b 00000001: 00000004 .... 00000004 .... 00000201 .... 00000000 .... 0000000c 00000000: 00000000 .... 00000000 .... 00000000 .... 00000000 .... 0000000d 00000000: 00000007 .... 00000340 @... 00000340 @... 00000000 .... 0000000d 00000001: 00000001 .... 00000000 .... 00000000 .... 00000000 .... 0000000d 00000002: 00000100 .... 00000240 @... 00000000 .... 00000000 .... 80000000 00000000: 80000008 .... 00000000 .... 00000000 .... 00000000 .... 80000001 00000000: 00000000 .... 00000000 .... 00000021 !... 2c100800 ..., 80000002 00000000: 65746e49 Inte 2952286c l(R) 726f4320 Cor 4d542865 e(TM 80000003 00000000: 35692029 ) i5 3935342d -459 50432030 0 CP 20402055 U @ 80000004 00000000: 30332e33 3.30 007a4847 GHz. 00000000 .... 00000000 .... 80000005 00000000: 00000000 .... 00000000 .... 00000000 .... 00000000 .... 80000006 00000000: 00000000 .... 00000000 .... 01006040 @`.. 00000000 .... 80000007 00000000: 00000000 .... 00000000 .... 00000000 .... 00000100 .... 80000008 00000000: 00003027 '0.. 00000000 .... 00000000 .... 00000000 .... Regards, Wanpeng Li -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html