The patch titled i386/apic: Rework local apic timer calibration has been removed from the -mm tree. Its filename was i386-lapic-timer-calibration.patch This patch was dropped because an updated version will be merged ------------------------------------------------------ Subject: i386/apic: Rework local apic timer calibration From: Thomas Gleixner <tglx@xxxxxxxxxxxxx> The local apic timer calibration has two problem cases: 1. The calibration is based on readout of the PIT/HPET timer to detect the wrap of the periodic tick. It happens that a box gets stuck in the calibration loop due to a PIT with a broken readout function. 2. CoreDuo boxen show a sporadic "PIT runs too slow" defect, which results in a wrong lapic calibration. The PIT goes back to normal operation once the lapic timer is switched to periodic mode. Rework the code to address both problems: - Make the calibration interrupt-driven. This removes the wait_timer_tick magic hackery from apic.c and time_hpet.c. The clockevents framework allows easy substitution of the global tick event handler for the calibration. This is more accurate than monitoring jiffies. At this point of the boot process, nothing disturbs the interrupt delivery, so the results are very accurate. - Verify the calibration against the PM timer, when available, by using the early access function. When the measured calibration period is outside of a one percent window, then the lapic timer calibration is adjusted to the pm timer result. - Verify the calibration by running the lapic timer with the calibration handler. Disable lapic timer in case of deviation. This also removes the "synchronization" of the local apic timer to the global tick. This synchronization never worked, as there is no way to synchronize PIT(HPET) and local APIC timer. The synchronization by waiting for the tick just aligns the local APIC timer for the first events, but later the events drift away due to the different clocks. 
Removing the "sync" is just randomizing the asynchronous behaviour at setup time. Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxx> --- arch/i386/kernel/apic.c | 375 ++++++++++++++++++--------------- arch/i386/kernel/time_hpet.c | 22 - include/asm-i386/apic.h | 2 3 files changed, 210 insertions(+), 189 deletions(-) diff -puN arch/i386/kernel/apic.c~i386-lapic-timer-calibration arch/i386/kernel/apic.c --- a/arch/i386/kernel/apic.c~i386-lapic-timer-calibration +++ a/arch/i386/kernel/apic.c @@ -26,6 +26,7 @@ #include <linux/sysdev.h> #include <linux/cpu.h> #include <linux/clockchips.h> +#include <linux/acpi_pmtmr.h> #include <linux/module.h> #include <asm/atomic.h> @@ -168,64 +169,8 @@ int lapic_get_maxlvt(void) * Local APIC timer */ -/* - * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts - * per second. We assume that the caller has already set up the local - * APIC. - * - * The APIC timer is not exactly sync with the external timer chip, it - * closely follows bus clocks. - */ - -/* - * FIXME: Move this to i8253.h. There is no need to keep the access to - * the PIT scattered all around the place -tglx - */ - -/* - * The timer chip is already set up at HZ interrupts per second here, - * but we do not accept timer interrupts yet. We only allow the BP - * to calibrate. 
- */ -static unsigned int __devinit get_8254_timer_count(void) -{ - unsigned long flags; - - unsigned int count; - - spin_lock_irqsave(&i8253_lock, flags); - - outb_p(0x00, PIT_MODE); - count = inb_p(PIT_CH0); - count |= inb_p(PIT_CH0) << 8; - - spin_unlock_irqrestore(&i8253_lock, flags); - - return count; -} - -/* next tick in 8254 can be caught by catching timer wraparound */ -static void __devinit wait_8254_wraparound(void) -{ - unsigned int curr_count, prev_count; - - curr_count = get_8254_timer_count(); - do { - prev_count = curr_count; - curr_count = get_8254_timer_count(); - - /* workaround for broken Mercury/Neptune */ - if (prev_count >= curr_count + 0x100) - curr_count = get_8254_timer_count(); - - } while (prev_count >= curr_count); -} - -/* - * Default initialization for 8254 timers. If we use other timers like HPET, - * we override this later - */ -void (*wait_timer_tick)(void) __devinitdata = wait_8254_wraparound; +/* Clock divisor is set to 16 */ +#define APIC_DIVISOR 16 /* * This function sets up the local APIC timer, with a timeout of @@ -237,10 +182,7 @@ void (*wait_timer_tick)(void) __devinitd * We do reads before writes even if unnecessary, to get around the * P5 APIC double write bug. 
*/ - -#define APIC_DIVISOR 16 - -static void __setup_APIC_LVTT(unsigned int clocks, int oneshot) +static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) { unsigned int lvtt_value, tmp_value; int cpu = smp_processor_id(); @@ -251,7 +193,7 @@ static void __setup_APIC_LVTT(unsigned i if (!lapic_is_integrated()) lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); - if (cpu_isset(cpu, timer_bcast_ipi)) + if (!irqen) lvtt_value |= APIC_LVT_MASKED; apic_write_around(APIC_LVTT, lvtt_value); @@ -268,25 +210,27 @@ static void __setup_APIC_LVTT(unsigned i apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); } +/* + * Program the next event, relative to now + */ static void lapic_next_event(unsigned long delta, struct clock_event_device *evt) { apic_write_around(APIC_TMICT, delta); } +/* + * Setup the lapic timer in periodic or oneshot mode + */ static void lapic_timer_setup(enum clock_event_mode mode, struct clock_event_device *evt) { + int cpu = smp_processor_id(); unsigned long flags; local_irq_save(flags); - if (CLOCK_EVT_PERIODIC) { - /* - * Wait for IRQ0's slice: - */ - wait_timer_tick(); - } - __setup_APIC_LVTT(calibration_result, mode != CLOCK_EVT_PERIODIC); + __setup_APIC_LVTT(calibration_result, mode != CLOCK_EVT_PERIODIC, + cpu_isset(cpu, timer_bcast_ipi)); local_irq_restore(flags); } @@ -304,112 +248,235 @@ static void __devinit setup_APIC_timer(v } /* - * In this function we calibrate APIC bus clocks to the external - * timer. Unfortunately we cannot use jiffies and the timer irq - * to calibrate, since some later bootup code depends on getting - * the first irq? Ugh. + * In this functions we calibrate APIC bus clocks to the external timer. + * + * We want to do the calibration only once since we want to have local timer + * irqs syncron. CPUs connected by the same APIC bus have the very same bus + * frequency. + * + * This was previously done by reading the PIT/HPET and waiting for a wrap + * around to find out, that a tick has elapsed. 
I have a box, where the PIT + * readout is broken, so it never gets out of the wait loop again. This was + * also reported by others. * - * TODO: Fix this rather than saying "Ugh" -tglx + * Monitoring the jiffies value is inaccurate and the clockevents + * infrastructure allows us to do a simple substitution of the interrupt + * handler. * - * We want to do the calibration only once since we - * want to have local timer irqs syncron. CPUs connected - * by the same APIC bus have the very same bus frequency. - * And we want to have irqs off anyways, no accidental - * APIC irq that way. + * The calibration routine also uses the pm_timer when possible, as the PIT + * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes + * back to normal later in the boot process). */ -static int __init calibrate_APIC_clock(void) +#define LAPIC_CAL_LOOPS (HZ/10) + +static __initdata volatile int lapic_cal_loops = -1; +static __initdata long lapic_cal_t1, lapic_cal_t2; +static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; +static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; +static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; + +/* + * Temporary interrupt handler. + */ +static void __init lapic_cal_handler(struct pt_regs *regs) { - unsigned long long t1 = 0, t2 = 0; - long tt1, tt2; - long result; - int i; - const int LOOPS = HZ/10; + unsigned long long tsc = 0; + long tapic = apic_read(APIC_TMCCT); + unsigned long pm = acpi_pm_read_early(); - apic_printk(APIC_VERBOSE, "calibrating APIC timer ...\n"); + if (cpu_has_tsc) + rdtscll(tsc); - /* - * Put whatever arbitrary (but long enough) timeout - * value into the APIC clock, we just want to get the - * counter running for calibration. - */ - __setup_APIC_LVTT(1000000000, 0); + switch (lapic_cal_loops++) { + case 0: + lapic_cal_t1 = tapic; + lapic_cal_tsc1 = tsc; + lapic_cal_pm1 = pm; + lapic_cal_j1 = jiffies; + break; - /* - * The timer chip counts down to zero. 
Let's wait - * for a wraparound to start exact measurement: - * (the current tick might have been already half done) - */ + case LAPIC_CAL_LOOPS: + lapic_cal_t2 = tapic; + lapic_cal_tsc2 = tsc; + if (pm < lapic_cal_pm1) + pm += ACPI_PM_OVRRUN; + lapic_cal_pm2 = pm; + lapic_cal_j2 = jiffies; + break; + } +} + +/* + * Setup the boot APIC + * + * Calibrate and verify the result. + */ +void __init setup_boot_APIC_clock(void) +{ + struct clock_event_device *levt = &__get_cpu_var(lapic_events); + const long pm_100ms = PMTMR_TICKS_PER_SEC/10; + const long pm_thresh = pm_100ms/100; + void (*real_handler)(struct pt_regs *regs); + unsigned long deltaj; + long delta, deltapm; + cpumask_t cpumask; - wait_timer_tick(); + apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" + "calibrating APIC timer ...\n"); + using_apic_timer = 1; - /* - * We wrapped around just now. Let's start: - */ - if (cpu_has_tsc) - rdtscll(t1); - tt1 = apic_read(APIC_TMCCT); + local_irq_disable(); + + /* Replace the global interrupt handler */ + real_handler = global_clock_event->event_handler; + global_clock_event->event_handler = lapic_cal_handler; /* - * Let's wait LOOPS wraprounds: + * Setup the APIC counter to 1e9. There is no way the lapic + * can underflow in the 100ms detection time frame */ - for (i = 0; i < LOOPS; i++) - wait_timer_tick(); + __setup_APIC_LVTT(1000000000, 0, 0); - tt2 = apic_read(APIC_TMCCT); - if (cpu_has_tsc) - rdtscll(t2); + /* Let the interrupts run */ + local_irq_enable(); - /* - * The APIC bus clock counter is 32 bits only, it - * might have overflown, but note that we use signed - * longs, thus no extra care needed. 
- * - * underflown to be exact, as the timer counts down ;) - */ + while(lapic_cal_loops <= LAPIC_CAL_LOOPS); - result = (tt1-tt2)*APIC_DIVISOR/LOOPS; + local_irq_disable(); + + /* Restore the real event handler */ + global_clock_event->event_handler = real_handler; + + /* Build delta t1-t2 as apic timer counts down */ + delta = lapic_cal_t1 - lapic_cal_t2; + apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); + + /* Check, if the PM timer is available */ + deltapm = lapic_cal_pm2 - lapic_cal_pm1; + apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); + + if (deltapm) { + unsigned long mult; + u64 res; + + mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); + + if (deltapm > (pm_100ms - pm_thresh) && + deltapm < (pm_100ms + pm_thresh)) { + apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); + } else { + res = (((u64) deltapm) * mult) >> 22; + do_div(res, 1000000); + printk(KERN_WARNING "APIC calibration not consistent " + "with PM Timer: %ldms instead of 100ms\n", + (long)res); + /* Correct the lapic counter value */ + res = (((u64) delta ) * pm_100ms); + do_div(res, deltapm); + printk(KERN_INFO "APIC delta adjusted to PM-Timer: " + "%lu (%ld)\n", (unsigned long) res, delta); + delta = (long) res; + } + } /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(tt1-tt2, TICK_NSEC * LOOPS, 32); + lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 32); lapic_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); lapic_clockevent.min_delta_ns = clockevent_delta2ns(0xF, &lapic_clockevent); - apic_printk(APIC_VERBOSE, "..... tt1-tt2 %ld\n", tt1 - tt2); + calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; + + apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult); - apic_printk(APIC_VERBOSE, "..... calibration result: %ld\n", result); + apic_printk(APIC_VERBOSE, "..... 
calibration result: %u\n", + calibration_result); - if (cpu_has_tsc) + if (cpu_has_tsc) { + delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); apic_printk(APIC_VERBOSE, "..... CPU clock speed is " - "%ld.%04ld MHz.\n", - ((long)(t2-t1)/LOOPS)/(1000000/HZ), - ((long)(t2-t1)/LOOPS)%(1000000/HZ)); + "%ld.%04ld MHz.\n", + (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), + (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); + } apic_printk(APIC_VERBOSE, "..... host bus clock speed is " - "%ld.%04ld MHz.\n", - result/(1000000/HZ), - result%(1000000/HZ)); + "%u.%04u MHz.\n", + calibration_result / (1000000 / HZ), + calibration_result % (1000000 / HZ)); - return result; -} -void __init setup_boot_APIC_clock(void) -{ - unsigned long flags; - apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"); - using_apic_timer = 1; + apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); - local_irq_save(flags); - - calibration_result = calibrate_APIC_clock(); /* - * Now set up the timer for real. + * Start LAPIC timer and verify that the calculated factor is correct */ setup_APIC_timer(); - local_irq_restore(flags); + /* Replace the lapic interrupt handler */ + real_handler = levt->event_handler; + levt->event_handler = lapic_cal_handler; + lapic_cal_loops = -1; + + /* Let the interrupts run */ + local_irq_enable(); + + while(lapic_cal_loops <= LAPIC_CAL_LOOPS); + + local_irq_disable(); + + /* Restore the real event handler */ + levt->event_handler = real_handler; + + local_irq_enable(); + + /* Jiffies delta */ + deltaj = lapic_cal_j2 - lapic_cal_j1; + apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); + + /* Check, if the PM timer is available */ + deltapm = lapic_cal_pm2 - lapic_cal_pm1; + apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); + + if (deltapm) { + if (deltapm > (pm_100ms - pm_thresh) && + deltapm < (pm_100ms + pm_thresh)) { + apic_printk(APIC_VERBOSE, "... 
PM timer result ok\n"); + /* Check, if the jiffies result is consistent */ + if (deltaj < LAPIC_CAL_LOOPS-2 || + deltaj > LAPIC_CAL_LOOPS+2) { + /* + * Not sure, what we can do about this one. + * When high resultion timers are active + * and the lapic timer does not stop in C3 + * we are fine. Otherwise more trouble might + * be waiting. -- tglx + */ + printk(KERN_WARNING "Global event device %s " + "has wrong frequency " + "(%lu ticks instead of %d)\n", + global_clock_event->name, deltaj, + LAPIC_CAL_LOOPS); + } + return; + } + } else { + /* Check, if the jiffies result is consistent */ + if (deltaj >= LAPIC_CAL_LOOPS-2 && + deltaj <= LAPIC_CAL_LOOPS+2) { + apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); + return; + } + } + + printk(KERN_WARNING + "APIC timer disabled due to verification failure.\n"); + local_irq_disable(); + cpumask = cpumask_of_cpu(smp_processor_id()); + switch_APIC_timer_to_ipi(&cpumask); + local_irq_enable(); } void __devinit setup_secondary_APIC_clock(void) @@ -507,29 +574,6 @@ void switch_ipi_to_APIC_timer(void *cpum EXPORT_SYMBOL(switch_ipi_to_APIC_timer); /* - * Local timer interrupt handler. It does both profiling and - * process statistics/rescheduling. - */ -inline void smp_local_timer_interrupt(void) -{ - profile_tick(CPU_PROFILING); -#ifdef CONFIG_SMP - update_process_times(user_mode_vm(get_irq_regs())); -#endif - - /* - * We take the 'long' return path, and there every subsystem - * grabs the apropriate locks (kernel lock/ irq lock). - * - * we might want to decouple profiling from the 'long path', - * and do the profiling totally in assembly. - * - * Currently this isn't too much of an issue (performance wise), - * we can take more than 100K local irqs per second on a 100 MHz P5. - */ -} - -/* * Local APIC timer interrupt. This is the most natural way for doing * local interrupts, but local timer interrupts can be emulated by * broadcast interrupts too. 
[in case the hw doesn't support APIC timers] @@ -569,13 +613,14 @@ fastcall void smp_apic_timer_interrupt(s static void up_apic_timer_interrupt_call(void) { int cpu = smp_processor_id(); + struct clock_event_device *evt = &per_cpu(lapic_events, cpu); /* * the NMI deadlock-detector uses this. */ per_cpu(irq_stat, cpu).apic_timer_irqs++; - smp_local_timer_interrupt(); + evt->event_handler(get_irq_regs()); } #endif diff -puN arch/i386/kernel/time_hpet.c~i386-lapic-timer-calibration arch/i386/kernel/time_hpet.c --- a/arch/i386/kernel/time_hpet.c~i386-lapic-timer-calibration +++ a/arch/i386/kernel/time_hpet.c @@ -43,23 +43,6 @@ static void hpet_writel(unsigned long d, writel(d, hpet_virt_address + a); } -#ifdef CONFIG_X86_LOCAL_APIC -/* - * HPET counters dont wrap around on every tick. They just change the - * comparator value and continue. Next tick can be caught by checking - * for a change in the comparator value. Used in apic.c. - */ -static void __devinit wait_hpet_tick(void) -{ - unsigned int start_cmp_val, end_cmp_val; - - start_cmp_val = hpet_readl(HPET_T0_CMP); - do { - end_cmp_val = hpet_readl(HPET_T0_CMP); - } while (start_cmp_val == end_cmp_val); -} -#endif - static int hpet_timer_stop_set_go(unsigned long tick) { unsigned int cfg; @@ -213,11 +196,6 @@ int __init hpet_enable(void) hpet_alloc(&hd); } #endif - -#ifdef CONFIG_X86_LOCAL_APIC - if (hpet_use_timer) - wait_timer_tick = wait_hpet_tick; -#endif return 0; } diff -puN include/asm-i386/apic.h~i386-lapic-timer-calibration include/asm-i386/apic.h --- a/include/asm-i386/apic.h~i386-lapic-timer-calibration +++ a/include/asm-i386/apic.h @@ -93,8 +93,6 @@ static inline void ack_APIC_irq(void) apic_write_around(APIC_EOI, 0); } -extern void (*wait_timer_tick)(void); - extern int lapic_get_maxlvt(void); extern void clear_local_APIC(void); extern void connect_bsp_APIC (void); _ Patches currently in -mm which might be from tglx@xxxxxxxxxxxxx are setup_irq-better-mismatch-debugging.patch 
gtod-exponential-update_wall_time.patch gtod-persistent-clock-support-core.patch gtod-persistent-clock-support-i386.patch time-uninline-jiffiesh.patch time-uninline-jiffiesh-fix.patch time-fix-msecs_to_jiffies-bug.patch time-fix-timeout-overflow.patch cleanup-uninline-irq_enter-and-move-it-into-a-function.patch dynticks-extend-next_timer_interrupt-to-use-a-reference-jiffie.patch dynticks-extend-next_timer_interrupt-to-use-a-reference-jiffie-remove-incorrect-warning-in-kernel-timerc.patch hrtimers-namespace-and-enum-cleanup.patch hrtimers-clean-up-locking.patch updated-hrtimers-state-tracking.patch updated-hrtimers-clean-up-callback-tracking.patch updated-hrtimers-move-and-add-documentation.patch updated-add-a-framework-to-manage-clock-event-devices.patch updated-acpi-include-apich.patch updated-acpi-keep-track-of-timer-broadcast.patch updated-acpi-add-state-propagation-for-dynamic-broadcasting.patch updated-i386-cleanup-apic-code.patch updated-i386-convert-to-clock-event-devices.patch updated-pm_timer-allow-early-access-and-move-externs-to-a-header-file.patch updated-i386-rework-local-apic-calibration.patch updated-high-res-timers-core.patch updated-gtod-mark-tsc-unusable-for-highres-timers.patch updated-dynticks-core-code.patch updated-dyntick-add-nohz-stats-to-proc-stat.patch updated-dynticks-i386-arch-code.patch updated-dynticks-fix-nmi-watchdog.patch updated-high-res-timers-dynticks-enable-i386-support.patch updated-debugging-feature-timer-stats.patch clockevents-core-check-for-clock-event-device-handler-being-non-null-before-calling-it.patch round_jiffies-infrastructure.patch round_jiffies-infrastructure-fix.patch clocksource-add-usage-of-config_sysfs.patch clocksource-small-cleanup-2.patch clocksource-small-cleanup-2-fix.patch clocksource-small-acpi_pm-cleanup.patch - To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html