The patch titled hrtimer: do more apic stuff has been removed from the -mm tree. Its filename was hrtimer-do-more-apic-stuff.patch This patch was dropped because an updated version will be merged ------------------------------------------------------ Subject: hrtimer: do more apic stuff From: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Can you please test this on your VAIO ? If it works, I'm doing a full respin. Signed-off-by: Andrew Morton <akpm@xxxxxxxx> --- Documentation/kernel-parameters.txt | 5 arch/i386/kernel/apic.c | 144 +++++--------------------- arch/i386/kernel/nmi.c | 9 + drivers/acpi/processor_idle.c | 75 +------------ include/acpi/processor.h | 3 include/asm-i386/apic.h | 1 include/asm-x86_64/apic.h | 1 kernel/time/clockevents.c | 46 ++++---- 8 files changed, 73 insertions(+), 211 deletions(-) diff -puN arch/i386/kernel/apic.c~hrtimer-do-more-apic-stuff arch/i386/kernel/apic.c --- a/arch/i386/kernel/apic.c~hrtimer-do-more-apic-stuff +++ a/arch/i386/kernel/apic.c @@ -59,6 +59,9 @@ */ static int enable_local_apic __initdata = 0; +/* Enable local APIC timer for highres/dyntick on UP */ +static int enable_local_apic_timer __initdata = 0; + /* * Debug level, exported for io_apic.c */ @@ -78,7 +81,7 @@ static void apic_pm_activate(void); */ static struct clock_event_device lapic_clockevent = { .name = "lapic", - .capabilities = CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE + .capabilities = CLOCK_CAP_PROFILE #ifdef CONFIG_SMP /* * On UP we keep update_process_times() on the PIT interrupt to @@ -92,21 +95,7 @@ static struct clock_event_device lapic_c .set_mode = lapic_timer_setup, .set_next_event = lapic_next_event, }; - -/* - * Per CPU local APIC data structure: - * - clock event device - * - variables to hold timer verification data - */ -struct lapic_event_device { - struct clock_event_device evdev; - unsigned long last_delta; - unsigned long counter; -}; -static DEFINE_PER_CPU(struct lapic_event_device, lapic_events); - -/* Scaled math multiplication factor for ACPI 
lapic verification */ -static unsigned long acpi_verify_mult; +static DEFINE_PER_CPU(struct clock_event_device, lapic_events); /* Local APIC was disabled by the BIOS and enabled by the kernel */ static int enabled_via_apicbase; @@ -221,11 +210,6 @@ static void __setup_APIC_LVTT(unsigned i static void lapic_next_event(unsigned long delta, struct clock_event_device *evt) { - struct lapic_event_device *ldev; - - ldev = container_of(evt, struct lapic_event_device, evdev); - ldev->last_delta = delta; - apic_write_around(APIC_TMICT, delta); } @@ -235,23 +219,18 @@ static void lapic_next_event(unsigned lo static void lapic_timer_setup(enum clock_event_mode mode, struct clock_event_device *evt) { - struct lapic_event_device *ldev; unsigned long flags; unsigned int v; - ldev = container_of(evt, struct lapic_event_device, evdev); - local_irq_save(flags); switch (mode) { case CLOCK_EVT_PERIODIC: - ldev->last_delta = calibration_result / APIC_DIVISOR; case CLOCK_EVT_ONESHOT: __setup_APIC_LVTT(calibration_result, mode != CLOCK_EVT_PERIODIC, 1); break; case CLOCK_EVT_SHUTDOWN: - ldev->last_delta = 0; v = apic_read(APIC_LVTT); v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); apic_write_around(APIC_LVTT, v); @@ -267,7 +246,7 @@ static void lapic_timer_setup(enum clock */ static void __devinit setup_APIC_timer(void) { - struct clock_event_device *levt = &__get_cpu_var(lapic_events).evdev; + struct clock_event_device *levt = &__get_cpu_var(lapic_events); memcpy(levt, &lapic_clockevent, sizeof(*levt)); @@ -341,7 +320,7 @@ static void __init lapic_cal_handler(str */ void __init setup_boot_APIC_clock(void) { - struct clock_event_device *levt = &__get_cpu_var(lapic_events).evdev; + struct clock_event_device *levt = &__get_cpu_var(lapic_events); const long pm_100ms = PMTMR_TICKS_PER_SEC/10; const long pm_thresh = pm_100ms/100; void (*real_handler)(struct pt_regs *regs); @@ -355,6 +334,13 @@ void __init setup_boot_APIC_clock(void) /* Register broadcast function */ 
clockevents_register_broadcast(lapic_timer_broadcast); + /* + * Enable the apic timer next event capability only for + * SMP and on UP, when requested via commandline + */ + if (num_possible_cpus() > 1 || enable_local_apic_timer) + lapic_clockevent.capabilities |= CLOCK_CAP_NEXTEVT; + local_irq_disable(); /* Replace the global interrupt handler */ @@ -407,13 +393,6 @@ void __init setup_boot_APIC_clock(void) "%lu (%ld)\n", (unsigned long) res, delta); delta = (long) res; } - /* - * Calculate the pmtimer -> lapic conversion factor to - * verify the lapic stability in the power states. - */ - acpi_verify_mult = div_sc(delta, deltapm, 22); - apic_printk(APIC_VERBOSE, "... acpi_verify_mult = %lu\n", - acpi_verify_mult); } /* Calculate the scaled math multiplication factor */ @@ -522,22 +501,22 @@ void __devinit setup_secondary_APIC_cloc void switch_APIC_timer_to_ipi(void *cpumask) { - struct clock_event_device *levt = &__get_cpu_var(lapic_events).evdev; + struct clock_event_device *levt = &__get_cpu_var(lapic_events); cpumask_t mask = *(cpumask_t *)cpumask; int cpu = smp_processor_id(); - if (cpu_isset(cpu, mask)) + if (cpu_isset(cpu, mask) && levt->event_handler) clockevents_set_global_broadcast(levt, 1); } EXPORT_SYMBOL(switch_APIC_timer_to_ipi); void switch_ipi_to_APIC_timer(void *cpumask) { - struct clock_event_device *levt = &__get_cpu_var(lapic_events).evdev; + struct clock_event_device *levt = &__get_cpu_var(lapic_events); cpumask_t mask = *(cpumask_t *)cpumask; int cpu = smp_processor_id(); - if (cpu_isset(cpu, mask)) + if (cpu_isset(cpu, mask) && levt->event_handler) clockevents_set_global_broadcast(levt, 0); } EXPORT_SYMBOL(switch_ipi_to_APIC_timer); @@ -548,7 +527,7 @@ EXPORT_SYMBOL(switch_ipi_to_APIC_timer); fastcall void local_apic_timer_interrupt(struct pt_regs *regs) { int cpu = smp_processor_id(); - struct clock_event_device *evt = &per_cpu(lapic_events, cpu).evdev; + struct clock_event_device *evt = &per_cpu(lapic_events, cpu); /* Normally we should not 
be here till LAPIC has been initialized * but in some cases like kdump, its possible that there is a @@ -625,82 +604,11 @@ static void lapic_timer_broadcast(cpumas void lapic_timer_idle_broadcast(int broadcast) { int cpu = smp_processor_id(); - struct clock_event_device *evt = &per_cpu(lapic_events, cpu).evdev; - unsigned long flags; + struct clock_event_device *evt = &per_cpu(lapic_events, cpu); - local_irq_save(flags); - clockevents_set_broadcast(evt, broadcast); - local_irq_restore(flags); + if (evt->event_handler) + clockevents_set_broadcast(evt, broadcast); } -EXPORT_SYMBOL_GPL(lapic_timer_idle_broadcast); - -/* - * Local APIC verify that timer is stable during this power state - * - * Called with interrupts disabled. - */ -int lapic_timer_idle_verify(unsigned long ticks) -{ - struct lapic_event_device *dev = &__get_cpu_var(lapic_events); - long delta_apic, delta_pm, delta, counter = apic_read(APIC_TMCCT); - const uint32_t pm_500us = PMTMR_TICKS_PER_SEC/2000; - const long pm_250us = PMTMR_TICKS_PER_SEC/4000; - const long pm_100us = PMTMR_TICKS_PER_SEC/10000; - uint64_t delta_ticks; - - /* - * Start the verification: Store current time and the apic counter - */ - if (!ticks) { - dev->counter = counter; - return 0; - } - - /* - * End of verification: - * - * Convert pm timer ticks (from ACPI) to lapic ticks and - * compare with the lapic delta. - * - * We do not make decisions on short sleeps (< 500us) and - * we back out, when the lapic is switched off already - * (last_delta = 0) - */ - if (ticks < pm_500us || !dev->last_delta) - return 0; - delta_ticks = (((u64) ticks) * acpi_verify_mult) >> 22; - delta_pm = (long) delta_ticks; - - delta_apic = dev->counter - counter; - /* Take wraps in periodic mode into account */ - if (delta_apic <= 0) - delta_apic += dev->last_delta; - - /* Calculate the delta between lapic and pm timer */ - delta = delta_pm - delta_apic; - /* - * The delta between pmtimer and lapic is less than 100us: - * lapic is stable. 
This catches also delta_pm < delta_apic, - * which happens due to clock skew and rounding errors. - */ - if (delta < pm_100us) - return 1; - - /* - * The delta between pmtimer and lapic is greater than 250us: - * lapic is unstable. - */ - if (delta > pm_250us) { - apic_printk(APIC_VERBOSE, "lapic timer verify: delta %ld " - "pmtimer %ld (%ld) lapic %ld(%ld %ld %ld) " - "on cpu %d\n", delta, delta_pm, ticks, delta_apic, - counter, dev->counter, dev->last_delta, - smp_processor_id()); - return -1; - } - return 0; -} -EXPORT_SYMBOL_GPL(lapic_timer_idle_verify); int setup_profiling_timer(unsigned int multiplier) { @@ -1331,6 +1239,13 @@ static int __init parse_nolapic(char *ar } early_param("nolapic", parse_nolapic); +static int __init apic_enable_lapic_timer(char *str) +{ + enable_local_apic_timer = 1; + return 0; +} +early_param("lapictimer", apic_enable_lapic_timer); + static int __init apic_set_verbosity(char *str) { if (strcmp("debug", str) == 0) @@ -1342,7 +1257,6 @@ static int __init apic_set_verbosity(cha __setup("apic=", apic_set_verbosity); - /* * Local APIC interrupts */ diff -puN arch/i386/kernel/nmi.c~hrtimer-do-more-apic-stuff arch/i386/kernel/nmi.c --- a/arch/i386/kernel/nmi.c~hrtimer-do-more-apic-stuff +++ a/arch/i386/kernel/nmi.c @@ -23,6 +23,7 @@ #include <linux/dmi.h> #include <linux/kprobes.h> #include <linux/cpumask.h> +#include <linux/kernel_stat.h> #include <asm/smp.h> #include <asm/nmi.h> @@ -920,9 +921,13 @@ __kprobes int nmi_watchdog_tick(struct p cpu_clear(cpu, backtrace_mask); } - sum = per_cpu(irq_stat, cpu).apic_timer_irqs; + /* + * Take the local apic timer and PIT/HPET into account. 
We don't + * know which one is active, when we have highres/dyntick on + */ + sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0); - /* if the apic timer isn't firing, this cpu isn't doing much */ + /* if none of the timers is firing, this cpu isn't doing much */ if (!touched && last_irq_sums[cpu] == sum) { /* * Ayiee, looks like this CPU is stuck ... diff -puN Documentation/kernel-parameters.txt~hrtimer-do-more-apic-stuff Documentation/kernel-parameters.txt --- a/Documentation/kernel-parameters.txt~hrtimer-do-more-apic-stuff +++ a/Documentation/kernel-parameters.txt @@ -759,6 +759,11 @@ and is between 256 and 4096 characters. lapic [IA-32,APIC] Enable the local APIC even if BIOS disabled it. + lapictimer [IA-32,APIC] Enable the local APIC timer on UP + systems for high resolution timers and dynticks. + This only has an effect when the local APIC is + available. It does not imply the "lapic" option. + lasi= [HW,SCSI] PARISC LASI driver for the 53c700 chip Format: addr:<io>,irq:<irq> diff -puN drivers/acpi/processor_idle.c~hrtimer-do-more-apic-stuff drivers/acpi/processor_idle.c --- a/drivers/acpi/processor_idle.c~hrtimer-do-more-apic-stuff +++ a/drivers/acpi/processor_idle.c @@ -258,32 +258,17 @@ static void acpi_timer_check_state(int s struct acpi_processor_power *pwr = &pr->power; /* - * FIXME: Initialize this when the data structure is created !
- */ - if (!pr->power.timer_state_unstable) - pr->power.timer_state_unstable = INT_MAX; - - /* * Check, if one of the previous states already marked the lapic * unstable */ if (pwr->timer_broadcast_on_state < state) return; -#ifdef CONFIG_X86_64 - /* - * This can go away, when x86_64 has the detection support - */ if(cx->type == ACPI_STATE_C3 || - boot_cpu_data.x86_vendor == X86_VENDOR_AMD) -#else - /* - * We could autodetect that too - */ - if(cx->type == ACPI_STATE_C3) -#endif + boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { pr->power.timer_broadcast_on_state = state; - + return; + } } static void acpi_propagate_timer_broadcast(struct acpi_processor *pr) @@ -303,36 +288,8 @@ static void acpi_state_timer_broadcast(s { int state = cx - pr->power.states; - if (state >= pr->power.timer_broadcast_on_state) { + if (state >= pr->power.timer_broadcast_on_state) lapic_timer_idle_broadcast(broadcast); - return; - } - - /* - * On cstate entry we save the lapic timer value - */ - lapic_timer_idle_verify(0); -} - -/* C-State timer verification */ -static void acpi_state_timer_verify(struct acpi_processor *pr, - struct acpi_processor_cx *cx, - uint32_t ticks) -{ - struct acpi_processor_power *pwr = &pr->power; - int state = cx - pr->power.states; - - if (pwr->timer_state_unstable <= state) - return; - - if (lapic_timer_idle_verify(ticks) < 0) { - if (cx->timer_verify++ == 10) { - pwr->timer_state_unstable = state; - printk(KERN_WARNING - "ACPI: lapic on CPU %d stops in C%d[C%d]\n", - smp_processor_id(), state, cx->type); - } - } } #else @@ -345,11 +302,6 @@ static void acpi_state_timer_broadcast(s int broadcast) { } -static void acpi_state_timer_verify(struct acpi_processor *pr, - struct acpi_processor_cx *cx, - uint32_t ticks) -{ -} #endif @@ -499,10 +451,6 @@ static void acpi_processor_idle(void) acpi_cstate_enter(cx); /* Get end time (ticks) */ t2 = inl(acpi_fadt.xpm_tmr_blk.address); - /* Compute time (ticks) that we were actually asleep */ - sleep_ticks = - 
ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD; - acpi_state_timer_verify(pr, cx, sleep_ticks); #ifdef CONFIG_GENERIC_TIME /* TSC halts in C2, so notify users */ @@ -511,6 +459,9 @@ static void acpi_processor_idle(void) /* Re-enable interrupts */ local_irq_enable(); current_thread_info()->status |= TS_POLLING; + /* Compute time (ticks) that we were actually asleep */ + sleep_ticks = + ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD; acpi_state_timer_broadcast(pr, cx, 0); break; @@ -578,18 +529,6 @@ static void acpi_processor_idle(void) #endif /* - * If the lapic verification found a stopped lapic, we have - * to propagate the result. We can not do it from the verify - * code as smp calls must have interrupts enabled. - */ - if (pr->power.timer_state_unstable < - pr->power.timer_broadcast_on_state) { - pr->power.timer_broadcast_on_state = - pr->power.timer_state_unstable; - acpi_propagate_timer_broadcast(pr); - } - - /* * Promotion? * ---------- * Track the number of longs (time asleep is greater than threshold) diff -puN include/acpi/processor.h~hrtimer-do-more-apic-stuff include/acpi/processor.h --- a/include/acpi/processor.h~hrtimer-do-more-apic-stuff +++ a/include/acpi/processor.h @@ -67,7 +67,6 @@ struct acpi_processor_cx { u32 latency_ticks; u32 power; u32 usage; - s32 timer_verify; u64 time; struct acpi_processor_cx_policy promotion; struct acpi_processor_cx_policy demotion; @@ -81,8 +80,6 @@ struct acpi_processor_power { int count; struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER]; int timer_broadcast_on_state; - int timer_state_verified; - int timer_state_unstable; }; /* Performance Management */ diff -puN include/asm-i386/apic.h~hrtimer-do-more-apic-stuff include/asm-i386/apic.h --- a/include/asm-i386/apic.h~hrtimer-do-more-apic-stuff +++ a/include/asm-i386/apic.h @@ -110,7 +110,6 @@ extern void setup_boot_APIC_clock (void) extern void setup_secondary_APIC_clock (void); extern int APIC_init_uniprocessor (void); extern void 
lapic_timer_idle_broadcast(int broadcast); -extern int lapic_timer_idle_verify(unsigned long ticks); extern void enable_NMI_through_LVT0 (void * dummy); void switch_APIC_timer_to_ipi(void *cpumask); diff -puN include/asm-x86_64/apic.h~hrtimer-do-more-apic-stuff include/asm-x86_64/apic.h --- a/include/asm-x86_64/apic.h~hrtimer-do-more-apic-stuff +++ a/include/asm-x86_64/apic.h @@ -88,7 +88,6 @@ extern void clustered_apic_check(void); extern void setup_APIC_extened_lvt(unsigned char lvt_off, unsigned char vector, unsigned char msg_type, unsigned char mask); static inline void lapic_timer_idle_broadcast(int broadcast) { } -static inline int lapic_timer_idle_verify(unsigned long ticks) { return 0;} #define K8_APIC_EXT_LVT_BASE 0x500 #define K8_APIC_EXT_INT_MSG_FIX 0x0 diff -puN kernel/time/clockevents.c~hrtimer-do-more-apic-stuff kernel/time/clockevents.c --- a/kernel/time/clockevents.c~hrtimer-do-more-apic-stuff +++ a/kernel/time/clockevents.c @@ -317,42 +317,46 @@ static void recalc_active_event(struct e * Called with event_lock held to protect the global event device. 
*/ static int recalc_events(struct local_events *devices, - struct clock_event_device *evt, unsigned int caps, - int new) + struct event_descr *descr, + struct clock_event_device *evt, unsigned int caps) { int i; - if (new && devices->installed == MAX_CLOCK_EVENTS) + if (!descr && devices->installed == MAX_CLOCK_EVENTS) return -ENOSPC; /* * If there is no handler and this is not a next-event capable * event device, refuse to handle it */ - if ((!evt->capabilities & CLOCK_CAP_NEXTEVT) && !event_handlers[caps]) { + if (!(evt->capabilities & CLOCK_CAP_NEXTEVT) && !event_handlers[caps]) { printk(KERN_ERR "Unsupported clock event device %s\n", evt->name); return -EINVAL; } - if (caps && global_eventdevice.event && global_eventdevice.event != evt) - recalc_active_event(&global_eventdevice, caps); + if (caps) { + if (global_eventdevice.event && descr != &global_eventdevice) + recalc_active_event(&global_eventdevice, caps); - for (i = 0; i < devices->installed; i++) { - if (devices->events[i].event != evt) - recalc_active_event(&devices->events[i], caps); + for (i = 0; i < devices->installed; i++) { + if (&devices->events[i] != descr) + recalc_active_event(&devices->events[i], caps); + } } - if (new) - devices->events[devices->installed++].event = evt; + /* New device ? */ + if (!descr) { + descr = &devices->events[devices->installed++]; + descr->event = evt; + } if (caps) { /* Is next_event event device going to be installed? 
*/ if (caps & CLOCK_CAP_NEXTEVT) caps = CLOCK_CAP_NEXTEVT; - setup_event(&devices->events[devices->installed], - evt, caps); + setup_event(descr, evt, caps); } else printk(KERN_INFO "Inactive clock event device %s registered\n", evt->name); @@ -376,8 +380,8 @@ int register_local_clockevent(struct clo evt->event_handler = handle_noop; /* Recalc event devices and maybe reassign handlers */ - ret = recalc_events(devices, evt, - evt->capabilities & CLOCK_BASE_CAPS_MASK, 1); + ret = recalc_events(devices, NULL, evt, + evt->capabilities & CLOCK_BASE_CAPS_MASK); spin_unlock_irqrestore(&events_lock, flags); @@ -445,7 +449,7 @@ int clockevents_next_event_available(voi int clockevents_init_next_event(void) { struct local_events *devices = &__get_cpu_var(local_eventdevices); - struct clock_event_device *nextevt; + struct event_descr *nextevt; unsigned long flags; int idx, ret = -ENODEV; @@ -455,17 +459,17 @@ int clockevents_init_next_event(void) spin_lock_irqsave(&events_lock, flags); idx = get_next_event_device(); - if (idx < 0) + if (IS_ERR_VALUE(idx)) goto out_unlock; if (idx == GLOBAL_CLOCK_EVENT) - nextevt = global_eventdevice.event; + nextevt = &global_eventdevice; else - nextevt = devices->events[idx].event; + nextevt = &devices->events[idx]; - ret = recalc_events(devices, nextevt, CLOCK_CAPS_MASK, 0); + ret = recalc_events(devices, nextevt, nextevt->event, CLOCK_CAPS_MASK); if (!ret) - devices->nextevt = nextevt; + devices->nextevt = nextevt->event; out_unlock: spin_unlock_irqrestore(&events_lock, flags); _ Patches currently in -mm which might be from tglx@xxxxxxxxxxxxx are setup_irq-better-mismatch-debugging.patch gtod-exponential-update_wall_time.patch gtod-persistent-clock-support-core.patch gtod-persistent-clock-support-i386.patch time-uninline-jiffiesh.patch time-uninline-jiffiesh-fix.patch time-fix-msecs_to_jiffies-bug.patch time-fix-timeout-overflow.patch cleanup-uninline-irq_enter-and-move-it-into-a-function.patch 
dynticks-extend-next_timer_interrupt-to-use-a-reference-jiffie.patch dynticks-extend-next_timer_interrupt-to-use-a-reference-jiffie-remove-incorrect-warning-in-kernel-timerc.patch hrtimers-namespace-and-enum-cleanup.patch hrtimers-clean-up-locking.patch updated-hrtimers-state-tracking.patch updated-hrtimers-clean-up-callback-tracking.patch updated-hrtimers-move-and-add-documentation.patch updated-add-a-framework-to-manage-clock-event-devices.patch updated-acpi-include-apich.patch updated-acpi-keep-track-of-timer-broadcast.patch updated-acpi-add-state-propagation-for-dynamic-broadcasting.patch updated-i386-cleanup-apic-code.patch updated-i386-convert-to-clock-event-devices.patch updated-pm_timer-allow-early-access-and-move-externs-to-a-header-file.patch updated-i386-rework-local-apic-calibration.patch updated-high-res-timers-core.patch updated-gtod-mark-tsc-unusable-for-highres-timers.patch updated-dynticks-core-code.patch updated-dyntick-add-nohz-stats-to-proc-stat.patch updated-dynticks-i386-arch-code.patch updated-dynticks-fix-nmi-watchdog.patch updated-high-res-timers-dynticks-enable-i386-support.patch updated-debugging-feature-timer-stats.patch clockevents-core-check-for-clock-event-device-handler-being-non-null-before-calling-it.patch round_jiffies-infrastructure.patch round_jiffies-infrastructure-fix.patch clocksource-add-usage-of-config_sysfs.patch clocksource-small-cleanup-2.patch clocksource-small-cleanup-2-fix.patch clocksource-small-acpi_pm-cleanup.patch - To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html