The patch titled lock validator: reenable NMIs has been removed from the -mm tree. Its filename is lock-validator-v3-lock-validator-reenable-nmis.patch This patch was dropped because lockdep is being redone ------------------------------------------------------ Subject: lock validator: reenable NMIs From: Ingo Molnar <mingo@xxxxxxx> - do not touch the irqtrace state in NMI handlers - re-enable NMIs and oprofile on i386 and x86_64 I have tested this with high-rate (10 kHz per CPU) NMI load on the kernel, and while previously we'd quickly get the irqtrace state confused and a lock validator bug message, with this patch applied it's now working fine. (Note that on x86_64 we only build one variant of the paranoid-exit codepath if IRQTRACE is disabled - so there should be no difference in the generated code as long as the validator is disabled) Signed-off-by: Ingo Molnar <mingo@xxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxx> --- arch/i386/kernel/nmi.c | 10 -- arch/i386/oprofile/Kconfig | 2 arch/x86_64/kernel/entry.S | 133 +++++++++++++++++++-------------- arch/x86_64/kernel/nmi.c | 11 -- arch/x86_64/oprofile/Kconfig | 2 include/linux/hardirq.h | 4 kernel/lockdep.c | 16 +-- 7 files changed, 89 insertions(+), 89 deletions(-) diff -puN arch/i386/kernel/nmi.c~lock-validator-v3-lock-validator-reenable-nmis arch/i386/kernel/nmi.c --- a/arch/i386/kernel/nmi.c~lock-validator-v3-lock-validator-reenable-nmis +++ a/arch/i386/kernel/nmi.c @@ -747,16 +747,6 @@ void setup_apic_nmi_watchdog (void *unus { struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); -#ifdef CONFIG_LOCKDEP - /* - * The NMI watchdog uses spinlocks (notifier chains, etc.), - * so it's not lockdep-safe: - */ - nmi_watchdog = NMI_NONE; - printk("lockdep: disabled NMI watchdog.\n"); - - return; -#endif /* only support LOCAL and IO APICs for now */ if ((nmi_watchdog != NMI_LOCAL_APIC) && (nmi_watchdog != NMI_IO_APIC)) diff -puN arch/i386/oprofile/Kconfig~lock-validator-v3-lock-validator-reenable-nmis 
arch/i386/oprofile/Kconfig --- a/arch/i386/oprofile/Kconfig~lock-validator-v3-lock-validator-reenable-nmis +++ a/arch/i386/oprofile/Kconfig @@ -7,7 +7,7 @@ config PROFILING config OPROFILE tristate "OProfile system profiling (EXPERIMENTAL)" - depends on PROFILING && !LOCKDEP + depends on PROFILING help OProfile is a profiling system capable of profiling the whole system, include the kernel, kernel modules, libraries, diff -puN arch/x86_64/kernel/entry.S~lock-validator-v3-lock-validator-reenable-nmis arch/x86_64/kernel/entry.S --- a/arch/x86_64/kernel/entry.S~lock-validator-v3-lock-validator-reenable-nmis +++ a/arch/x86_64/kernel/entry.S @@ -718,7 +718,7 @@ END(spurious_interrupt) /* error code is on the stack already */ /* handle NMI like exceptions that can happen everywhere */ - .macro paranoidentry sym, ist=0 + .macro paranoidentry sym, ist=0, irqtrace=1 SAVE_ALL cld movl $1,%ebx @@ -743,9 +743,73 @@ END(spurious_interrupt) addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) .endif cli + .if \irqtrace TRACE_IRQS_OFF + .endif .endm - + + /* + * "Paranoid" exit path from exception stack. + * Paranoid because this is used by NMIs and cannot take + * any kernel state for granted. + * We don't do kernel preemption checks here, because only + * NMI should be common and it does not enable IRQs and + * cannot get reschedule ticks. + * + * "trace" is 0 for the NMI handler only, because irq-tracing + * is fundamentally NMI-unsafe. (we cannot change the soft and + * hard flags at once, atomically) + */ + .macro paranoidexit trace=1 + /* ebx: no swapgs flag */ +paranoid_exit\trace: + testl %ebx,%ebx /* swapgs needed? 
*/ + jnz paranoid_restore\trace + testl $3,CS(%rsp) + jnz paranoid_userspace\trace +paranoid_swapgs\trace: + TRACE_IRQS_IRETQ 0 + swapgs +paranoid_restore\trace: + RESTORE_ALL 8 + iretq +paranoid_userspace\trace: + GET_THREAD_INFO(%rcx) + movl threadinfo_flags(%rcx),%ebx + andl $_TIF_WORK_MASK,%ebx + jz paranoid_swapgs\trace + movq %rsp,%rdi /* &pt_regs */ + call sync_regs + movq %rax,%rsp /* switch stack for scheduling */ + testl $_TIF_NEED_RESCHED,%ebx + jnz paranoid_schedule\trace + movl %ebx,%edx /* arg3: thread flags */ + .if \trace + TRACE_IRQS_ON + .endif + sti + xorl %esi,%esi /* arg2: oldset */ + movq %rsp,%rdi /* arg1: &pt_regs */ + call do_notify_resume + cli + .if \trace + TRACE_IRQS_OFF + .endif + jmp paranoid_userspace\trace +paranoid_schedule\trace: + .if \trace + TRACE_IRQS_ON + .endif + sti + call schedule + cli + .if \trace + TRACE_IRQS_OFF + .endif + jmp paranoid_userspace\trace + CFI_ENDPROC + .endm + /* * Exception entry point. This expects an error code/orig_rax on the stack * and the exception handler in %rax. @@ -974,8 +1038,7 @@ KPROBE_ENTRY(debug) pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_debug, DEBUG_STACK - jmp paranoid_exit - CFI_ENDPROC + paranoidexit END(debug) .previous .text @@ -984,54 +1047,12 @@ KPROBE_ENTRY(nmi) INTR_FRAME pushq $-1 CFI_ADJUST_CFA_OFFSET 8 - paranoidentry do_nmi - /* - * "Paranoid" exit path from exception stack. - * Paranoid because this is used by NMIs and cannot take - * any kernel state for granted. - * We don't do kernel preemption checks here, because only - * NMI should be common and it does not enable IRQs and - * cannot get reschedule ticks. - */ - /* ebx: no swapgs flag */ -paranoid_exit: - testl %ebx,%ebx /* swapgs needed? 
*/ - jnz paranoid_restore - testl $3,CS(%rsp) - jnz paranoid_userspace -paranoid_swapgs: - TRACE_IRQS_IRETQ 0 - swapgs -paranoid_restore: - RESTORE_ALL 8 - iretq -paranoid_userspace: - GET_THREAD_INFO(%rcx) - movl threadinfo_flags(%rcx),%ebx - andl $_TIF_WORK_MASK,%ebx - jz paranoid_swapgs - movq %rsp,%rdi /* &pt_regs */ - call sync_regs - movq %rax,%rsp /* switch stack for scheduling */ - testl $_TIF_NEED_RESCHED,%ebx - jnz paranoid_schedule - movl %ebx,%edx /* arg3: thread flags */ - TRACE_IRQS_ON - sti - xorl %esi,%esi /* arg2: oldset */ - movq %rsp,%rdi /* arg1: &pt_regs */ - call do_notify_resume - cli - TRACE_IRQS_OFF - jmp paranoid_userspace -paranoid_schedule: - TRACE_IRQS_ON - sti - call schedule - cli - TRACE_IRQS_OFF - jmp paranoid_userspace - CFI_ENDPROC + paranoidentry do_nmi, 0, 0 +#ifdef CONFIG_TRACE_IRQFLAGS + paranoidexit 0 +#else + paranoidexit 1 +#endif END(nmi) .previous .text @@ -1040,7 +1061,7 @@ KPROBE_ENTRY(int3) pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_int3, DEBUG_STACK - jmp paranoid_exit + jmp paranoid_exit1 CFI_ENDPROC END(int3) .previous .text @@ -1069,7 +1090,7 @@ END(reserved) ENTRY(double_fault) XCPT_FRAME paranoidentry do_double_fault - jmp paranoid_exit + jmp paranoid_exit1 CFI_ENDPROC END(double_fault) @@ -1085,7 +1106,7 @@ END(segment_not_present) ENTRY(stack_segment) XCPT_FRAME paranoidentry do_stack_segment - jmp paranoid_exit + jmp paranoid_exit1 CFI_ENDPROC END(stack_segment) @@ -1113,7 +1134,7 @@ ENTRY(machine_check) pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_machine_check - jmp paranoid_exit + jmp paranoid_exit1 CFI_ENDPROC END(machine_check) #endif diff -puN arch/x86_64/kernel/nmi.c~lock-validator-v3-lock-validator-reenable-nmis arch/x86_64/kernel/nmi.c --- a/arch/x86_64/kernel/nmi.c~lock-validator-v3-lock-validator-reenable-nmis +++ a/arch/x86_64/kernel/nmi.c @@ -674,17 +674,6 @@ void setup_apic_nmi_watchdog(void *unuse { struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); -#ifdef 
CONFIG_LOCKDEP - /* - * The NMI watchdog uses spinlocks (notifier chains, etc.), - * so it's not lockdep-safe: - */ - nmi_watchdog = NMI_NONE; - printk("lockdep: disabled NMI watchdog.\n"); - - return; -#endif - /* only support LOCAL and IO APICs for now */ if ((nmi_watchdog != NMI_LOCAL_APIC) && (nmi_watchdog != NMI_IO_APIC)) diff -puN arch/x86_64/oprofile/Kconfig~lock-validator-v3-lock-validator-reenable-nmis arch/x86_64/oprofile/Kconfig --- a/arch/x86_64/oprofile/Kconfig~lock-validator-v3-lock-validator-reenable-nmis +++ a/arch/x86_64/oprofile/Kconfig @@ -7,7 +7,7 @@ config PROFILING config OPROFILE tristate "OProfile system profiling (EXPERIMENTAL)" - depends on PROFILING && !LOCKDEP + depends on PROFILING help OProfile is a profiling system capable of profiling the whole system, include the kernel, kernel modules, libraries, diff -puN include/linux/hardirq.h~lock-validator-v3-lock-validator-reenable-nmis include/linux/hardirq.h --- a/include/linux/hardirq.h~lock-validator-v3-lock-validator-reenable-nmis +++ a/include/linux/hardirq.h @@ -127,7 +127,7 @@ static inline void account_system_vtime( */ extern void irq_exit(void); -#define nmi_enter() irq_enter() -#define nmi_exit() __irq_exit() +#define nmi_enter() do { lockdep_off(); irq_enter(); } while (0) +#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) #endif /* LINUX_HARDIRQ_H */ diff -puN kernel/lockdep.c~lock-validator-v3-lock-validator-reenable-nmis kernel/lockdep.c --- a/kernel/lockdep.c~lock-validator-v3-lock-validator-reenable-nmis +++ a/kernel/lockdep.c @@ -1869,7 +1869,7 @@ void trace_hardirqs_on(void) struct task_struct *curr = current; unsigned long ip; - if (unlikely(!debug_locks)) + if (unlikely(!debug_locks || current->lockdep_recursion)) return; if (DEBUG_WARN_ON(unlikely(!early_boot_irqs_enabled))) @@ -1916,7 +1916,7 @@ void trace_hardirqs_off(void) { struct task_struct *curr = current; - if (unlikely(!debug_locks)) + if (unlikely(!debug_locks || current->lockdep_recursion)) 
return; if (DEBUG_WARN_ON(!irqs_disabled())) @@ -2488,16 +2488,16 @@ void lock_acquire(struct lockdep_map *lo { unsigned long flags; + if (unlikely(current->lockdep_recursion)) + return; + raw_local_irq_save(flags); check_flags(flags); - if (unlikely(current->lockdep_recursion)) - goto out; current->lockdep_recursion = 1; __lock_acquire(lock, subtype, trylock, read, check, irqs_disabled_flags(flags), ip); current->lockdep_recursion = 0; -out: raw_local_irq_restore(flags); } @@ -2507,14 +2507,14 @@ void lock_release(struct lockdep_map *lo { unsigned long flags; + if (unlikely(current->lockdep_recursion)) + return; + raw_local_irq_save(flags); check_flags(flags); - if (unlikely(current->lockdep_recursion)) - goto out; current->lockdep_recursion = 1; __lock_release(lock, nested, ip); current->lockdep_recursion = 0; -out: raw_local_irq_restore(flags); } _ Patches currently in -mm which might be from mingo@xxxxxxx are origin.patch disable-debugging-version-of-write_lock.patch fix-drivers-mfd-ucb1x00-corec-irq-probing-bug.patch cifs-remove-f_ownerlock-use.patch lock-validator-fix-ns83820c-irq-flags-bug.patch revert-gregkh-pci-pci-test-that-drivers-properly-call-pci_set_master.patch x86-re-enable-generic-numa.patch vdso-randomize-the-i386-vdso-by-moving-it-into-a-vma.patch vdso-randomize-the-i386-vdso-by-moving-it-into-a-vma-tidy.patch vdso-randomize-the-i386-vdso-by-moving-it-into-a-vma-arch_vma_name-fix.patch vdso-randomize-the-i386-vdso-by-moving-it-into-a-vma-vs-x86_64-mm-reliable-stack-trace-support-i386.patch vdso-randomize-the-i386-vdso-by-moving-it-into-a-vma-vs-x86_64-mm-reliable-stack-trace-support-i386-2.patch vdso-randomize-the-i386-vdso-by-moving-it-into-a-vma-vs-x86_64-mm-reliable-stack-trace-support-i386-2-revert-maxmem-change.patch work-around-ppc64-bootup-bug-by-making-mutex-debugging-save-restore-irqs.patch kernel-kernel-cpuc-to-mutexes.patch define-__raw_get_cpu_var-and-use-it.patch ide-cd-end-of-media-error-fix.patch spin-rwlock-init-cleanups.patch 
lock-validator-introduce-warn_on_oncecond.patch lock-validator-introduce-warn_on_oncecond-speedup.patch emu10k1-mark-midi_spinlock-as-used.patch epoll-use-unlocked-wqueue-operations.patch reintegrate-irqreturnh-into-hardirqh.patch time-clocksource-infrastructure.patch sched-fix-smt-nice-lock-contention-and-optimization.patch sched-fix-smt-nice-lock-contention-and-optimization-tidy.patch sched-comment-bitmap-size-accounting.patch sched-fix-interactive-ceiling-code.patch sched-cpu-hotplug-race-vs-set_cpus_allowed.patch sched-implement-smpnice.patch sched-protect-calculation-of-max_pull-from-integer-wrap.patch sched-store-weighted-load-on-up.patch sched-add-discrete-weighted-cpu-load-function.patch sched-prevent-high-load-weight-tasks-suppressing-balancing.patch sched-improve-stability-of-smpnice-load-balancing.patch sched-improve-smpnice-load-balancing-when-load-per-task.patch smpnice-dont-consider-sched-groups-which-are-lightly-loaded-for-balancing.patch smpnice-dont-consider-sched-groups-which-are-lightly-loaded-for-balancing-fix.patch sched-modify-move_tasks-to-improve-load-balancing-outcomes.patch sched-avoid-unnecessarily-moving-highest-priority-task-move_tasks.patch sched-avoid-unnecessarily-moving-highest-priority-task-move_tasks-fix-2.patch sched_domain-handle-kmalloc-failure.patch sched_domain-handle-kmalloc-failure-fix.patch sched_domain-dont-use-gfp_atomic.patch sched_domain-use-kmalloc_node.patch sched_domain-allocate-sched_group-structures-dynamically.patch sched-mc-smt-power-savings-sched-policy.patch sched_exit-fix-parent-time_slice-calculation.patch sched_exit-move-the-callsite-to-do_exit.patch sched-uninline-task_rq_lock.patch bug-if-setscheduler-is-called-from-interrupt-context.patch sched-add-above-background-load-function.patch mm-implement-swap-prefetching.patch pi-futex-futex-code-cleanups.patch pi-futex-robust-futex-docs-fix.patch pi-futex-introduce-debug_check_no_locks_freed.patch pi-futex-introduce-warn_on_smp.patch 
pi-futex-add-plist-implementation.patch pi-futex-scheduler-support-for-pi.patch pi-futex-rt-mutex-core.patch pi-futex-rt-mutex-docs.patch pi-futex-rt-mutex-docs-update.patch pi-futex-rt-mutex-debug.patch pi-futex-rt-mutex-tester.patch pi-futex-rt-mutex-tester-fix.patch pi-futex-rt-mutex-futex-api.patch pi-futex-futex_lock_pi-futex_unlock_pi-support.patch pi-futex-futex_lock_pi-futex_unlock_pi-support-fix.patch rtmutex-modify-rtmutex-tester-to-test-the-setscheduler.patch futex_requeue-optimization.patch fix-ide-deadlock-in-error-reporting-code.patch genirq-rename-desc-handler-to-desc-chip.patch genirq-rename-desc-handler-to-desc-chip-power-fix.patch genirq-rename-desc-handler-to-desc-chip-ia64-fix.patch genirq-rename-desc-handler-to-desc-chip-ia64-fix-2.patch genirq-rename-desc-handler-to-desc-chip-terminate_irqs-fix.patch genirq-sem2mutex-probe_sem-probing_active.patch genirq-cleanup-merge-irq_affinity-into-irq_desc.patch genirq-cleanup-remove-irq_descp.patch genirq-cleanup-remove-irq_descp-fix.patch genirq-cleanup-remove-fastcall.patch genirq-cleanup-misc-code-cleanups.patch genirq-cleanup-reduce-irq_desc_t-use-mark-it-obsolete.patch genirq-cleanup-include-linux-irqh.patch genirq-cleanup-merge-irq_dir-smp_affinity_entry-into-irq_desc.patch genirq-cleanup-merge-pending_irq_cpumask-into-irq_desc.patch genirq-cleanup-turn-arch_has_irq_per_cpu-into-config_irq_per_cpu.patch genirq-debug-better-debug-printout-in-enable_irq.patch genirq-add-retrigger-irq-op-to-consolidate-hw_irq_resend.patch genirq-doc-comment-include-linux-irqh-structures.patch genirq-doc-handle_irq_event-and-__do_irq-comments.patch genirq-cleanup-no_irq_type-cleanups.patch genirq-doc-add-design-documentation.patch genirq-add-genirq-sw-irq-retrigger.patch genirq-add-irq_noprobe-support.patch genirq-add-irq_norequest-support.patch genirq-add-irq_noautoen-support.patch genirq-update-copyrights.patch genirq-core.patch genirq-msi-fixes-2.patch genirq-add-irq-chip-support.patch 
genirq-add-irq-chip-support-fix.patch genirq-add-irq-chip-support-misroute-irq-dont-call-desc-chip-end.patch genirq-add-handle_bad_irq.patch genirq-add-irq-wake-power-management-support.patch genirq-add-sa_trigger-support.patch genirq-cleanup-no_irq_type-no_irq_chip-rename.patch genirq-more-verbose-debugging-on-unexpected-irq-vectors.patch genirq-ia64-build-fix.patch genirq-add-irq_type_sense_mask.patch genirq-add-irq-chip-support-fasteoi-handler-handle-interrupt-disabling.patch genirq-irq-document-what-an-irq-is.patch genirq-add-chip-eoi-fastack-fasteoi-core.patch genirq-add-chip-eoi-fastack-fasteoi-fix.patch acpi-reduce-code-size-clean-up-fix-validator-message.patch lock-validator-special-locking-kgdb.patch detect-atomic-counter-underflows.patch debug-shared-irqs.patch make-frame_pointer-default=y.patch mutex-subsystem-synchro-test-module.patch vdso-print-fatal-signals.patch vdso-improve-print_fatal_signals-support-by-adding-memory-maps.patch - To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html