On Tue, Aug 11, 2020 at 11:46:51AM +0200, peterz@xxxxxxxxxxxxx wrote: > So let me once again see if I can't find a better solution for this all. > Clearly it needs one :/ So the below boots without triggering the debug code from Marco -- it should allow nesting local_irq_save/restore under raw_local_irq_*(). I tried unconditional counting, but there's some _reallly_ wonky / asymmetric code that wrecks that and I've not been able to come up with anything useful. This one starts counting when local_irq_save() finds it didn't disable IRQs while lockdep thought it did. At that point, local_irq_restore() will decrement and enable things again when it reaches 0. This assumes local_irq_save()/local_irq_restore() are nested sane, which is mostly true. This leaves #PF, which I fixed in these other patches, but I realized it needs fixing for all architectures :-( No bright ideas there yet. --- arch/x86/entry/thunk_32.S | 5 ---- include/linux/irqflags.h | 45 +++++++++++++++++++------------- init/main.c | 16 ++++++++++++ kernel/locking/lockdep.c | 58 +++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace_preemptirq.c | 33 +++++++++++++++++++++++ 5 files changed, 134 insertions(+), 23 deletions(-) diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S index 3a07ce3ec70b..f1f96d4d8cd6 100644 --- a/arch/x86/entry/thunk_32.S +++ b/arch/x86/entry/thunk_32.S @@ -29,11 +29,6 @@ SYM_CODE_START_NOALIGN(\name) SYM_CODE_END(\name) .endm -#ifdef CONFIG_TRACE_IRQFLAGS - THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller,1 - THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1 -#endif - #ifdef CONFIG_PREEMPTION THUNK preempt_schedule_thunk, preempt_schedule THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index bd5c55755447..67e2a16d3846 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -23,12 +23,16 @@ extern void lockdep_hardirqs_on_prepare(unsigned long ip); 
extern void lockdep_hardirqs_on(unsigned long ip); extern void lockdep_hardirqs_off(unsigned long ip); + extern void lockdep_hardirqs_save(unsigned long ip, unsigned long flags); + extern void lockdep_hardirqs_restore(unsigned long ip, unsigned long flags); #else static inline void lockdep_softirqs_on(unsigned long ip) { } static inline void lockdep_softirqs_off(unsigned long ip) { } static inline void lockdep_hardirqs_on_prepare(unsigned long ip) { } static inline void lockdep_hardirqs_on(unsigned long ip) { } static inline void lockdep_hardirqs_off(unsigned long ip) { } + static inline void lockdep_hardirqs_save(unsigned long ip, unsigned long flags) { } + static inline void lockdep_hardirqs_restore(unsigned long ip, unsigned long flags) { } #endif #ifdef CONFIG_TRACE_IRQFLAGS @@ -49,10 +53,13 @@ struct irqtrace_events { DECLARE_PER_CPU(int, hardirqs_enabled); DECLARE_PER_CPU(int, hardirq_context); - extern void trace_hardirqs_on_prepare(void); - extern void trace_hardirqs_off_finish(void); - extern void trace_hardirqs_on(void); - extern void trace_hardirqs_off(void); +extern void trace_hardirqs_on_prepare(void); +extern void trace_hardirqs_off_finish(void); +extern void trace_hardirqs_on(void); +extern void trace_hardirqs_off(void); +extern void trace_hardirqs_save(unsigned long flags); +extern void trace_hardirqs_restore(unsigned long flags); + # define lockdep_hardirq_context() (this_cpu_read(hardirq_context)) # define lockdep_softirq_context(p) ((p)->softirq_context) # define lockdep_hardirqs_enabled() (this_cpu_read(hardirqs_enabled)) @@ -120,17 +127,19 @@ do { \ #else # define trace_hardirqs_on_prepare() do { } while (0) # define trace_hardirqs_off_finish() do { } while (0) -# define trace_hardirqs_on() do { } while (0) -# define trace_hardirqs_off() do { } while (0) -# define lockdep_hardirq_context() 0 -# define lockdep_softirq_context(p) 0 -# define lockdep_hardirqs_enabled() 0 -# define lockdep_softirqs_enabled(p) 0 -# define lockdep_hardirq_enter() do 
{ } while (0) -# define lockdep_hardirq_threaded() do { } while (0) -# define lockdep_hardirq_exit() do { } while (0) -# define lockdep_softirq_enter() do { } while (0) -# define lockdep_softirq_exit() do { } while (0) +# define trace_hardirqs_on() do { } while (0) +# define trace_hardirqs_off() do { } while (0) +# define trace_hardirqs_save(flags) do { } while (0) +# define trace_hardirqs_restore(flags) do { } while (0) +# define lockdep_hardirq_context() 0 +# define lockdep_softirq_context(p) 0 +# define lockdep_hardirqs_enabled() 0 +# define lockdep_softirqs_enabled(p) 0 +# define lockdep_hardirq_enter() do { } while (0) +# define lockdep_hardirq_threaded() do { } while (0) +# define lockdep_hardirq_exit() do { } while (0) +# define lockdep_softirq_enter() do { } while (0) +# define lockdep_softirq_exit() do { } while (0) # define lockdep_hrtimer_enter(__hrtimer) false # define lockdep_hrtimer_exit(__context) do { } while (0) # define lockdep_posixtimer_enter() do { } while (0) @@ -185,18 +194,18 @@ do { \ do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0) #define local_irq_disable() \ do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0) + #define local_irq_save(flags) \ do { \ raw_local_irq_save(flags); \ - trace_hardirqs_off(); \ + trace_hardirqs_save(flags); \ } while (0) - #define local_irq_restore(flags) \ do { \ if (raw_irqs_disabled_flags(flags)) { \ raw_local_irq_restore(flags); \ - trace_hardirqs_off(); \ + trace_hardirqs_restore(flags); \ } else { \ trace_hardirqs_on(); \ raw_local_irq_restore(flags); \ diff --git a/init/main.c b/init/main.c index 15bd0efff3df..0873319dcff4 100644 --- a/init/main.c +++ b/init/main.c @@ -1041,6 +1041,22 @@ asmlinkage __visible void __init start_kernel(void) sfi_init_late(); kcsan_init(); + /* DEBUG CODE */ + lockdep_assert_irqs_enabled(); /* Pass. */ + { + unsigned long flags1; + raw_local_irq_save(flags1); + { + unsigned long flags2; + lockdep_assert_irqs_enabled(); /* Pass - expectedly blind. 
*/ + local_irq_save(flags2); + lockdep_assert_irqs_disabled(); /* Pass. */ + local_irq_restore(flags2); + } + raw_local_irq_restore(flags1); + } + lockdep_assert_irqs_enabled(); /* FAIL! */ + /* Do the rest non-__init'ed, we're now alive */ arch_call_rest_init(); diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 3617abb08e31..2c88574b817c 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3763,6 +3763,30 @@ void noinstr lockdep_hardirqs_on(unsigned long ip) } EXPORT_SYMBOL_GPL(lockdep_hardirqs_on); +static DEFINE_PER_CPU(int, hardirqs_disabled); + +void lockdep_hardirqs_restore(unsigned long ip, unsigned long flags) +{ + if (unlikely(!debug_locks)) + return; + + if (in_nmi()) { + if (!IS_ENABLED(CONFIG_TRACE_IRQFLAGS_NMI)) + return; + } else if (current->lockdep_recursion & LOCKDEP_RECURSION_MASK) + return; + + if (__this_cpu_read(hardirqs_disabled) && + __this_cpu_dec_return(hardirqs_disabled) == 0) { + + lockdep_hardirqs_on_prepare(ip); + lockdep_hardirqs_on(ip); + } else { + lockdep_hardirqs_off(ip); + } +} +EXPORT_SYMBOL_GPL(lockdep_hardirqs_restore); + /* * Hardirqs were disabled: */ @@ -3805,6 +3829,40 @@ void noinstr lockdep_hardirqs_off(unsigned long ip) } EXPORT_SYMBOL_GPL(lockdep_hardirqs_off); +void lockdep_hardirqs_save(unsigned long ip, unsigned long flags) +{ + if (unlikely(!debug_locks)) + return; + + /* + * Matching lockdep_hardirqs_on(), allow NMIs in the middle of lockdep; + * they will restore the software state. This ensures the software + * state is consistent inside NMIs as well. + */ + if (in_nmi()) { + if (!IS_ENABLED(CONFIG_TRACE_IRQFLAGS_NMI)) + return; + } else if (current->lockdep_recursion & LOCKDEP_RECURSION_MASK) + return; + + /* + * If IRQs were disabled, but IRQ tracking state said enabled we + * 'missed' an update (or are nested inside raw_local_irq_*()) and + * cannot rely on IRQ state to tell us when we should enable again. + * + * Count our way through this. 
+ */ + if (raw_irqs_disabled_flags(flags)) { + if (__this_cpu_read(hardirqs_enabled)) { + WARN_ON_ONCE(__this_cpu_read(hardirqs_disabled) != 0); + __this_cpu_write(hardirqs_disabled, 1); + } else if (__this_cpu_read(hardirqs_disabled)) + __this_cpu_inc(hardirqs_disabled); + } + lockdep_hardirqs_off(ip); +} +EXPORT_SYMBOL_GPL(lockdep_hardirqs_save); + /* * Softirqs will be enabled: */ diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c index f10073e62603..080deaa1d9c9 100644 --- a/kernel/trace/trace_preemptirq.c +++ b/kernel/trace/trace_preemptirq.c @@ -85,6 +85,36 @@ void trace_hardirqs_off(void) EXPORT_SYMBOL(trace_hardirqs_off); NOKPROBE_SYMBOL(trace_hardirqs_off); +void trace_hardirqs_save(unsigned long flags) +{ + lockdep_hardirqs_save(CALLER_ADDR0, flags); + + if (!this_cpu_read(tracing_irq_cpu)) { + this_cpu_write(tracing_irq_cpu, 1); + tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1); + if (!in_nmi()) + trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); + } +} +EXPORT_SYMBOL(trace_hardirqs_save); +NOKPROBE_SYMBOL(trace_hardirqs_save); + +void trace_hardirqs_restore(unsigned long flags) +{ + if (this_cpu_read(tracing_irq_cpu)) { + if (!in_nmi()) + trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); + tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1); + this_cpu_write(tracing_irq_cpu, 0); + } + + lockdep_hardirqs_restore(CALLER_ADDR0, flags); +} +EXPORT_SYMBOL(trace_hardirqs_restore); +NOKPROBE_SYMBOL(trace_hardirqs_restore); + +#ifdef __s390__ + __visible void trace_hardirqs_on_caller(unsigned long caller_addr) { if (this_cpu_read(tracing_irq_cpu)) { @@ -113,6 +143,9 @@ __visible void trace_hardirqs_off_caller(unsigned long caller_addr) } EXPORT_SYMBOL(trace_hardirqs_off_caller); NOKPROBE_SYMBOL(trace_hardirqs_off_caller); + +#endif /* __s390__ */ + #endif /* CONFIG_TRACE_IRQFLAGS */ #ifdef CONFIG_TRACE_PREEMPT_TOGGLE _______________________________________________ Virtualization mailing list 
Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/virtualization