On Tue, 17 Feb 2015 09:37:44 +0100 Daniel Wagner <wagi@xxxxxxxxx> wrote: > I needed the patch below to get it running stable under load on my > shiny box. FWIW, this patch makes 3.18-rt survive thermal events on my laptop. > From c517743659575932d7b7c94a08276d0cee8a2fdd Mon Sep 17 00:00:00 2001 > From: Daniel Wagner <daniel.wagner@xxxxxxxxxxxx> > Date: Fri, 11 Jul 2014 15:26:13 +0200 > Subject: [PATCH] thermal: Defer thermal wakeups to threads > > On RT the spin lock in pkg_temp_thermal_platform_thermal_notify will > call schedule while we run in irq context. > > [<ffffffff816850ac>] dump_stack+0x4e/0x8f > [<ffffffff81680f7d>] __schedule_bug+0xa6/0xb4 > [<ffffffff816896b4>] __schedule+0x5b4/0x700 > [<ffffffff8168982a>] schedule+0x2a/0x90 > [<ffffffff8168a8b5>] rt_spin_lock_slowlock+0xe5/0x2d0 > [<ffffffff8168afd5>] rt_spin_lock+0x25/0x30 > [<ffffffffa03a7b75>] > pkg_temp_thermal_platform_thermal_notify+0x45/0x134 > [x86_pkg_temp_thermal] [<ffffffff8103d4db>] ? > therm_throt_process+0x1b/0x160 [<ffffffff8103d831>] > intel_thermal_interrupt+0x211/0x250 [<ffffffff8103d8c1>] > smp_thermal_interrupt+0x21/0x40 [<ffffffff8169415d>] > thermal_interrupt+0x6d/0x80 > > Let's defer the work to a kthread. 
> > Signed-off-by: Daniel Wagner <daniel.wagner@xxxxxxxxxxxx> > Cc: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> > --- > drivers/thermal/x86_pkg_temp_thermal.c | 49 > ++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), > 2 deletions(-) > > diff --git a/drivers/thermal/x86_pkg_temp_thermal.c > b/drivers/thermal/x86_pkg_temp_thermal.c index 9ea3d9d..001ba02 100644 > --- a/drivers/thermal/x86_pkg_temp_thermal.c > +++ b/drivers/thermal/x86_pkg_temp_thermal.c > @@ -29,6 +29,7 @@ > #include <linux/pm.h> > #include <linux/thermal.h> > #include <linux/debugfs.h> > +#include <linux/work-simple.h> > #include <asm/cpu_device_id.h> > #include <asm/mce.h> > > @@ -352,7 +353,7 @@ static void > pkg_temp_thermal_threshold_work_fn(struct work_struct *work) } > } > > -static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) > +static void platform_thermal_notify_work(struct swork_event *event) > { > unsigned long flags; > int cpu = smp_processor_id(); > @@ -369,7 +370,7 @@ static int > pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) > pkg_work_scheduled[phy_id]) { disable_pkg_thres_interrupt(); > spin_unlock_irqrestore(&pkg_work_lock, flags); > - return -EINVAL; > + return; > } > pkg_work_scheduled[phy_id] = 1; > spin_unlock_irqrestore(&pkg_work_lock, flags); > @@ -378,9 +379,48 @@ static int > pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) > schedule_delayed_work_on(cpu, > &per_cpu(pkg_temp_thermal_threshold_work, cpu), > msecs_to_jiffies(notify_delay_ms)); +} > + > +#ifdef CONFIG_PREEMPT_RT_FULL > +static struct swork_event notify_work; > + > +static int thermal_notify_work_init(void) > +{ > + int err; > + > + err = swork_get(); > + if (!err) > + return err; > + > + INIT_SWORK(&notify_work, platform_thermal_notify_work); > + return 0; > +} > + > +static void thermal_notify_work_cleanup(void) > +{ > + swork_put(); > +} > + > +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) > +{ > + swork_queue(&notify_work); > return 0; 
> } > > +#else /* !CONFIG_PREEMPT_RT_FULL */ > + > +static int thermal_notify_work_init(void) { return 0; } > + > +static int thermal_notify_work_cleanup(void) { } > + > +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) > +{ > + platform_thermal_notify_work(NULL); > + > + return 0; > +} > +#endif /* CONFIG_PREEMPT_RT_FULL */ > + > static int find_siblings_cpu(int cpu) > { > int i; > @@ -594,6 +634,10 @@ static int __init pkg_temp_thermal_init(void) > for_each_online_cpu(i) > if (get_core_online(i)) > goto err_ret; > + > + if (!thermal_notify_work_init()) > + goto err_ret; > + > __register_hotcpu_notifier(&pkg_temp_thermal_notifier); > cpu_notifier_register_done(); > > @@ -619,6 +663,7 @@ static void __exit pkg_temp_thermal_exit(void) > > cpu_notifier_register_begin(); > __unregister_hotcpu_notifier(&pkg_temp_thermal_notifier); > + thermal_notify_work_cleanup(); > mutex_lock(&phy_dev_list_mutex); > list_for_each_entry_safe(phdev, n, &phy_dev_list, list) { > /* Retore old MSR value for package thermal > interrupt */ -- Joakim -- To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html