Hi Sebastian, On 02/16/2015 12:18 PM, Sebastian Andrzej Siewior wrote: > Dear RT folks! > > I'm pleased to announce the v3.18.7-rt1 patch set. It was running over > the weekend on my x86 box and was still alive this morning. However it > is still the first release for the v3.18 -RT series. > I haven't followed the mailing list or commented / applied any patches > from the list for -RT while being busy getting this release done (except > one patch I needed to have anyway). This is about to change. I will try to > go through my RT-inbox before doing the next release. I needed the patch below to get it running stable under load on my shiny box. cheers, daniel From c517743659575932d7b7c94a08276d0cee8a2fdd Mon Sep 17 00:00:00 2001 From: Daniel Wagner <daniel.wagner@xxxxxxxxxxxx> Date: Fri, 11 Jul 2014 15:26:13 +0200 Subject: [PATCH] thermal: Defer thermal wakeups to threads On RT the spin lock in pkg_temp_thermal_platform_thermal_notify will call schedule while we run in irq context. [<ffffffff816850ac>] dump_stack+0x4e/0x8f [<ffffffff81680f7d>] __schedule_bug+0xa6/0xb4 [<ffffffff816896b4>] __schedule+0x5b4/0x700 [<ffffffff8168982a>] schedule+0x2a/0x90 [<ffffffff8168a8b5>] rt_spin_lock_slowlock+0xe5/0x2d0 [<ffffffff8168afd5>] rt_spin_lock+0x25/0x30 [<ffffffffa03a7b75>] pkg_temp_thermal_platform_thermal_notify+0x45/0x134 [x86_pkg_temp_thermal] [<ffffffff8103d4db>] ? therm_throt_process+0x1b/0x160 [<ffffffff8103d831>] intel_thermal_interrupt+0x211/0x250 [<ffffffff8103d8c1>] smp_thermal_interrupt+0x21/0x40 [<ffffffff8169415d>] thermal_interrupt+0x6d/0x80 Let's defer the work to a kthread. 
Signed-off-by: Daniel Wagner <daniel.wagner@xxxxxxxxxxxx> Cc: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> --- drivers/thermal/x86_pkg_temp_thermal.c | 54 ++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index 9ea3d9d..001ba02 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -29,6 +29,7 @@ #include <linux/pm.h> #include <linux/thermal.h> #include <linux/debugfs.h> +#include <linux/work-simple.h> #include <asm/cpu_device_id.h> #include <asm/mce.h> @@ -352,7 +353,7 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work) } } -static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) +static void platform_thermal_notify_work(struct swork_event *event) { unsigned long flags; int cpu = smp_processor_id(); @@ -369,7 +370,7 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) pkg_work_scheduled[phy_id]) { disable_pkg_thres_interrupt(); spin_unlock_irqrestore(&pkg_work_lock, flags); - return -EINVAL; + return; } pkg_work_scheduled[phy_id] = 1; spin_unlock_irqrestore(&pkg_work_lock, flags); @@ -378,9 +379,53 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) schedule_delayed_work_on(cpu, &per_cpu(pkg_temp_thermal_threshold_work, cpu), msecs_to_jiffies(notify_delay_ms)); +} + +#ifdef CONFIG_PREEMPT_RT_FULL +static struct swork_event notify_work; + +/* + * Set up notify_work so thermal notifications can be deferred. Returns 0 on + * success, or the non-zero swork_get() result with notify_work left untouched. + */ +static int thermal_notify_work_init(void) +{ + int err; + + err = swork_get(); + if (err) + return err; + + INIT_SWORK(&notify_work, platform_thermal_notify_work); + return 0; +} + +static void thermal_notify_work_cleanup(void) +{ + swork_put(); +} + +/* Queue notify_work only; platform_thermal_notify_work() is its handler. */ +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) +{ + swork_queue(&notify_work); return 0; } +#else /* !CONFIG_PREEMPT_RT_FULL */ + +static int thermal_notify_work_init(void) { return 0; } + +static void 
thermal_notify_work_cleanup(void) { } + +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) +{ + platform_thermal_notify_work(NULL); + + return 0; +} +#endif /* CONFIG_PREEMPT_RT_FULL */ + static int find_siblings_cpu(int cpu) { int i; @@ -594,6 +639,10 @@ static int __init pkg_temp_thermal_init(void) for_each_online_cpu(i) if (get_core_online(i)) goto err_ret; + + if (thermal_notify_work_init()) + goto err_ret; + __register_hotcpu_notifier(&pkg_temp_thermal_notifier); cpu_notifier_register_done(); @@ -619,6 +668,7 @@ static void __exit pkg_temp_thermal_exit(void) cpu_notifier_register_begin(); __unregister_hotcpu_notifier(&pkg_temp_thermal_notifier); + thermal_notify_work_cleanup(); mutex_lock(&phy_dev_list_mutex); list_for_each_entry_safe(phdev, n, &phy_dev_list, list) { /* Retore old MSR value for package thermal interrupt */ -- 2.1.0 -- To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html