On Thu, 28 April 2011 Thomas Gleixner <tglx@xxxxxxxxxxxxx> wrote: > On Thu, 28 Apr 2011, Sedat Dilek wrote: > > On Thu, Apr 28, 2011 at 3:30 PM, Mike Galbraith <efault@xxxxxx> wrote: > > rt_rq[0]: > > .rt_nr_running : 0 > > .rt_throttled : 0 > > > .rt_time : 888.893877 > > > .rt_time : 950.005460 > > So rt_time is constantly accumulated, but never decreased. The > decrease happens in the timer callback. Looks like the timer is not > running for whatever reason. > > Can you add the following patch as well ? > > Thanks, > > tglx > > --- linux-2.6.orig/kernel/sched.c > +++ linux-2.6/kernel/sched.c > @@ -172,7 +172,7 @@ static enum hrtimer_restart sched_rt_per > idle = do_sched_rt_period_timer(rt_b, overrun); > } > > - return idle ? HRTIMER_NORESTART : HRTIMER_RESTART; > + return HRTIMER_RESTART; This doesn't help here, whether applied on top of the others (full diff attached) or applied alone (with the throttling printk). Could it be that NO_HZ=y has some importance in this matter? Extended throttling printk output (Linus asked what the exact values looked like): [ 401.000119] sched: RT throttling activated 950012539 > 950000000 Equivalent to what Sedat sees (/proc/sched_debug): rt_rq[0]: .rt_nr_running : 2 .rt_throttled : 1 .rt_time : 950.012539 .rt_runtime : 950.000000 /proc/$(pidof rcu_kthread)/sched captured at regular intervals: Thu Apr 28 21:33:41 CEST 2011 rcu_kthread (6, #threads: 1) --------------------------------------------------------- se.exec_start : 0.000000 se.vruntime : 0.000703 se.sum_exec_runtime : 903.067982 nr_switches : 23752 nr_voluntary_switches : 23751 nr_involuntary_switches : 1 se.load.weight : 1024 policy : 1 prio : 98 clock-delta : 912 Thu Apr 28 21:34:11 CEST 2011 rcu_kthread (6, #threads: 1) --------------------------------------------------------- se.exec_start : 0.000000 se.vruntime : 0.000703 se.sum_exec_runtime : 974.899495 nr_switches : 25721 nr_voluntary_switches : 25720 nr_involuntary_switches : 1 se.load.weight : 1024 policy : 1 prio : 98 
clock-delta : 1098 Thu Apr 28 21:34:41 CEST 2011 rcu_kthread (6, #threads: 1) --------------------------------------------------------- se.exec_start : 0.000000 se.vruntime : 0.000703 se.sum_exec_runtime : 974.899495 nr_switches : 25721 nr_voluntary_switches : 25720 nr_involuntary_switches : 1 se.load.weight : 1024 policy : 1 prio : 98 clock-delta : 1126 Thu Apr 28 21:35:11 CEST 2011 rcu_kthread (6, #threads: 1) > } > > static
diff --git a/kernel/sched.c b/kernel/sched.c index 312f8b9..aad1b88 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -172,7 +172,7 @@ static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer) idle = do_sched_rt_period_timer(rt_b, overrun); } - return idle ? HRTIMER_NORESTART : HRTIMER_RESTART; + return /* idle ? HRTIMER_NORESTART : */ HRTIMER_RESTART; } static @@ -460,7 +460,7 @@ struct rq { u64 nohz_stamp; unsigned char nohz_balance_kick; #endif - unsigned int skip_clock_update; + int skip_clock_update; /* capture load from *all* tasks on this cpu: */ struct load_weight load; @@ -642,8 +642,8 @@ static void update_rq_clock(struct rq *rq) { s64 delta; - if (rq->skip_clock_update) - return; +/* if (rq->skip_clock_update > 0) + return; */ delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; rq->clock += delta; @@ -4035,7 +4035,7 @@ static inline void schedule_debug(struct task_struct *prev) static void put_prev_task(struct rq *rq, struct task_struct *prev) { - if (prev->se.on_rq) + if (prev->se.on_rq || rq->skip_clock_update < 0) update_rq_clock(rq); prev->sched_class->put_prev_task(rq, prev); } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index e7cebdc..2feae93 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -572,8 +572,15 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) enqueue = 1; } - if (enqueue) + if (enqueue) { + /* + * Tag a forced clock update if we're coming out of idle + * so rq->clock_task will be updated when we schedule(). + */ + if (rq->curr == rq->idle) + rq->skip_clock_update = -1; sched_rt_rq_enqueue(rt_rq); + } raw_spin_unlock(&rq->lock); } @@ -608,6 +615,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) return 0; if (rt_rq->rt_time > runtime) { + printk_once(KERN_WARNING "sched: RT throttling activated %llu > %llu\n", rt_rq->rt_time, runtime); rt_rq->rt_throttled = 1; if (rt_rq_throttled(rt_rq)) { sched_rt_rq_dequeue(rt_rq);