If a task blocks on a spinlock, give the CPU back as soon as possible
so we can turn over the lock as quickly as possible.  The task was at
HEAD when it blocked, put it back, and tell everyone else to get the
hell out of the way.

Signed-off-by: Mike Galbraith <bitbucket@xxxxxxxxx>
---
 include/linux/sched.h |    1 +
 kernel/rtmutex.c      |   13 +++++++++++--
 kernel/sched/core.c   |    9 +++++++--
 kernel/sched/fair.c   |    4 ++++
 kernel/sched/rt.c     |   29 +++++++++++++++++++++++++++--
 5 files changed, 50 insertions(+), 6 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1064,6 +1064,7 @@ struct sched_domain;
 #define WF_FORK		0x02		/* child wakeup after fork */
 #define WF_MIGRATED	0x04		/* internal use, task got migrated */
 #define WF_LOCK_SLEEPER	0x08		/* wakeup spinlock "sleeper" */
+#define WF_REQUEUE	0x10		/* requeue spinlock "sleeper" */
 
 #define ENQUEUE_WAKEUP		1
 #define ENQUEUE_HEAD		2
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -722,7 +722,7 @@ static void noinline __sched rt_spin_lo
 {
 	struct task_struct *lock_owner, *self = current;
 	struct rt_mutex_waiter waiter, *top_waiter;
-	int ret;
+	int ret, wait, cpu = raw_smp_processor_id();
 
 	rt_mutex_init_waiter(&waiter, true);
 
@@ -757,12 +757,21 @@ static void noinline __sched rt_spin_lo
 
 		top_waiter = rt_mutex_top_waiter(lock);
 		lock_owner = rt_mutex_owner(lock);
+		wait = top_waiter != &waiter;
+
+		/*
+		 * If we preempt the lock owner, just preempt ourselves.
+		 * the now boosted lock owner is queued to queue head.
+		 * When we release the wait lock, lock owner runs.
+		 */
+		if (!wait && task_cpu(lock_owner) == cpu)
+			set_tsk_need_resched(self);
 
 		raw_spin_unlock(&lock->wait_lock);
 
 		debug_rt_mutex_print_deadlock(&waiter);
 
-		if (top_waiter != &waiter || adaptive_wait(lock, lock_owner))
+		if (wait || adaptive_wait(lock, lock_owner))
 			schedule_rt_mutex(lock);
 
 		raw_spin_lock(&lock->wait_lock);
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1611,7 +1611,12 @@ EXPORT_SYMBOL(wake_up_process);
  */
 int wake_up_lock_sleeper(struct task_struct *p)
 {
-	return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER);
+	int flags = WF_LOCK_SLEEPER;
+
+	if (rt_task(p))
+		flags |= WF_REQUEUE;
+
+	return try_to_wake_up(p, TASK_ALL, flags);
 }
 
 int wake_up_state(struct task_struct *p, unsigned int state)
@@ -3815,7 +3820,7 @@ void rt_mutex_setprio(struct task_struct
 	if (running)
 		p->sched_class->set_curr_task(rq);
 	if (on_rq)
-		enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
+		enqueue_task(rq, p, ENQUEUE_HEAD);
 
 	check_class_changed(rq, p, prev_class, oldprio);
 out_unlock:
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3522,6 +3522,10 @@ static void check_preempt_wakeup(struct
 	if (unlikely(se == pse))
 		return;
 
+	/* Preempting SCHED_OTHER lock holders harms throughput for no good reason */
+	if (__migrate_disabled(curr))
+		return;
+
 	/*
 	 * This is possible from callers such as move_task(), in which we
 	 * unconditionally check_prempt_curr() after an enqueue (which may have
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1180,6 +1180,10 @@ enqueue_task_rt(struct rq *rq, struct ta
 	if (flags & ENQUEUE_WAKEUP)
 		rt_se->timeout = 0;
 
+	/* The wakee is a FIFO lock sleeper */
+	if (flags & WF_REQUEUE)
+		flags |= ENQUEUE_HEAD;
+
 	enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
 
 	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
@@ -1295,8 +1299,29 @@ select_task_rq_rt(struct task_struct *p,
 	return cpu;
 }
 
-static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
+static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p, int wake_flags)
 {
+#ifdef CONFIG_PREEMPT_RT_BASE
+	if (wake_flags & WF_REQUEUE) {
+		if (!p->on_cpu)
+			requeue_task_rt(rq, p, 1);
+
+		/*
+		 * The lock owner was here first, top waiter
+		 * must follow.  If the owner was PI boosted,
+		 * it's gone RSN.  All others need to get off
+		 * this CPU ASAP, this waiter had it first.
+		 */
+		if (rq == this_rq())
+			requeue_task_rt(rq, rq->curr, 1);
+		else if (__migrate_disabled(rq->curr))
+			set_tsk_need_resched(rq->curr);
+		else
+			resched_task(rq->curr);
+
+		return;
+	}
+#endif
 	if (rq->curr->nr_cpus_allowed == 1)
 		return;
 
@@ -1342,7 +1367,7 @@ static void check_preempt_curr_rt(struct
 	 * task.
 	 */
 	if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
-		check_preempt_equal_prio(rq, p);
+		check_preempt_equal_prio(rq, p, flags);
 #endif
 }
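
For readers who want the intent without walking the hunks: the point of
WF_REQUEUE is that an RT task which blocked on a sleeping spinlock goes back
to the HEAD of its priority FIFO when it is woken, rather than to the tail,
so it gets the CPU (and with it the lock) with minimal queueing delay.  Below
is a minimal userspace sketch of that enqueue policy, not kernel code; the
runqueue structure and the rq_push_head()/rq_push_tail() helpers are invented
here purely for illustration.

/*
 * Illustrative userspace model only -- NOT kernel code.  The runqueue,
 * rq_push_head()/rq_push_tail() and the task names are made up for this
 * sketch of "lock sleeper goes back to HEAD" vs. a plain tail enqueue.
 */
#include <stdio.h>
#include <stdlib.h>

struct task {
	char name[16];
	struct task *next;
};

struct runqueue {
	struct task *head;
	struct task *tail;
};

static struct task *task_new(const char *name)
{
	struct task *t = calloc(1, sizeof(*t));

	if (!t)
		abort();
	snprintf(t->name, sizeof(t->name), "%s", name);
	return t;
}

/* Ordinary wakeup: queue behind every runnable task of this priority. */
static void rq_push_tail(struct runqueue *rq, struct task *t)
{
	t->next = NULL;
	if (rq->tail)
		rq->tail->next = t;
	else
		rq->head = t;
	rq->tail = t;
}

/* Lock sleeper wakeup: it was at HEAD when it blocked, put it back. */
static void rq_push_head(struct runqueue *rq, struct task *t)
{
	t->next = rq->head;
	rq->head = t;
	if (!rq->tail)
		rq->tail = t;
}

static void rq_print(const struct runqueue *rq)
{
	const struct task *t;

	for (t = rq->head; t; t = t->next)
		printf("%s%s", t->name, t->next ? " -> " : "\n");
}

int main(void)
{
	struct runqueue rq = { NULL, NULL };

	/* Three equal-priority FIFO tasks are already runnable. */
	rq_push_tail(&rq, task_new("fifo-a"));
	rq_push_tail(&rq, task_new("fifo-b"));
	rq_push_tail(&rq, task_new("fifo-c"));

	/*
	 * "waiter" blocked on a spinlock while it was at HEAD and is now
	 * the top waiter.  A plain tail enqueue would park it behind a, b
	 * and c, delaying the lock handover; requeue it at HEAD instead.
	 */
	rq_push_head(&rq, task_new("waiter"));
	rq_print(&rq);
	return 0;
}

Compiled and run, this prints "waiter -> fifo-a -> fifo-b -> fifo-c": the
woken waiter is back in front instead of queued behind three equal-priority
tasks.  In the patch itself the same effect comes from enqueue_task_rt()
turning WF_REQUEUE into ENQUEUE_HEAD, with check_preempt_equal_prio() moving
everyone else out of the waiter's way.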