BTW, if I were a reader, I'd be asking "where are the cold hard numbers showing that this is a good thing to do?"  They're missing because I don't have any.

This is fallout from turning spinlocks back into 100% spinning locks for rt explorations.  That _can_ improve semop throughput up to 7-fold when combined with an "OK boss, how long may I spin before I have to check for other runnable tasks at my prio?" knob, but for one thing it adds a little overhead to the general case (making it a FAIL), and for another it will chew huge amounts of CPU in the heavily contended case just to close off most priority inversions, should a high priority task block briefly while a low priority task is runnable.  Preemptible spinning locks are pure evil.

Doing this wake to head _should_ speed up lock turnaround, but I have zero hard evidence that it really makes any difference in practice, so a yawn/NAK is perfectly understandable :)  These are only the leftover bits that I think might be worth a ponder or two after declaring preemptible spinning locks too evil to live.
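In case the intent isn't obvious from the diff below, here's a rough user-space sketch of what "wake to head" is supposed to buy.  This is not kernel code, every name in it is invented purely for illustration: the waiter that was running (i.e. at the head of its priority list) when it hit the lock goes back to the head on wakeup instead of the tail, so it's next in line for the CPU and can turn the lock over right away.

/*
 * Toy model (user space, not kernel code) of "wake to head".
 * Build and run with e.g.: gcc -Wall toy_wake_to_head.c && ./a.out
 */
#include <stdio.h>
#include <string.h>

#define MAX_TASKS 8

/* Trivial stand-in for one priority's FIFO list of runnable tasks. */
struct toy_rq {
        const char *task[MAX_TASKS];
        int nr;
};

/* Plain wakeup: go to the back of the line. */
static void enqueue_tail(struct toy_rq *rq, const char *name)
{
        rq->task[rq->nr++] = name;
}

/* "Wake to head": the lock sleeper goes back where it was, at the head. */
static void enqueue_head(struct toy_rq *rq, const char *name)
{
        memmove(&rq->task[1], &rq->task[0], rq->nr * sizeof(rq->task[0]));
        rq->task[0] = name;
        rq->nr++;
}

static void print_rq(const char *what, const struct toy_rq *rq)
{
        int i;

        printf("%s:", what);
        for (i = 0; i < rq->nr; i++)
                printf(" %s", rq->task[i]);
        printf("\n");
}

int main(void)
{
        struct toy_rq tail_rq = { .nr = 0 }, head_rq = { .nr = 0 };

        /* B and C are already runnable at the waiter's priority. */
        enqueue_tail(&tail_rq, "B");
        enqueue_tail(&tail_rq, "C");
        enqueue_tail(&head_rq, "B");
        enqueue_tail(&head_rq, "C");

        /*
         * A was running (head of the list) when it blocked on the lock.
         * A plain wakeup puts it behind B and C; wake to head puts it
         * back where it was, so it gets the CPU (and the lock) next.
         */
        enqueue_tail(&tail_rq, "A");
        enqueue_head(&head_rq, "A");

        print_rq("wake to tail", &tail_rq);     /* B C A */
        print_rq("wake to head", &head_rq);     /* A B C */
        return 0;
}

That's the entire argument: with tail insertion the waiter sits behind every other runnable task at its prio before it can take the lock, with head insertion it runs next.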
On Sun, 2013-04-14 at 15:34 +0200, Mike Galbraith wrote:
> If a task blocks on a spinlock, give the CPU back as soon as possible so
> we can turn over the lock as quickly as possible. The task was at HEAD
> when it blocked, put it back, and tell everyone else to get the hell out
> of the way.
>
> Signed-off-by: Mike Galbraith <bitbucket@xxxxxxxxx>
> ---
>  include/linux/sched.h |    1 +
>  kernel/rtmutex.c      |   13 +++++++++++--
>  kernel/sched/core.c   |    9 +++++++--
>  kernel/sched/fair.c   |    4 ++++
>  kernel/sched/rt.c     |   29 +++++++++++++++++++++++++++--
>  5 files changed, 50 insertions(+), 6 deletions(-)
>
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1064,6 +1064,7 @@ struct sched_domain;
>  #define WF_FORK          0x02    /* child wakeup after fork */
>  #define WF_MIGRATED      0x04    /* internal use, task got migrated */
>  #define WF_LOCK_SLEEPER  0x08    /* wakeup spinlock "sleeper" */
> +#define WF_REQUEUE       0x10    /* requeue spinlock "sleeper" */
>
>  #define ENQUEUE_WAKEUP   1
>  #define ENQUEUE_HEAD     2
> --- a/kernel/rtmutex.c
> +++ b/kernel/rtmutex.c
> @@ -722,7 +722,7 @@ static void noinline __sched rt_spin_lo
>  {
>          struct task_struct *lock_owner, *self = current;
>          struct rt_mutex_waiter waiter, *top_waiter;
> -        int ret;
> +        int ret, wait, cpu = raw_smp_processor_id();
>
>          rt_mutex_init_waiter(&waiter, true);
>
> @@ -757,12 +757,21 @@ static void noinline __sched rt_spin_lo
>
>          top_waiter = rt_mutex_top_waiter(lock);
>          lock_owner = rt_mutex_owner(lock);
> +        wait = top_waiter != &waiter;
> +
> +        /*
> +         * If we would preempt the lock owner, just preempt ourselves:
> +         * the now boosted lock owner is queued to the queue head, so
> +         * when we release the wait lock, the lock owner runs.
> +         */
> +        if (!wait && task_cpu(lock_owner) == cpu)
> +                set_tsk_need_resched(self);
>
>          raw_spin_unlock(&lock->wait_lock);
>
>          debug_rt_mutex_print_deadlock(&waiter);
>
> -        if (top_waiter != &waiter || adaptive_wait(lock, lock_owner))
> +        if (wait || adaptive_wait(lock, lock_owner))
>                  schedule_rt_mutex(lock);
>
>          raw_spin_lock(&lock->wait_lock);
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1611,7 +1611,12 @@ EXPORT_SYMBOL(wake_up_process);
>   */
>  int wake_up_lock_sleeper(struct task_struct *p)
>  {
> -        return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER);
> +        int flags = WF_LOCK_SLEEPER;
> +
> +        if (rt_task(p))
> +                flags |= WF_REQUEUE;
> +
> +        return try_to_wake_up(p, TASK_ALL, flags);
>  }
>
>  int wake_up_state(struct task_struct *p, unsigned int state)
> @@ -3815,7 +3820,7 @@ void rt_mutex_setprio(struct task_struct
>          if (running)
>                  p->sched_class->set_curr_task(rq);
>          if (on_rq)
> -                enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
> +                enqueue_task(rq, p, ENQUEUE_HEAD);
>
>          check_class_changed(rq, p, prev_class, oldprio);
>  out_unlock:
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3522,6 +3522,10 @@ static void check_preempt_wakeup(struct
>          if (unlikely(se == pse))
>                  return;
>
> +        /* Preempting SCHED_OTHER lock holders harms throughput for no good reason */
> +        if (__migrate_disabled(curr))
> +                return;
> +
>          /*
>           * This is possible from callers such as move_task(), in which we
>           * unconditionally check_preempt_curr() after an enqueue (which may have
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -1180,6 +1180,10 @@ enqueue_task_rt(struct rq *rq, struct ta
>          if (flags & ENQUEUE_WAKEUP)
>                  rt_se->timeout = 0;
>
> +        /* The wakee is a FIFO lock sleeper */
> +        if (flags & WF_REQUEUE)
> +                flags |= ENQUEUE_HEAD;
> +
>          enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
>
>          if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
> @@ -1295,8 +1299,29 @@ select_task_rq_rt(struct task_struct *p,
>          return cpu;
>  }
>
> -static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
> +static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p, int wake_flags)
>  {
> +#ifdef CONFIG_PREEMPT_RT_BASE
> +        if (wake_flags & WF_REQUEUE) {
> +                if (!p->on_cpu)
> +                        requeue_task_rt(rq, p, 1);
> +
> +                /*
> +                 * The lock owner was here first, top waiter
> +                 * must follow.  If the owner was PI boosted,
> +                 * it's gone RSN.  All others need to get off
> +                 * this CPU ASAP, this waiter had it first.
> +                 */
> +                if (rq == this_rq())
> +                        requeue_task_rt(rq, rq->curr, 1);
> +                else if (__migrate_disabled(rq->curr))
> +                        set_tsk_need_resched(rq->curr);
> +                else
> +                        resched_task(rq->curr);
> +
> +                return;
> +        }
> +#endif
>          if (rq->curr->nr_cpus_allowed == 1)
>                  return;
>
> @@ -1342,7 +1367,7 @@ static void check_preempt_curr_rt(struct
>           * task.
>           */
>          if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
> -                check_preempt_equal_prio(rq, p);
> +                check_preempt_equal_prio(rq, p, flags);
>  #endif
>  }
>
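For anyone skimming rather than reading the diff: below is another rough standalone sketch (again user space, not kernel code; only the flag values are lifted from the patch, the helpers are invented for illustration) of how WF_REQUEUE is meant to travel.  wake_up_lock_sleeper() tags RT lock "sleepers" with it, and the enqueue_task_rt() hunk turns that tag into an enqueue at the head of the priority list; SCHED_OTHER wakees are left alone.

/* Toy model of the WF_REQUEUE plumbing, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define WF_LOCK_SLEEPER 0x08    /* wakeup spinlock "sleeper" */
#define WF_REQUEUE      0x10    /* requeue spinlock "sleeper" */
#define ENQUEUE_WAKEUP  1
#define ENQUEUE_HEAD    2

struct toy_task {
        bool rt;                /* stand-in for rt_task(p) */
};

/* Stand-in for the wake_up_lock_sleeper() hunk: only RT tasks get WF_REQUEUE. */
static int lock_sleeper_wake_flags(const struct toy_task *p)
{
        int flags = WF_LOCK_SLEEPER;

        if (p->rt)
                flags |= WF_REQUEUE;
        return flags;
}

/* Stand-in for the enqueue_task_rt() hunk: WF_REQUEUE => ENQUEUE_HEAD. */
static int rt_enqueue_flags(int wake_flags)
{
        int flags = ENQUEUE_WAKEUP;

        if (wake_flags & WF_REQUEUE)
                flags |= ENQUEUE_HEAD;
        return flags;
}

int main(void)
{
        struct toy_task fifo = { .rt = true }, other = { .rt = false };

        printf("RT lock sleeper enqueued at head: %s\n",
               rt_enqueue_flags(lock_sleeper_wake_flags(&fifo)) & ENQUEUE_HEAD ?
               "yes" : "no");
        printf("SCHED_OTHER lock sleeper enqueued at head: %s\n",
               rt_enqueue_flags(lock_sleeper_wake_flags(&other)) & ENQUEUE_HEAD ?
               "yes" : "no");
        return 0;
}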