On Wed, 2011-01-05 at 18:04 +0100, Peter Zijlstra wrote: > On Wed, 2011-01-05 at 17:57 +0100, Mike Galbraith wrote: > > + p_cfs_rq = cfs_rq_of(pse); > > + local = 1; > > + } > > +#endif > > + > > + /* Tell the scheduler that we'd really like pse to run next. */ > > + p_cfs_rq->next = pse; > > + > > + /* We know whether we want to preempt or not, but are we allowed? */ > > + preempt &= same_thread_group(p, task_of(p_cfs_rq->curr)); > > + > > + if (local) > > + clear_buddies(cfs_rq, se); > > You might want to clear before setting next :-) Or better, just remove dept. of redundancy dept. cruft. We clear buddies upon selection. It's also pointless worrying whether to set TIF_RESCHED or not, no cycle savings to be had there methinks. While performing cruftectomy, also did cosmetic int ==> bool. sched: Add yield_to(task, preempt) functionality. Currently only implemented for fair class tasks. Add a yield_to_task() method to the fair scheduling class, allowing the caller of yield_to() to accelerate another thread in its thread group, task group, and sched class toward either its cpu, or potentially the caller's own cpu if the 'preempt' argument is also passed. Implemented via a scheduler hint, using cfs_rq->next to encourage the target being selected. 
Signed-off-by: Rik van Riel <riel@xxxxxxxxxx> Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx> Signed-off-by: Mike Galbraith <efault@xxxxxx> --- include/linux/sched.h | 1 kernel/sched.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/sched_fair.c | 44 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+) Index: linux-2.6/include/linux/sched.h =================================================================== --- linux-2.6.orig/include/linux/sched.h +++ linux-2.6/include/linux/sched.h @@ -1056,6 +1056,7 @@ struct sched_class { void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); void (*yield_task) (struct rq *rq); + bool (*yield_to_task) (struct task_struct *p, bool preempt); void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags); Index: linux-2.6/kernel/sched.c =================================================================== --- linux-2.6.orig/kernel/sched.c +++ linux-2.6/kernel/sched.c @@ -5327,6 +5327,62 @@ void __sched yield(void) } EXPORT_SYMBOL(yield); +/** + * yield_to - yield the current processor to another thread in + * your thread group, or accelerate that thread toward the + * processor it's on. + * + * It's the caller's job to ensure that the target task struct + * can't go away on us before we can do any checks. 
+ */ +void __sched yield_to(struct task_struct *p, bool preempt) +{ + struct task_struct *curr = current; + struct rq *rq, *p_rq; + unsigned long flags; + bool yield = 0; + + local_irq_save(flags); + rq = this_rq(); + +again: + p_rq = task_rq(p); + double_rq_lock(rq, p_rq); + while (task_rq(p) != p_rq) { + double_rq_unlock(rq, p_rq); + goto again; + } + + if (!curr->sched_class->yield_to_task) + goto out; + + if (curr->sched_class != p->sched_class) + goto out; + + if (task_running(p_rq, p) || p->state) + goto out; + + if (!same_thread_group(p, curr)) + goto out; + +#ifdef CONFIG_FAIR_GROUP_SCHED + if (task_group(p) != task_group(curr)) + goto out; +#endif + + yield = curr->sched_class->yield_to_task(p, preempt); + +out: + double_rq_unlock(rq, p_rq); + local_irq_restore(flags); + + if (yield) { + set_current_state(TASK_RUNNING); + schedule(); + } +} +EXPORT_SYMBOL_GPL(yield_to); + /* * This task is about to go to sleep on IO. Increment rq->nr_iowait so * that process accounting knows that this is a task in IO wait state. Index: linux-2.6/kernel/sched_fair.c =================================================================== --- linux-2.6.orig/kernel/sched_fair.c +++ linux-2.6/kernel/sched_fair.c @@ -1337,6 +1337,49 @@ static void yield_task_fair(struct rq *r } #ifdef CONFIG_SMP +static void pull_task(struct rq *src_rq, struct task_struct *p, + struct rq *this_rq, int this_cpu); +#endif + +static bool yield_to_task_fair(struct task_struct *p, bool preempt) +{ + struct sched_entity *se = &current->se; + struct sched_entity *pse = &p->se; + struct cfs_rq *cfs_rq = cfs_rq_of(se); + struct cfs_rq *p_cfs_rq = cfs_rq_of(pse); + int this_cpu = smp_processor_id(); + + if (!pse->on_rq) + return false; + +#ifdef CONFIG_SMP + /* + * If this yield is important enough to want to preempt instead + * of only dropping a ->next hint, we're alone, and the target + * is not alone, pull the target to this cpu. 
+ * + * NOTE: the target may be alone in its cfs_rq if another class + * task or another task group is currently executing on its cpu. + * In this case, we still pull, to accelerate it toward the cpu. + */ + if (cfs_rq != p_cfs_rq && preempt && cfs_rq->nr_running == 1 && + cpumask_test_cpu(this_cpu, &p->cpus_allowed)) { + pull_task(task_rq(p), p, this_rq(), this_cpu); + p_cfs_rq = cfs_rq_of(pse); + } +#endif + + /* Tell the scheduler that we'd really like pse to run next. */ + p_cfs_rq->next = pse; + + /* We know whether we want to preempt or not, but are we allowed? */ + if (preempt && same_thread_group(p, task_of(p_cfs_rq->curr))) + resched_task(task_of(p_cfs_rq->curr)); + + return cfs_rq == p_cfs_rq; +} + +#ifdef CONFIG_SMP static void task_waking_fair(struct rq *rq, struct task_struct *p) { @@ -4143,6 +4186,7 @@ static const struct sched_class fair_sch .enqueue_task = enqueue_task_fair, .dequeue_task = dequeue_task_fair, .yield_task = yield_task_fair, + .yield_to_task = yield_to_task_fair, .check_preempt_curr = check_preempt_wakeup, -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html