On Tue, Jan 4, 2011 at 5:29 AM, Rik van Riel <riel@xxxxxxxxxx> wrote: > From: Mike Galbraith <efault@xxxxxx> > > Add a yield_to function to the scheduler code, allowing us to > give enough of our timeslice to another thread to allow it to > run and release whatever resource we need it to release. > > We may want to use this to provide a sys_yield_to system call > one day. > > Signed-off-by: Rik van Riel <riel@xxxxxxxxxx> > Signed-off-by: Marcelo Tosatti <mtosatti@xxxxxxxxxx> > Not-signed-off-by: Mike Galbraith <efault@xxxxxx> > > --- > Mike, want to change the above into a Signed-off-by: ? :) > This code seems to work well. > > diff --git a/include/linux/sched.h b/include/linux/sched.h > index c5f926c..0b8a3e6 100644 > --- a/include/linux/sched.h > +++ b/include/linux/sched.h > @@ -1083,6 +1083,7 @@ struct sched_class { >     void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup); >     void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep); >     void (*yield_task) (struct rq *rq); > +    int (*yield_to_task) (struct task_struct *p, int preempt); > >     void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags); > > @@ -1981,6 +1982,7 @@ static inline int rt_mutex_getprio(struct task_struct *p) >  # define rt_mutex_adjust_pi(p)     do { } while (0) >  #endif > > +extern void yield_to(struct task_struct *p, int preempt); >  extern void set_user_nice(struct task_struct *p, long nice); >  extern int task_prio(const struct task_struct *p); >  extern int task_nice(const struct task_struct *p); > diff --git a/kernel/sched.c b/kernel/sched.c > index f8e5a25..ffa7a9d 100644 > --- a/kernel/sched.c > +++ b/kernel/sched.c > @@ -6901,6 +6901,53 @@ void __sched yield(void) >  } >  EXPORT_SYMBOL(yield); > > +/** > + * yield_to - yield the current processor to another thread in > + * your thread group, or accelerate that thread toward the > + * processor it's on.
> + * > + * It's the caller's job to ensure that the target task struct > + * can't go away on us before we can do any checks. > + */ > +void __sched yield_to(struct task_struct *p, int preempt) > +{ > +    struct task_struct *curr = current; > +    struct rq *rq, *p_rq; > +    unsigned long flags; > +    int yield = 0; > + > +    local_irq_save(flags); > +    rq = this_rq(); > + > +again: > +    p_rq = task_rq(p); > +    double_rq_lock(rq, p_rq); > +    while (task_rq(p) != p_rq) { > +        double_rq_unlock(rq, p_rq); > +        goto again; > +    } > + > +    if (task_running(p_rq, p) || p->state || !p->se.on_rq || > +            !same_thread_group(p, curr) || > +            !curr->sched_class->yield_to_task || > +            curr->sched_class != p->sched_class) { > +        goto out; > +    } > + > +    yield = curr->sched_class->yield_to_task(p, preempt); > + > +out: > +    double_rq_unlock(rq, p_rq); > +    local_irq_restore(flags); > + > +    if (yield) { > +        set_current_state(TASK_RUNNING); > +        schedule(); > +    } > +} > +EXPORT_SYMBOL(yield_to); > + > + >  /* >  * This task is about to go to sleep on IO. Increment rq->nr_iowait so >  * that process accounting knows that this is a task in IO wait state.
> diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c > index 5119b08..3288e7c 100644 > --- a/kernel/sched_fair.c > +++ b/kernel/sched_fair.c > @@ -1119,6 +1119,61 @@ static void yield_task_fair(struct rq *rq) >  } > >  #ifdef CONFIG_SMP > +static void pull_task(struct rq *src_rq, struct task_struct *p, > +           struct rq *this_rq, int this_cpu); > +#endif > + > +static int yield_to_task_fair(struct task_struct *p, int preempt) > +{ > +    struct sched_entity *se = &current->se; > +    struct sched_entity *pse = &p->se; > +    struct sched_entity *curr = &(task_rq(p)->curr)->se; > +    struct cfs_rq *cfs_rq = cfs_rq_of(se); > +    struct cfs_rq *p_cfs_rq = cfs_rq_of(pse); > +    int yield = this_rq() == task_rq(p); > +    int want_preempt = preempt; > + > +#ifdef CONFIG_FAIR_GROUP_SCHED > +    if (cfs_rq->tg != p_cfs_rq->tg) > +        return 0; > + > +    /* Preemption only allowed within the same task group. */ > +    if (preempt && cfs_rq->tg != cfs_rq_of(curr)->tg) > +        preempt = 0; > +#endif > +    /* Preemption only allowed within the same thread group. */ > +    if (preempt && !same_thread_group(current, task_of(p_cfs_rq->curr))) > +        preempt = 0; > + > +#ifdef CONFIG_SMP > +    /* > +     * If this yield is important enough to want to preempt instead > +     * of only dropping a ->next hint, we're alone, and the target > +     * is not alone, pull the target to this cpu. > +     */ > +    if (want_preempt && !yield && cfs_rq->nr_running == 1 && > +            cpumask_test_cpu(smp_processor_id(), &p->cpus_allowed)) { > +        pull_task(task_rq(p), p, this_rq(), smp_processor_id()); > +        p_cfs_rq = cfs_rq_of(pse); > +        yield = 1; > +    } > +#endif > + > +    if (yield) > +        clear_buddies(cfs_rq, se); > +    else if (preempt) > +        clear_buddies(p_cfs_rq, curr); > + > +    /* Tell the scheduler that we'd really like pse to run next.
*/ > +    p_cfs_rq->next = pse; If p is not pulled and this_rq() != task_rq(p), would assigning ->next alone be enough to kick p onto its CPU? If not, how is the lock contention eased in that case? A few words of explanation, please. Thanks, Hillf > + > +    if (!yield && preempt) > +        resched_task(task_of(p_cfs_rq->curr)); > + > +    return yield; > +} > + > +#ifdef CONFIG_SMP > >  #ifdef CONFIG_FAIR_GROUP_SCHED >  /* > @@ -2081,6 +2136,7 @@ static const struct sched_class fair_sched_class = { >     .enqueue_task      = enqueue_task_fair, >     .dequeue_task      = dequeue_task_fair, >     .yield_task       = yield_task_fair, > +    .yield_to_task      = yield_to_task_fair, > >     .check_preempt_curr   = check_preempt_wakeup, > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at  http://vger.kernel.org/majordomo-info.html > Please read the FAQ at  http://www.tux.org/lkml/ > > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html