On Tue, Jul 12, 2011 at 10:31:00AM +0100, Peter Zijlstra wrote: > On Tue, 2011-07-12 at 12:27 +0300, Avi Kivity wrote: > > On 07/12/2011 12:18 PM, Peter Zijlstra wrote: > > > > > > > > The guarantee is that the task was sleeping just before the function is > > > > called. Of course it's woken up to run the function. > > > > > > > > The idea is that you run the function in a known safe point to avoid > > > > extra synchronization. > > > > > > > > > > I'd much rather we didn't wake the task and let it sleep, that's usually > > > a very safe place for tasks to be. All you'd need is a guarantee it > > > won't be woken up while you're doing your thing. > > > > But it means that 'current' is not set to the right value. If the > > function depends on it, then it will misbehave. And in fact > > preempt_notifier_register(), which is the function we want to call here, > > does depend on current. > > > > Of course we need to find more users for this, but I have a feeling this > > will be generally useful. The alternative is to keep adding bits to > > thread_info::flags. > > Using TIF_bits sounds like a much better solution for this, wakeups are > really rather expensive and its best to avoid extra if at all possible. The problem with using a TIF bit to tell a task that it needs to perform some preempt_notifier registrations is that you end up with something that looks a lot like preempt notifiers! You also don't escape the concurrent read/write to the list of pending registrations. One thing I tried was simply using an RCU protected hlist for the preempt notifiers so that we don't have to worry about atomicity when reading the notifiers in finish_task_switch. It's a bit odd, since we know we only ever have a single reader, but I've included it below anyway. If anybody has any better ideas, I'm all ears. 
Will diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 2e681d9..2e21ffe 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -132,6 +132,11 @@ struct preempt_notifier { void preempt_notifier_register(struct preempt_notifier *notifier); void preempt_notifier_unregister(struct preempt_notifier *notifier); +void preempt_notifier_register_task(struct preempt_notifier *notifier, + struct task_struct *tsk); +void preempt_notifier_unregister_task(struct preempt_notifier *notifier, + struct task_struct *tsk); + static inline void preempt_notifier_init(struct preempt_notifier *notifier, struct preempt_ops *ops) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 496770a..5530d91 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1233,6 +1233,7 @@ struct task_struct { #ifdef CONFIG_PREEMPT_NOTIFIERS /* list of struct preempt_notifier: */ struct hlist_head preempt_notifiers; + struct mutex preempt_notifiers_mutex; #endif /* diff --git a/kernel/sched.c b/kernel/sched.c index 9769c75..d3c46ca 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2784,6 +2784,7 @@ static void __sched_fork(struct task_struct *p) #ifdef CONFIG_PREEMPT_NOTIFIERS INIT_HLIST_HEAD(&p->preempt_notifiers); + mutex_init(&p->preempt_notifiers_mutex); #endif } @@ -2901,13 +2902,31 @@ void wake_up_new_task(struct task_struct *p) #ifdef CONFIG_PREEMPT_NOTIFIERS +void preempt_notifier_register_task(struct preempt_notifier *notifier, + struct task_struct *tsk) +{ + mutex_lock(&tsk->preempt_notifiers_mutex); + hlist_add_head_rcu(&notifier->link, &tsk->preempt_notifiers); + mutex_unlock(&tsk->preempt_notifiers_mutex); +} +EXPORT_SYMBOL_GPL(preempt_notifier_register_task); + +void preempt_notifier_unregister_task(struct preempt_notifier *notifier, + struct task_struct *tsk) +{ + mutex_lock(&tsk->preempt_notifiers_mutex); + hlist_del_rcu(&notifier->link); + mutex_unlock(&tsk->preempt_notifiers_mutex); +} 
+EXPORT_SYMBOL_GPL(preempt_notifier_unregister_task); + /** * preempt_notifier_register - tell me when current is being preempted & rescheduled * @notifier: notifier struct to register */ void preempt_notifier_register(struct preempt_notifier *notifier) { - hlist_add_head(&notifier->link, &current->preempt_notifiers); + preempt_notifier_register_task(notifier, current); } EXPORT_SYMBOL_GPL(preempt_notifier_register); @@ -2919,7 +2938,7 @@ EXPORT_SYMBOL_GPL(preempt_notifier_register); */ void preempt_notifier_unregister(struct preempt_notifier *notifier) { - hlist_del(&notifier->link); + preempt_notifier_unregister_task(notifier, current); } EXPORT_SYMBOL_GPL(preempt_notifier_unregister); @@ -2928,8 +2947,12 @@ static void fire_sched_in_preempt_notifiers(struct task_struct *curr) struct preempt_notifier *notifier; struct hlist_node *node; - hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) + rcu_read_lock(); + + hlist_for_each_entry_rcu(notifier, node, &curr->preempt_notifiers, link) notifier->ops->sched_in(notifier, raw_smp_processor_id()); + + rcu_read_unlock(); } static void @@ -2939,8 +2962,12 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr, struct preempt_notifier *notifier; struct hlist_node *node; - hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) + rcu_read_lock(); + + hlist_for_each_entry_rcu(notifier, node, &curr->preempt_notifiers, link) notifier->ops->sched_out(notifier, next); + + rcu_read_unlock(); } #else /* !CONFIG_PREEMPT_NOTIFIERS */ @@ -7979,6 +8006,7 @@ void __init sched_init(void) #ifdef CONFIG_PREEMPT_NOTIFIERS INIT_HLIST_HEAD(&init_task.preempt_notifiers); + mutex_init(&init_task.preempt_notifiers_mutex); #endif #ifdef CONFIG_SMP -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html