* Peter Zijlstra | 2015-02-18 15:03:20 [+0100]: >On Tue, Feb 17, 2015 at 06:44:19PM +0100, Sebastian Andrzej Siewior wrote: >> * Peter Zijlstra | 2015-01-21 16:07:16 [+0100]: >> >> >On Tue, Jan 20, 2015 at 01:16:13PM -0500, Steven Rostedt wrote: >> >> I'm actually wondering if we should just nuke the _interruptible() >> >> version of swait. As it should only be all interruptible or all not >> >> interruptible, that the swait_wake() should just do the wake up >> >> regardless. In which case, swait_wake() is good enough. No need to have >> >> different versions where people may think do something special. >> >> >> >> Peter? >> > >> >Yeah, I think the lastest thing I have sitting here on my disk only has >> >the swake_up() which does TASK_NORMAL, no choice there. >> >> what is the swait status in terms of mainline? This sounds like it >> beeing worked on. >> I could take the series but then I would drop it again if the mainline >> implementation changes… > >Well, 'worked' on might be putting too much on it, its one of the many >many 'spare' time things that never get attention unless people bug me >;-) > >The below is my current patch, I've not actually tried it yet, I (think >I) had one patch doing some conversions but I'm having trouble locating >it. > >Mostly-Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> >--- > include/linux/swait.h | 172 ++++++++++++++++++++++++++++++++++++++++++++++++++ > kernel/sched/swait.c | 122 +++++++++++++++++++++++++++++++++++ > 2 files changed, 294 insertions(+) > >--- /dev/null >+++ b/include/linux/swait.h >@@ -0,0 +1,172 @@ >+#ifndef _LINUX_SWAIT_H >+#define _LINUX_SWAIT_H >+ >+#include <linux/list.h> >+#include <linux/stddef.h> >+#include <linux/spinlock.h> >+#include <asm/current.h> >+ >+/* >+ * Simple wait queues >+ * >+ * While these are very similar to the other/complex wait queues (wait.h) the >+ * most important difference is that the simple waitqueue allows for >+ * deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold >+ * times. >+ * >+ * In order to make this so, we had to drop a fair number of features of the >+ * other waitqueue code; notably: >+ * >+ * - mixing INTERRUPTIBLE and UNINTERRUPTIBLE sleeps on the same waitqueue; >+ * all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right >+ * sleeper state. >+ * >+ * - the exclusive mode; because this requires preserving the list order >+ * and this is hard. >+ * >+ * - custom wake functions; because you cannot give any guarantees about >+ * random code. >+ * >+ * As a side effect of this; the data structures are slimmer. >+ * >+ * One would recommend using this wait queue where possible. >+ */ >+ >+struct task_struct; >+ >+struct swait_queue_head { >+ raw_spinlock_t lock; >+ struct list_head task_list; >+}; >+ >+struct swait_queue { >+ struct task_struct *task; >+ struct list_head task_list; I would prefer something different than task_list here since this is an item. Scrolling down you tried to use node once so maybe that would be good here :) >+}; >+ >+#define __SWAITQUEUE_INITIALIZER(name) { \ >+ .task = current, \ >+ .task_list = LIST_HEAD_INIT((name).task_list), \ >+} >+ >+#define DECLARE_SWAITQUEUE(name) \ >+ struct swait_queue name = __SWAITQUEUE_INITIALIZER(name) >+ >+#define __SWAIT_QUEUE_HEAD_INITIALIZER(name) { \ >+ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ >+ .task_list = LIST_HEAD_INIT((name).task_list), \ >+} >+ >+#define DECLARE_SWAIT_QUEUE_HEAD(name) \ >+ struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INITIALIZER(name) >+ >+extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name, >+ struct lock_class_key *key); >+ >+#define init_swait_queue_head(q) \ >+ do { \ >+ static struct lock_class_key __key; \ >+ __init_swait_queue_head((q), #q, &__key); \ >+ } while (0) >+ >+#ifdef CONFIG_LOCKDEP >+# define __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name) \ >+ ({ init_swait_queue_head(&name); name; }) >+# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name) \ >+ struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name) >+#else >+# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name) \ >+ DECLARE_SWAIT_QUEUE_HEAD(name) >+#endif >+ >+static inline int swait_active(struct swait_queue_head *q) >+{ >+ return !list_empty(&q->task_list); In RT there was a smp_mb() which you dropped and I assume you had reasons for it. I assumed that one can perform list_empty_careful() without a lock if the items were removed with list_del_init(). But since nothing in -RT blow up so far I guess this here is legal, too :) >+} >+ >+extern void swake_up(struct swait_queue_head *q); >+extern void swake_up_all(struct swait_queue_head *q); >+extern void swake_up_locked(struct swait_queue_head *q); >+ >+extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); >+extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state); >+extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state); >+ >+extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait); >+extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait); >+ >+/* as per ___wait_event() but for swait, therefore "exclusive == 0" */ >+#define ___swait_event(wq, condition, state, ret, cmd) \ >+({ \ >+ struct swait_queue __wait; \ >+ long __ret = ret; \ >+ \ >+ INIT_LIST_HEAD(&__wait.task_list); \ >+ for (;;) { \ >+ long __int = prepare_to_swait_event(&wq, &__wait, state);\ >+ \ >+ if (condition) \ >+ break; \ >+ \ >+ if (___wait_is_interruptible(state) && __int) { \ >+ __ret = __int; \ >+ break; \ >+ } \ >+ \ >+ cmd; \ >+ } \ >+ finish_swait(&wq, &__wait); \ >+ __ret; \ >+}) >+ >+#define __swait_event(wq, condition) \ >+ (void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \ >+ schedule()) >+ >+#define swait_event(wq, condition) \ >+do { \ >+ if (condition) \ >+ break; \ >+ __swait_event(wq, condition); \ >+} while (0) >+ >+#define __swait_event_timeout(wq, condition, timeout) \ >+ ___swait_event(wq, ___wait_cond_timeout(condition), \ >+ TASK_UNINTERRUPTIBLE, timeout, \ >+ __ret = schedule_timeout(__ret)) >+ >+#define swait_event_timeout(wq, condition, timeout) \ >+({ \ >+ long __ret = timeout; \ >+ if (!___wait_cond_timeout(condition)) \ >+ __ret = __swait_event_timeout(wq, condition, timeout); \ >+ __ret; \ >+}) >+ >+#define __swait_event_interruptible(wq, condition) \ >+ ___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0, \ >+ schedule()) >+ >+#define swait_event_interruptible(wq, condition) \ >+({ \ >+ int __ret = 0; \ >+ if (!(condition)) \ >+ __ret = __swait_event_interruptible(wq, condition); \ >+ __ret; \ >+}) >+ >+#define __swait_event_interruptible_timeout(wq, condition, timeout) \ >+ ___swait_event(wq, ___wait_cond_timeout(condition), \ >+ TASK_INTERRUPTIBLE, timeout, \ >+ __ret = schedule_timeout(__ret)) >+ >+#define swait_event_interruptible_timeout(wq, condition, timeout) \ >+({ \ >+ long __ret = timeout; \ >+ if (!___wait_cond_timeout(condition)) \ >+ __ret = __swait_event_interruptible_timeout(wq, \ >+ condition, timeout); \ >+ __ret; \ >+}) >+ >+#endif /* _LINUX_SWAIT_H */ >--- /dev/null >+++ b/kernel/sched/swait.c >@@ -0,0 +1,122 @@ >+ >+#include <linux/swait.h> >+ >+void __init_swait_queue_head(struct swait_queue_head *q, const char *name, >+ struct lock_class_key *key) >+{ >+ raw_spin_lock_init(&q->lock); >+ lockdep_set_class_and_name(&q->lock, key, name); >+ INIT_LIST_HEAD(&q->task_list); >+} >+EXPORT_SYMBOL(__init_swait_queue_head); >+ >+/* >+ * The thing about the wake_up_state() return value; I think we can ignore it. >+ * >+ * If for some reason it would return 0, that means the previously waiting >+ * task is already running, so it will observe condition true (or has already). >+ */ >+void swake_up_locked(struct swait_queue_head *q) >+{ >+ struct swait_queue *curr; >+ >+ list_for_each_entry(curr, &q->task_list, task_list) { >+ wake_up_process(curr->task); okay. So since we limit everything to TASK_NORMAL which has to sleep while on the list there is no need to check if we actually woken up someone. >+ list_del_init(&curr->task_list); >+ break; >+ } >+} >+EXPORT_SYMBOL(swake_up_locked); >+ >+void swake_up(struct swait_queue_head *q) >+{ >+ unsigned long flags; >+ >+ if (!swait_active(q)) >+ return; >+ >+ raw_spin_lock_irqsave(&q->lock, flags); >+ __swake_up_locked(q); I thing this should have been swake_up_locked() instead since __swake_up_locked() isn't part of this patch. Just a nitpick: later there is __prepare_to_swait() and __finish_swait() which have the __ prefix instead a _locked suffix. Not sure what is better for a better for a public API but maybe one way would be good. >+ raw_spin_unlock_irqrestore(&q->lock, flags); >+} >+EXPORT_SYMBOL(swake_up); >+ >+/* >+ * Does not allow usage from IRQ disabled, since we must be able to >+ * release IRQs to guarantee bounded hold time. >+ */ >+void swake_up_all(struct swait_queue_head *q) >+{ >+ struct swait_queue *curr, *next; >+ LIST_HEAD(tmp); WARN_ON(irqs_disabled()) ? >+ if (!swait_active(q)) >+ return; >+ >+ raw_spin_lock_irq(&q->lock); >+ list_splice_init(&q->task_list, &tmp); >+ while (!list_empty(&tmp)) { >+ curr = list_first_entry(&tmp, typeof(curr), task_list); >+ >+ wake_up_state(curr->task, state); >+ list_del_init(&curr->task_list); So because the task may timeout and remove itself from the list at anytime you need to hold the lock during wakeup and the removal from the list >+ >+ if (list_empty(&tmp)) >+ break; >+ >+ raw_spin_unlock_irq(&q->lock); and you drop the lock after each iteration in case there is an IRQ pending or the task, that has been just woken up, has a higher priority than the current task and needs to get on the CPU. Not sure if this case matters: - _this_ task (wake_all) prio 120 - first task in queue prio 10, RR - second task in queue prio 9, RR the *old* behavior would put the second task before the first task on CPU. The *new* behaviour puts the first task on the CPU after dropping the lock. The second task (that has a higher priority but nobody knows) has to wait until the first one is done (and anything else that might been woken up in the meantime with a higher prio than 120). >+ raw_spin_lock_irq(&q->lock); >+ } >+ raw_spin_unlock_irq(&q->lock); >+} >+EXPORT_SYMBOL(swake_up_all); >+ >+void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait) >+{ >+ wait->task = current; >+ if (list_empty(&wait->node)) >+ list_add(&wait->task_list, &q->task_list); >+} >+ >+void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state) >+{ >+ unsigned long flags; >+ >+ raw_spin_lock_irqsave(&q->lock, flags); >+ __prepare_to_swait(q, wait); >+ set_current_state(state); >+ raw_spin_unlock_irqrestore(&q->lock, flags); >+} >+EXPORT_SYMBOL(prepare_to_swait); >+ >+long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state) >+{ >+ if (signal_pending_state(state, current)) >+ return -ERESTARTSYS; >+ >+ prepare_to_swait(q, wait, state); >+ >+ return 0; >+} >+EXPORT_SYMBOL(prepare_to_swait_event); >+ >+void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait) this one has no users the __ suggests that it is locked edition. Maybe it is for the completions… >+{ >+ __set_current_state(TASK_RUNNING); >+ if (!list_empty(&wait->task_list)) >+ list_del_init(&wait->task_list); >+} >+ >+void finish_swait(struct swait_queue_head *q, struct swait_queue *wait) >+{ >+ unsigned long flags; >+ >+ __set_current_state(TASK_RUNNING); >+ >+ if (!list_empty_careful(&wait->task_list)) { >+ raw_spin_lock_irqsave(&q->lock, flags); >+ list_del_init(&wait->task_list); >+ raw_spin_unlock_irqrestore(&q->lock, flags); >+ } >+} >+EXPORT_SYMBOL(finish_swait); Sebastian -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html