[patch 7/7] rtmutex: prevent missed wakeups

The sleeping locks implementation based on rtmutexes can miss wakeups
for two reasons:

1) The unconditional use of TASK_UNINTERRUPTIBLE for the blocking state

   Results in missed wakeups from wake_up_interruptible*():

   state = TASK_INTERRUPTIBLE;
   blocks_on_lock()
     state = TASK_UNINTERRUPTIBLE;
     schedule();
     ....
     acquires_lock();
     restore_state();

   Until the waiter has restored its state, wake_up_interruptible*()
   will fail.
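
   A minimal sketch of the relevant check, simplified from
   try_to_wake_up() (not the literal kernel code):

	/* wake_up_interruptible() ends up here with mask == TASK_INTERRUPTIBLE */
	if (!(p->state & mask))
		return 0;	/* TASK_UNINTERRUPTIBLE & TASK_INTERRUPTIBLE == 0,
				 * so the wakeup is silently dropped */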

2) The rtmutex wakeup intermediate state TASK_RUNNING_MUTEX

   Results in missed wakeups from wake_up*():

   waiter is woken by mutex wakeup
   	  waiter->state = TASK_RUNNING_MUTEX;
   ....
   acquires_lock();
   restore_state();

   Until the waiter has restored its state, wake_up*() will fail.
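
   The same simplified check drops these wakeups: once p->state is set
   to TASK_RUNNING_MUTEX, the (UN)INTERRUPTIBLE bits are gone, so any
   waker whose mask does not explicitly include TASK_RUNNING_MUTEX
   fails:

	/* e.g. wake_up_state(p, TASK_INTERRUPTIBLE) */
	if (!(p->state & mask))
		return 0;	/* TASK_RUNNING_MUTEX & TASK_INTERRUPTIBLE == 0 */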

Solution:

Instead of setting the state to TASK_RUNNING_MUTEX in the mutex wakeup
case, we logically OR TASK_RUNNING_MUTEX into the current waiter
state. This keeps the original bits (TASK_INTERRUPTIBLE /
TASK_UNINTERRUPTIBLE) intact and lets real wakeups succeed. When a task
blocks on a lock in state TASK_INTERRUPTIBLE and is woken up by a real
wakeup, we store TASK_RUNNING as the state to restore and can safely
use TASK_UNINTERRUPTIBLE from that point on, which avoids further
wakeups that would merely make us loop in the lock code.
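
A minimal illustration of the flag arithmetic (the actual bit values do
not matter here, only that the flags are distinct bits):

	p->state = TASK_INTERRUPTIBLE;	/* waiter blocks on the lock   */
	p->state |= TASK_RUNNING_MUTEX;	/* mutex wakeup: both bits set */

	/* A real wake_up_interruptible() still matches, because
	 * (p->state & TASK_INTERRUPTIBLE) != 0. The old assignment
	 * "p->state = TASK_RUNNING_MUTEX" wiped that bit and made the
	 * real wakeup fail. */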

This also removes the extra TASK_RUNNING_MUTEX flag from the
wake_up_process*() functions, as it is no longer necessary.

Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
 kernel/rtmutex.c |   22 +++++++++++++++++++---
 kernel/sched.c   |   40 +++++++++++++++++++++++-----------------
 2 files changed, 42 insertions(+), 20 deletions(-)

Index: linux-2.6.24/kernel/rtmutex.c
===================================================================
--- linux-2.6.24.orig/kernel/rtmutex.c
+++ linux-2.6.24/kernel/rtmutex.c
@@ -798,16 +798,32 @@ static int adaptive_wait(struct rt_mutex
 /*
  * The state setting needs to preserve the original state and needs to
  * take care of non rtmutex wakeups.
+ *
+ * Called with rtmutex->wait_lock held to serialize against rtmutex
+ * wakeups.
  */
 static inline unsigned long
 rt_set_current_blocked_state(unsigned long saved_state)
 {
-	unsigned long state;
+	unsigned long state, block_state;
+
+	/*
+	 * If state is TASK_INTERRUPTIBLE, then we set the state for
+	 * blocking to TASK_INTERRUPTIBLE as well, otherwise we would
+	 * miss real wakeups via wake_up_interruptible(). If such a
+	 * wakeup happens we see the running state and preserve it in
+	 * saved_state. Now we can ignore further wakeups as we will
+	 * return in state running from our "spin" sleep.
+	 */
+	if (saved_state == TASK_INTERRUPTIBLE)
+		block_state = TASK_INTERRUPTIBLE;
+	else
+		block_state = TASK_UNINTERRUPTIBLE;
 
-	state = xchg(&current->state, TASK_UNINTERRUPTIBLE);
+	state = xchg(&current->state, block_state);
 	/*
 	 * Take care of non rtmutex wakeups. rtmutex wakeups
-	 * set the state to TASK_RUNNING_MUTEX.
+	 * OR TASK_RUNNING_MUTEX into TASK_(UN)INTERRUPTIBLE.
 	 */
 	if (state == TASK_RUNNING)
 		saved_state = TASK_RUNNING;
Index: linux-2.6.24/kernel/sched.c
===================================================================
--- linux-2.6.24.orig/kernel/sched.c
+++ linux-2.6.24/kernel/sched.c
@@ -1765,10 +1765,20 @@ out_activate:
 
 out_running:
 	trace_kernel_sched_wakeup(rq, p);
+
+	/*
+	 * For a mutex wakeup we OR TASK_RUNNING_MUTEX into the task
+	 * state to preserve the original state, so a real wakeup
+	 * still can see the (UN)INTERRUPTIBLE bits in the state check
+	 * above. We don't have to worry about the | TASK_RUNNING_MUTEX
+	 * here. The waiter is serialized by the mutex lock and nobody
+	 * else can fiddle with p->state, as we hold the rq lock.
+	 */
 	if (mutex)
-		p->state = TASK_RUNNING_MUTEX;
+		p->state |= TASK_RUNNING_MUTEX;
 	else
 		p->state = TASK_RUNNING;
+
 #ifdef CONFIG_SMP
 	if (p->sched_class->task_wake_up)
 		p->sched_class->task_wake_up(rq, p);
@@ -1782,38 +1792,34 @@ out:
 int fastcall wake_up_process(struct task_struct *p)
 {
 	return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
-			      TASK_RUNNING_MUTEX | TASK_INTERRUPTIBLE |
-			      TASK_UNINTERRUPTIBLE, 0, 0);
+			      TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0, 0);
 }
 EXPORT_SYMBOL(wake_up_process);
 
 int fastcall wake_up_process_sync(struct task_struct * p)
 {
 	return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
-			      TASK_RUNNING_MUTEX | TASK_INTERRUPTIBLE |
-			      TASK_UNINTERRUPTIBLE, 1, 0);
+			      TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 1, 0);
 }
 EXPORT_SYMBOL(wake_up_process_sync);
 
 int fastcall wake_up_process_mutex(struct task_struct * p)
 {
 	return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
-			      TASK_RUNNING_MUTEX | TASK_INTERRUPTIBLE |
-			      TASK_UNINTERRUPTIBLE, 0, 1);
+			      TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0, 1);
 }
 EXPORT_SYMBOL(wake_up_process_mutex);
 
 int fastcall wake_up_process_mutex_sync(struct task_struct * p)
 {
 	return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
-			      TASK_RUNNING_MUTEX | TASK_INTERRUPTIBLE |
-			      TASK_UNINTERRUPTIBLE, 1, 1);
+			      TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 1, 1);
 }
 EXPORT_SYMBOL(wake_up_process_mutex_sync);
 
 int fastcall wake_up_state(struct task_struct *p, unsigned int state)
 {
-	return try_to_wake_up(p, state | TASK_RUNNING_MUTEX, 0, 0);
+	return try_to_wake_up(p, state, 0, 0);
 }
 
 /*
@@ -3961,10 +3967,10 @@ asmlinkage void __sched __schedule(void)
 	clear_tsk_need_resched(prev);
 	clear_tsk_need_resched_delayed(prev);
 
-	if ((prev->state & ~TASK_RUNNING_MUTEX) &&
-			!(preempt_count() & PREEMPT_ACTIVE)) {
+	if (!(prev->state & TASK_RUNNING_MUTEX) && prev->state &&
+	    !(preempt_count() & PREEMPT_ACTIVE)) {
 		if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
-				unlikely(signal_pending(prev)))) {
+			     unlikely(signal_pending(prev)))) {
 			prev->state = TASK_RUNNING;
 		} else {
 			touch_softlockup_watchdog();
@@ -4184,8 +4190,7 @@ asmlinkage void __sched preempt_schedule
 int default_wake_function(wait_queue_t *curr, unsigned mode, int sync,
 			  void *key)
 {
-	return try_to_wake_up(curr->private, mode | TASK_RUNNING_MUTEX,
-			      sync, 0);
+	return try_to_wake_up(curr->private, mode, sync, 0);
 }
 EXPORT_SYMBOL(default_wake_function);
 
@@ -5421,8 +5426,9 @@ static void show_task(struct task_struct
 	unsigned state;
 
 	state = p->state ? __ffs(p->state) + 1 : 0;
-	printk("%-13.13s %c [%p]", p->comm,
-		state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?', p);
+	printk("%-13.13s %c (%03lx) [%p]", p->comm,
+	       state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?',
+	       (unsigned long) p->state, p);
 #if BITS_PER_LONG == 32
 	if (0 && (state == TASK_RUNNING))
 		printk(KERN_CONT " running  ");
