[patch 7/7] rtmutex: prevent missed wakeups

The sleeping locks implementation based on rtmutexes can miss wakeups
for two reasons:

1) The unconditional usage of TASK_UNINTERRUPTIBLE for the blocking state
   
   Results in missed wakeups from wake_up_interruptible*()

   state = TASK_INTERRUPTIBLE;
   blocks_on_lock()
     state = TASK_UNINTERRUPTIBLE;
     schedule();
     ....
     acquires_lock();
     restore_state();

   Until the waiter has restored its state, wake_up_interruptible*()
   will fail.

2) The rtmutex wakeup intermediate state TASK_RUNNING_MUTEX

   Results in missed wakeups from wake_up*()

   waiter is woken by mutex wakeup
   	  waiter->state = TASK_RUNNING_MUTEX;
   ....
   acquires_lock();
   restore_state();

   Until the waiter has restored its state, wake_up*() will fail. Both
   failure modes come down to the same state check in try_to_wake_up(),
   as the sketch below illustrates.
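
A minimal sketch of that check (hand-written for illustration; the
function name and signature are made up here and this is not the
actual scheduler code):

   static int sketch_try_to_wake_up(struct task_struct *p, unsigned long state)
   {
           unsigned long old_state = p->state;

           /*
            * Case 1: the waiter blocked in TASK_UNINTERRUPTIBLE, so a
            * wake_up_interruptible*() caller passes a mask with no
            * matching bit and the wakeup is dropped.
            *
            * Case 2: the waiter sits in TASK_RUNNING_MUTEX, so a plain
            * wake_up*() caller passing TASK_(UN)INTERRUPTIBLE finds no
            * matching bit either.
            */
           if (!(old_state & state))
                   return 0;               /* wakeup silently lost */

           /* ... activate the task and set it TASK_RUNNING ... */
           return 1;
   }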

Solution:

Instead of setting the state to TASK_RUNNING_MUTEX in the mutex wakeup
case, we logically OR TASK_RUNNING_MUTEX into the current waiter
state. This keeps the original bits (TASK_INTERRUPTIBLE /
TASK_UNINTERRUPTIBLE) intact and lets wakeups succeed. When a task
blocks on a lock in state TASK_INTERRUPTIBLE and is woken up by a real
wakeup, we store TASK_RUNNING as the state to restore and can safely
block in TASK_UNINTERRUPTIBLE from that point on to avoid further
wakeups, which would just make us loop in the lock code.
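
Condensed, the new state handling looks roughly like this (a sketch of
the two paths changed in the diff below, not literal code):

   /* mutex wakeup path, rq lock held: */
   p->state |= TASK_RUNNING_MUTEX;        /* keeps the (UN)INTERRUPTIBLE bits */

   /* rt_set_current_blocked_state(), rtmutex->wait_lock held: */
   do {
           state = current->state;
           if (state == TASK_RUNNING)     /* a real wakeup came in */
                   saved_state = TASK_RUNNING;
           if (saved_state == TASK_INTERRUPTIBLE)
                   block_state = TASK_INTERRUPTIBLE;
           else
                   block_state = TASK_UNINTERRUPTIBLE;
   } while (cmpxchg(&current->state, state, block_state) != state);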

This also removes the extra TASK_RUNNING_MUTEX flag from the state
masks of the wake_up_process*() functions, as it is no longer necessary.

Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
---
 kernel/rtmutex.c |   27 +++++++++++++++++++++------
 kernel/sched.c   |   40 +++++++++++++++++++++++-----------------
 2 files changed, 44 insertions(+), 23 deletions(-)

Index: linux-2.6.24/kernel/rtmutex.c
===================================================================
--- linux-2.6.24.orig/kernel/rtmutex.c
+++ linux-2.6.24/kernel/rtmutex.c
@@ -799,9 +799,11 @@ static int adaptive_wait(struct rt_mutex
  * The state setting needs to preserve the original state and needs to
  * take care of non rtmutex wakeups.
  *
- * The special case here is TASK_INTERRUPTIBLE: We cannot set the
- * blocked state to TASK_UNINTERRUPTIBLE as we would miss wakeups from
- * wake_up_interruptible().
+ * Called with rtmutex->wait_lock held to serialize against rtmutex
+ * wakeups().
+ *
+ * The cmpxchg() loop makes sure that we do not miss an update from a
+ * real wakeup.
  */
 static inline unsigned long
 rt_set_current_blocked_state(unsigned long saved_state)
@@ -810,14 +812,27 @@ rt_set_current_blocked_state(unsigned lo
 
 	do {
 		state = current->state;
+
 		/*
-		 * Take care of non rtmutex wakeups. rtmutex wakeups
-		 * set the state to TASK_RUNNING_MUTEX.
+		 * Take care of non rtmutex wakeups:
 		 */
 		if (state == TASK_RUNNING)
 			saved_state = TASK_RUNNING;
 
-		block_state = TASK_UNINTERRUPTIBLE;
+		/*
+		 * If state is TASK_INTERRUPTIBLE, then we set the
+		 * state for blocking to TASK_INTERRUPTIBLE as well,
+		 * otherwise we would miss real wakeups via
+		 * wake_up_interruptible(). If such a wakeup happens
+		 * we see the running state and preserve it in
+		 * saved_state. Now we can ignore further wakeups as
+		 * we will return in state running from our "spin"
+		 * sleep.
+		 */
+		if (saved_state == TASK_INTERRUPTIBLE)
+			block_state = TASK_INTERRUPTIBLE;
+		else
+			block_state = TASK_UNINTERRUPTIBLE;
 
 	} while (cmpxchg(&current->state, state, block_state) != state);
 
Index: linux-2.6.24/kernel/sched.c
===================================================================
--- linux-2.6.24.orig/kernel/sched.c
+++ linux-2.6.24/kernel/sched.c
@@ -1765,10 +1765,20 @@ out_activate:
 
 out_running:
 	trace_kernel_sched_wakeup(rq, p);
+
+	/*
+	 * For a mutex wakeup we or TASK_RUNNING_MUTEX to the task
+	 * state to preserve the original state, so a real wakeup
+	 * still can see the (UN)INTERRUPTIBLE bits in the state check
+	 * above. We dont have to worry about the | TASK_RUNNING_MUTEX
+	 * here. The waiter is serialized by the mutex lock and nobody
+	 * else can fiddle with p->state as we hold rq lock.
+	 */
 	if (mutex)
-		p->state = TASK_RUNNING_MUTEX;
+		p->state |= TASK_RUNNING_MUTEX;
 	else
 		p->state = TASK_RUNNING;
+
 #ifdef CONFIG_SMP
 	if (p->sched_class->task_wake_up)
 		p->sched_class->task_wake_up(rq, p);
@@ -1782,38 +1792,34 @@ out:
 int fastcall wake_up_process(struct task_struct *p)
 {
 	return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
-			      TASK_RUNNING_MUTEX | TASK_INTERRUPTIBLE |
-			      TASK_UNINTERRUPTIBLE, 0, 0);
+			      TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0, 0);
 }
 EXPORT_SYMBOL(wake_up_process);
 
 int fastcall wake_up_process_sync(struct task_struct * p)
 {
 	return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
-			      TASK_RUNNING_MUTEX | TASK_INTERRUPTIBLE |
-			      TASK_UNINTERRUPTIBLE, 1, 0);
+			      TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 1, 0);
 }
 EXPORT_SYMBOL(wake_up_process_sync);
 
 int fastcall wake_up_process_mutex(struct task_struct * p)
 {
 	return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
-			      TASK_RUNNING_MUTEX | TASK_INTERRUPTIBLE |
-			      TASK_UNINTERRUPTIBLE, 0, 1);
+			      TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0, 1);
 }
 EXPORT_SYMBOL(wake_up_process_mutex);
 
 int fastcall wake_up_process_mutex_sync(struct task_struct * p)
 {
 	return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
-			      TASK_RUNNING_MUTEX | TASK_INTERRUPTIBLE |
-			      TASK_UNINTERRUPTIBLE, 1, 1);
+			      TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 1, 1);
 }
 EXPORT_SYMBOL(wake_up_process_mutex_sync);
 
 int fastcall wake_up_state(struct task_struct *p, unsigned int state)
 {
-	return try_to_wake_up(p, state | TASK_RUNNING_MUTEX, 0, 0);
+	return try_to_wake_up(p, state, 0, 0);
 }
 
 /*
@@ -3961,10 +3967,10 @@ asmlinkage void __sched __schedule(void)
 	clear_tsk_need_resched(prev);
 	clear_tsk_need_resched_delayed(prev);
 
-	if ((prev->state & ~TASK_RUNNING_MUTEX) &&
-			!(preempt_count() & PREEMPT_ACTIVE)) {
+	if (!(prev->state & TASK_RUNNING_MUTEX) && prev->state &&
+	    !(preempt_count() & PREEMPT_ACTIVE)) {
 		if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
-				unlikely(signal_pending(prev)))) {
+			     unlikely(signal_pending(prev)))) {
 			prev->state = TASK_RUNNING;
 		} else {
 			touch_softlockup_watchdog();
@@ -4184,8 +4190,7 @@ asmlinkage void __sched preempt_schedule
 int default_wake_function(wait_queue_t *curr, unsigned mode, int sync,
 			  void *key)
 {
-	return try_to_wake_up(curr->private, mode | TASK_RUNNING_MUTEX,
-			      sync, 0);
+	return try_to_wake_up(curr->private, mode, sync, 0);
 }
 EXPORT_SYMBOL(default_wake_function);
 
@@ -5421,8 +5426,9 @@ static void show_task(struct task_struct
 	unsigned state;
 
 	state = p->state ? __ffs(p->state) + 1 : 0;
-	printk("%-13.13s %c [%p]", p->comm,
-		state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?', p);
+	printk("%-13.13s %c (%03lx) [%p]", p->comm,
+	       state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?',
+	       (unsigned long) p->state, p);
 #if BITS_PER_LONG == 32
 	if (0 && (state == TASK_RUNNING))
 		printk(KERN_CONT " running  ");

-- 
