The patch titled futex-pi: fix possible pi_lock deadlock has been added to the -mm tree. Its filename is pi-futex-patchset-v4-fix.patch See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find out what to do about this From: Ingo Molnar <mingo@xxxxxxx> The lock validator detected a possible deadlock between tasklist lock and task->pi_lock. Prevent the deadlock by disabling interrupts across pi_lock operations. Signed-off-by: Ingo Molnar <mingo@xxxxxxx> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxx> --- kernel/futex.c | 28 ++++++++++---------- kernel/rtmutex.c | 63 +++++++++++++++++++++++++-------------------- kernel/sched.c | 45 +++++++++++++++++++++++++------- 3 files changed, 84 insertions(+), 52 deletions(-) diff -puN kernel/futex.c~pi-futex-patchset-v4-fix kernel/futex.c --- 25/kernel/futex.c~pi-futex-patchset-v4-fix Tue May 9 12:49:12 2006 +++ 25-akpm/kernel/futex.c Tue May 9 12:49:13 2006 @@ -358,9 +358,9 @@ static void free_pi_state(struct futex_p WARN_ON(!pi_state->owner); WARN_ON(!rt_mutex_is_locked(&pi_state->pi_mutex)); - spin_lock(&pi_state->owner->pi_lock); + spin_lock_irq(&pi_state->owner->pi_lock); list_del_init(&pi_state->list); - spin_unlock(&pi_state->owner->pi_lock); + spin_unlock_irq(&pi_state->owner->pi_lock); rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner); @@ -422,18 +422,18 @@ void exit_pi_state_list(struct task_stru * pi_state_list anymore, but we have to be careful * versus waiters unqueueing themselfs */ - spin_lock(&curr->pi_lock); + spin_lock_irq(&curr->pi_lock); while (!list_empty(head)) { next = head->next; pi_state = list_entry(next, struct futex_pi_state, list); key = pi_state->key; - spin_unlock(&curr->pi_lock); + spin_unlock_irq(&curr->pi_lock); hb = hash_futex(&key); spin_lock(&hb->lock); - spin_lock(&curr->pi_lock); + spin_lock_irq(&curr->pi_lock); if (head->next != next) { spin_unlock(&hb->lock); continue; @@ -444,15 +444,15 @@ void 
exit_pi_state_list(struct task_stru WARN_ON(pi_state->owner != curr); pi_state->owner = NULL; - spin_unlock(&curr->pi_lock); + spin_unlock_irq(&curr->pi_lock); rt_mutex_unlock(&pi_state->pi_mutex); spin_unlock(&hb->lock); - spin_lock(&curr->pi_lock); + spin_lock_irq(&curr->pi_lock); } - spin_unlock(&curr->pi_lock); + spin_unlock_irq(&curr->pi_lock); } static int @@ -500,10 +500,10 @@ lookup_pi_state(u32 uval, struct futex_h /* Store the key for possible exit cleanups: */ pi_state->key = me->key; - spin_lock(&p->pi_lock); + spin_lock_irq(&p->pi_lock); list_add(&pi_state->list, &p->pi_state_list); pi_state->owner = p; - spin_unlock(&p->pi_lock); + spin_unlock_irq(&p->pi_lock); put_task_struct(p); @@ -1216,17 +1216,17 @@ static int futex_lock_pi(u32 __user *uad /* Owner died? */ if (q.pi_state->owner != NULL) { - spin_lock(&q.pi_state->owner->pi_lock); + spin_lock_irq(&q.pi_state->owner->pi_lock); list_del_init(&q.pi_state->list); - spin_unlock(&q.pi_state->owner->pi_lock); + spin_unlock_irq(&q.pi_state->owner->pi_lock); } else newtid |= FUTEX_OWNER_DIED; q.pi_state->owner = current; - spin_lock(&current->pi_lock); + spin_lock_irq(&current->pi_lock); list_add(&q.pi_state->list, &current->pi_state_list); - spin_unlock(&current->pi_lock); + spin_unlock_irq(&current->pi_lock); /* Unqueue and drop the lock */ unqueue_me_pi(&q, hb); diff -puN kernel/rtmutex.c~pi-futex-patchset-v4-fix kernel/rtmutex.c --- 25/kernel/rtmutex.c~pi-futex-patchset-v4-fix Tue May 9 12:49:13 2006 +++ 25-akpm/kernel/rtmutex.c Tue May 9 12:49:13 2006 @@ -134,9 +134,11 @@ static void __rt_mutex_adjust_prio(struc */ static void rt_mutex_adjust_prio(struct task_struct *task) { - spin_lock(&task->pi_lock); + unsigned long flags; + + spin_lock_irqsave(&task->pi_lock, flags); __rt_mutex_adjust_prio(task); - spin_unlock(&task->pi_lock); + spin_unlock_irqrestore(&task->pi_lock, flags); } /* @@ -158,6 +160,7 @@ static int rt_mutex_adjust_prio_chain(ta struct rt_mutex *lock; struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; int 
detect_deadlock, ret = 0, depth = 0; + unsigned long flags; detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter, deadlock_detect); @@ -190,7 +193,7 @@ static int rt_mutex_adjust_prio_chain(ta /* * Task can not go away as we did a get_task() before ! */ - spin_lock(&task->pi_lock); + spin_lock_irqsave(&task->pi_lock, flags); waiter = task->pi_blocked_on; /* @@ -216,7 +219,7 @@ static int rt_mutex_adjust_prio_chain(ta lock = waiter->lock; if (!spin_trylock(&lock->wait_lock)) { - spin_unlock(&task->pi_lock); + spin_unlock_irqrestore(&task->pi_lock, flags); cpu_relax(); goto retry; } @@ -237,12 +240,12 @@ static int rt_mutex_adjust_prio_chain(ta plist_add(&waiter->list_entry, &lock->wait_list); /* Release the task */ - spin_unlock(&task->pi_lock); + spin_unlock_irqrestore(&task->pi_lock, flags); put_task_struct(task); /* Grab the next task */ task = rt_mutex_owner(lock); - spin_lock(&task->pi_lock); + spin_lock_irqsave(&task->pi_lock, flags); if (waiter == rt_mutex_top_waiter(lock)) { /* Boost the owner */ @@ -261,7 +264,7 @@ static int rt_mutex_adjust_prio_chain(ta } get_task_struct(task); - spin_unlock(&task->pi_lock); + spin_unlock_irqrestore(&task->pi_lock, flags); top_waiter = rt_mutex_top_waiter(lock); spin_unlock(&lock->wait_lock); @@ -272,7 +275,7 @@ static int rt_mutex_adjust_prio_chain(ta goto again; out_unlock_pi: - spin_unlock(&task->pi_lock); + spin_unlock_irqrestore(&task->pi_lock, flags); out_put_task: put_task_struct(task); return ret; @@ -287,6 +290,7 @@ static inline int try_to_steal_lock(stru { struct task_struct *pendowner = rt_mutex_owner(lock); struct rt_mutex_waiter *next; + unsigned long flags; if (!rt_mutex_owner_pending(lock)) return 0; @@ -294,9 +298,9 @@ static inline int try_to_steal_lock(stru if (pendowner == current) return 1; - spin_lock(&pendowner->pi_lock); + spin_lock_irqsave(&pendowner->pi_lock, flags); if (current->prio >= pendowner->prio) { - spin_unlock(&pendowner->pi_lock); + spin_unlock_irqrestore(&pendowner->pi_lock, 
flags); return 0; } @@ -306,7 +310,7 @@ static inline int try_to_steal_lock(stru * priority. */ if (likely(!rt_mutex_has_waiters(lock))) { - spin_unlock(&pendowner->pi_lock); + spin_unlock_irqrestore(&pendowner->pi_lock, flags); return 1; } @@ -314,7 +318,7 @@ static inline int try_to_steal_lock(stru next = rt_mutex_top_waiter(lock); plist_del(&next->pi_list_entry, &pendowner->pi_waiters); __rt_mutex_adjust_prio(pendowner); - spin_unlock(&pendowner->pi_lock); + spin_unlock_irqrestore(&pendowner->pi_lock, flags); /* * We are going to steal the lock and a waiter was @@ -331,10 +335,10 @@ static inline int try_to_steal_lock(stru * might be current: */ if (likely(next->task != current)) { - spin_lock(&current->pi_lock); + spin_lock_irqsave(&current->pi_lock, flags); plist_add(&next->pi_list_entry, &current->pi_waiters); __rt_mutex_adjust_prio(current); - spin_unlock(&current->pi_lock); + spin_unlock_irqrestore(&current->pi_lock, flags); } return 1; } @@ -398,8 +402,9 @@ static int task_blocks_on_rt_mutex(struc struct rt_mutex_waiter *top_waiter = waiter; task_t *owner = rt_mutex_owner(lock); int boost = 0, res; + unsigned long flags; - spin_lock(&current->pi_lock); + spin_lock_irqsave(&current->pi_lock, flags); __rt_mutex_adjust_prio(current); waiter->task = current; waiter->lock = lock; @@ -413,10 +418,10 @@ static int task_blocks_on_rt_mutex(struc current->pi_blocked_on = waiter; - spin_unlock(&current->pi_lock); + spin_unlock_irqrestore(&current->pi_lock, flags); if (waiter == rt_mutex_top_waiter(lock)) { - spin_lock(&owner->pi_lock); + spin_lock_irqsave(&owner->pi_lock, flags); plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); plist_add(&waiter->pi_list_entry, &owner->pi_waiters); @@ -425,15 +430,15 @@ static int task_blocks_on_rt_mutex(struc boost = 1; get_task_struct(owner); } - spin_unlock(&owner->pi_lock); + spin_unlock_irqrestore(&owner->pi_lock, flags); } else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) { - spin_lock(&owner->pi_lock); + spin_lock_irqsave(&owner->pi_lock, flags); if 
(owner->pi_blocked_on) { boost = 1; get_task_struct(owner); } - spin_unlock(&owner->pi_lock); + spin_unlock_irqrestore(&owner->pi_lock, flags); } if (!boost) return 0; @@ -460,8 +465,9 @@ static void wakeup_next_waiter(struct rt { struct rt_mutex_waiter *waiter; struct task_struct *pendowner; + unsigned long flags; - spin_lock(&current->pi_lock); + spin_lock_irqsave(&current->pi_lock, flags); waiter = rt_mutex_top_waiter(lock); plist_del(&waiter->list_entry, &lock->wait_list); @@ -478,7 +484,7 @@ static void wakeup_next_waiter(struct rt rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING); - spin_unlock(&current->pi_lock); + spin_unlock_irqrestore(&current->pi_lock, flags); /* * Clear the pi_blocked_on variable and enqueue a possible @@ -487,7 +493,7 @@ static void wakeup_next_waiter(struct rt * waiter with higher priority than pending-owner->normal_prio * is blocked on the unboosted (pending) owner. */ - spin_lock(&pendowner->pi_lock); + spin_lock_irqsave(&pendowner->pi_lock, flags); WARN_ON(!pendowner->pi_blocked_on); WARN_ON(pendowner->pi_blocked_on != waiter); @@ -501,7 +507,7 @@ static void wakeup_next_waiter(struct rt next = rt_mutex_top_waiter(lock); plist_add(&next->pi_list_entry, &pendowner->pi_waiters); } - spin_unlock(&pendowner->pi_lock); + spin_unlock_irqrestore(&pendowner->pi_lock, flags); wake_up_process(pendowner); } @@ -517,16 +523,17 @@ static void remove_waiter(struct rt_mute int first = (waiter == rt_mutex_top_waiter(lock)); int boost = 0; task_t *owner = rt_mutex_owner(lock); + unsigned long flags; - spin_lock(&current->pi_lock); + spin_lock_irqsave(&current->pi_lock, flags); plist_del(&waiter->list_entry, &lock->wait_list); waiter->task = NULL; current->pi_blocked_on = NULL; - spin_unlock(&current->pi_lock); + spin_unlock_irqrestore(&current->pi_lock, flags); if (first && owner != current) { - spin_lock(&owner->pi_lock); + spin_lock_irqsave(&owner->pi_lock, flags); plist_del(&waiter->pi_list_entry, &owner->pi_waiters); @@ -542,7 +549,7 @@ static void remove_waiter(struct rt_mute 
boost = 1; get_task_struct(owner); } - spin_unlock(&owner->pi_lock); + spin_unlock_irqrestore(&owner->pi_lock, flags); } WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); diff -puN kernel/sched.c~pi-futex-patchset-v4-fix kernel/sched.c --- 25/kernel/sched.c~pi-futex-patchset-v4-fix Tue May 9 12:49:13 2006 +++ 25-akpm/kernel/sched.c Tue May 9 12:49:13 2006 @@ -360,6 +360,25 @@ static inline void finish_lock_switch(ru #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ /* + * __task_rq_lock - lock the runqueue a given task resides on. + * Must be called interrupts disabled. + */ +static inline runqueue_t *__task_rq_lock(task_t *p) + __acquires(rq->lock) +{ + struct runqueue *rq; + +repeat_lock_task: + rq = task_rq(p); + spin_lock(&rq->lock); + if (unlikely(rq != task_rq(p))) { + spin_unlock(&rq->lock); + goto repeat_lock_task; + } + return rq; +} + +/* * task_rq_lock - lock the runqueue a given task resides on and disable * interrupts. Note the ordering: we can safely lookup the task_rq without * explicitly disabling preemption. @@ -380,6 +399,12 @@ repeat_lock_task: return rq; } +static inline void __task_rq_unlock(runqueue_t *rq) + __releases(rq->lock) +{ + spin_unlock(&rq->lock); +} + static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags) __releases(rq->lock) { @@ -4040,17 +4065,17 @@ recheck: * make sure no PI-waiters arrive (or leave) while we are * changing the priority of the task: */ - spin_lock(&p->pi_lock); + spin_lock_irqsave(&p->pi_lock, flags); /* * To be able to change p->policy safely, the apropriate * runqueue lock must be held. 
*/ - rq = task_rq_lock(p, &flags); + rq = __task_rq_lock(p); /* recheck policy now with rq lock held */ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { policy = oldpolicy = -1; - task_rq_unlock(rq, &flags); - spin_unlock(&p->pi_lock); + __task_rq_unlock(rq); + spin_unlock_irqrestore(&p->pi_lock, flags); goto recheck; } array = p->array; @@ -4071,8 +4096,8 @@ recheck: } else if (TASK_PREEMPTS_CURR(p, rq)) resched_task(rq->curr); } - task_rq_unlock(rq, &flags); - spin_unlock(&p->pi_lock); + __task_rq_unlock(rq); + spin_unlock_irqrestore(&p->pi_lock, flags); return 0; } @@ -6705,8 +6730,8 @@ void normalize_rt_tasks(void) if (!rt_task(p)) continue; - spin_lock(&p->pi_lock); - rq = task_rq_lock(p, &flags); + spin_lock_irqsave(&p->pi_lock, flags); + rq = __task_rq_lock(p); array = p->array; if (array) @@ -6717,8 +6742,8 @@ void normalize_rt_tasks(void) resched_task(rq->curr); } - task_rq_unlock(rq, &flags); - spin_unlock(&p->pi_lock); + __task_rq_unlock(rq); + spin_unlock_irqrestore(&p->pi_lock, flags); } read_unlock_irq(&tasklist_lock); } _ Patches currently in -mm which might be from mingo@xxxxxxx are origin.patch sem2mutex-drivers-acpi.patch sem2mutex-acpi-acpi_link_lock.patch git-dvb.patch sem2mutex-drivers-ieee1394.patch git-netdev-all.patch git-serial.patch fix-for-serial-uart-lockup.patch qla2xxx-lock-ordering-fix.patch x86_64-mm-serialize-assign_irq_vector-use-of-static-variables-fix.patch swapless-pm-add-r-w-migration-entries-fix.patch i386-break-out-of-recursion-in-stackframe-walk.patch work-around-ppc64-bootup-bug-by-making-mutex-debugging-save-restore-irqs.patch kernel-kernel-cpuc-to-mutexes.patch cond-resched-might-sleep-fix.patch cond_resched-added-to-close_files.patch time-clocksource-infrastructure.patch sched-implement-smpnice.patch sched-protect-calculation-of-max_pull-from-integer-wrap.patch sched-store-weighted-load-on-up.patch sched-add-discrete-weighted-cpu-load-function.patch 
sched-prevent-high-load-weight-tasks-suppressing-balancing.patch sched-improve-stability-of-smpnice-load-balancing.patch sched-improve-smpnice-load-balancing-when-load-per-task.patch smpnice-dont-consider-sched-groups-which-are-lightly-loaded-for-balancing.patch smpnice-dont-consider-sched-groups-which-are-lightly-loaded-for-balancing-fix.patch sched-modify-move_tasks-to-improve-load-balancing-outcomes.patch sched-avoid-unnecessarily-moving-highest-priority-task-move_tasks.patch sched-avoid-unnecessarily-moving-highest-priority-task-move_tasks-fix-2.patch sched_domain-handle-kmalloc-failure.patch sched_domain-handle-kmalloc-failure-fix.patch sched_domain-dont-use-gfp_atomic.patch sched_domain-use-kmalloc_node.patch sched_domain-allocate-sched_group-structures-dynamically.patch sched-add-above-background-load-function.patch mm-implement-swap-prefetching-fix.patch pi-futex-futex-code-cleanups.patch pi-futex-futex-code-cleanups-fix.patch pi-futex-introduce-debug_check_no_locks_freed.patch pi-futex-add-plist-implementation.patch pi-futex-scheduler-support-for-pi.patch pi-futex-rt-mutex-core.patch pi-futex-rt-mutex-core-fix-timeout-race.patch pi-futex-rt-mutex-docs.patch pi-futex-rt-mutex-debug.patch pi-futex-rt-mutex-tester.patch pi-futex-rt-mutex-futex-api.patch pi-futex-futex_lock_pi-futex_unlock_pi-support.patch pi-futex-v2.patch pi-futex-v3.patch pi-futex-patchset-v4.patch pi-futex-patchset-v4-update.patch pi-futex-patchset-v4-fix.patch rtmutex-remove-buggy-bug_on-in-pi-boosting-code.patch futex-pi-enforce-waiter-bit-when-owner-died-is-detected.patch rtmutex-debug-printk-correct-task-information.patch futex-pi-make-use-of-restart_block-when-interrupted.patch reiser4.patch detect-atomic-counter-underflows.patch debug-shared-irqs.patch make-frame_pointer-default=y.patch mutex-subsystem-synchro-test-module.patch - To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at 
http://vger.kernel.org/majordomo-info.html