The patch titled futex-pi: fix possible pi_lock deadlock has been removed from the -mm tree. Its filename is pi-futex-patchset-v4-fix.patch This patch was probably dropped from -mm because it has now been merged into a subsystem tree or into Linus's tree, or because it was folded into its parent patch in the -mm tree. ------------------------------------------------------ Subject: futex-pi: fix possible pi_lock deadlock From: Ingo Molnar <mingo@xxxxxxx> The lock validator detected a possible deadlock between tasklist lock and task->pi_lock. Prevent the deadlock by disabling interrupts across pi_lock operations. Signed-off-by: Ingo Molnar <mingo@xxxxxxx> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxx> --- kernel/futex.c | 28 ++++++++++---------- kernel/rtmutex.c | 63 +++++++++++++++++++++++++-------------------- kernel/sched.c | 45 +++++++++++++++++++++++++------- 3 files changed, 84 insertions(+), 52 deletions(-) diff -puN kernel/futex.c~pi-futex-patchset-v4-fix kernel/futex.c --- devel/kernel/futex.c~pi-futex-patchset-v4-fix 2006-05-19 16:01:34.000000000 -0700 +++ devel-akpm/kernel/futex.c 2006-05-19 16:01:34.000000000 -0700 @@ -358,9 +358,9 @@ static void free_pi_state(struct futex_p WARN_ON(!pi_state->owner); WARN_ON(!rt_mutex_is_locked(&pi_state->pi_mutex)); - spin_lock(&pi_state->owner->pi_lock); + spin_lock_irq(&pi_state->owner->pi_lock); list_del_init(&pi_state->list); - spin_unlock(&pi_state->owner->pi_lock); + spin_unlock_irq(&pi_state->owner->pi_lock); rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner); @@ -422,18 +422,18 @@ void exit_pi_state_list(struct task_stru * pi_state_list anymore, but we have to be careful * versus waiters unqueueing themselfs */ - spin_lock(&curr->pi_lock); + spin_lock_irq(&curr->pi_lock); while (!list_empty(head)) { next = head->next; pi_state = list_entry(next, struct futex_pi_state, list); key = pi_state->key; - spin_unlock(&curr->pi_lock); + 
spin_unlock_irq(&curr->pi_lock); hb = hash_futex(&key); spin_lock(&hb->lock); - spin_lock(&curr->pi_lock); + spin_lock_irq(&curr->pi_lock); if (head->next != next) { spin_unlock(&hb->lock); continue; @@ -444,15 +444,15 @@ void exit_pi_state_list(struct task_stru WARN_ON(pi_state->owner != curr); pi_state->owner = NULL; - spin_unlock(&curr->pi_lock); + spin_unlock_irq(&curr->pi_lock); rt_mutex_unlock(&pi_state->pi_mutex); spin_unlock(&hb->lock); - spin_lock(&curr->pi_lock); + spin_lock_irq(&curr->pi_lock); } - spin_unlock(&curr->pi_lock); + spin_unlock_irq(&curr->pi_lock); } static int @@ -500,10 +500,10 @@ lookup_pi_state(u32 uval, struct futex_h /* Store the key for possible exit cleanups: */ pi_state->key = me->key; - spin_lock(&p->pi_lock); + spin_lock_irq(&p->pi_lock); list_add(&pi_state->list, &p->pi_state_list); pi_state->owner = p; - spin_unlock(&p->pi_lock); + spin_unlock_irq(&p->pi_lock); put_task_struct(p); @@ -1216,17 +1216,17 @@ static int futex_lock_pi(u32 __user *uad /* Owner died? 
*/ if (q.pi_state->owner != NULL) { - spin_lock(&q.pi_state->owner->pi_lock); + spin_lock_irq(&q.pi_state->owner->pi_lock); list_del_init(&q.pi_state->list); - spin_unlock(&q.pi_state->owner->pi_lock); + spin_unlock_irq(&q.pi_state->owner->pi_lock); } else newtid |= FUTEX_OWNER_DIED; q.pi_state->owner = current; - spin_lock(&current->pi_lock); + spin_lock_irq(&current->pi_lock); list_add(&q.pi_state->list, &current->pi_state_list); - spin_unlock(&current->pi_lock); + spin_unlock_irq(&current->pi_lock); /* Unqueue and drop the lock */ unqueue_me_pi(&q, hb); diff -puN kernel/rtmutex.c~pi-futex-patchset-v4-fix kernel/rtmutex.c --- devel/kernel/rtmutex.c~pi-futex-patchset-v4-fix 2006-05-19 16:01:34.000000000 -0700 +++ devel-akpm/kernel/rtmutex.c 2006-05-19 16:01:34.000000000 -0700 @@ -134,9 +134,11 @@ static void __rt_mutex_adjust_prio(struc */ static void rt_mutex_adjust_prio(struct task_struct *task) { - spin_lock(&task->pi_lock); + unsigned long flags; + + spin_lock_irqsave(&task->pi_lock, flags); __rt_mutex_adjust_prio(task); - spin_unlock(&task->pi_lock); + spin_unlock_irqrestore(&task->pi_lock, flags); } /* @@ -158,6 +160,7 @@ static int rt_mutex_adjust_prio_chain(ta struct rt_mutex *lock; struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; int detect_deadlock, ret = 0, depth = 0; + unsigned long flags; detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter, deadlock_detect); @@ -190,7 +193,7 @@ static int rt_mutex_adjust_prio_chain(ta /* * Task can not go away as we did a get_task() before ! 
*/ - spin_lock(&task->pi_lock); + spin_lock_irqsave(&task->pi_lock, flags); waiter = task->pi_blocked_on; /* @@ -216,7 +219,7 @@ static int rt_mutex_adjust_prio_chain(ta lock = waiter->lock; if (!spin_trylock(&lock->wait_lock)) { - spin_unlock(&task->pi_lock); + spin_unlock_irqrestore(&task->pi_lock, flags); cpu_relax(); goto retry; } @@ -237,12 +240,12 @@ static int rt_mutex_adjust_prio_chain(ta plist_add(&waiter->list_entry, &lock->wait_list); /* Release the task */ - spin_unlock(&task->pi_lock); + spin_unlock_irqrestore(&task->pi_lock, flags); put_task_struct(task); /* Grab the next task */ task = rt_mutex_owner(lock); - spin_lock(&task->pi_lock); + spin_lock_irqsave(&task->pi_lock, flags); if (waiter == rt_mutex_top_waiter(lock)) { /* Boost the owner */ @@ -261,7 +264,7 @@ static int rt_mutex_adjust_prio_chain(ta } get_task_struct(task); - spin_unlock(&task->pi_lock); + spin_unlock_irqrestore(&task->pi_lock, flags); top_waiter = rt_mutex_top_waiter(lock); spin_unlock(&lock->wait_lock); @@ -272,7 +275,7 @@ static int rt_mutex_adjust_prio_chain(ta goto again; out_unlock_pi: - spin_unlock(&task->pi_lock); + spin_unlock_irqrestore(&task->pi_lock, flags); out_put_task: put_task_struct(task); return ret; @@ -287,6 +290,7 @@ static inline int try_to_steal_lock(stru { struct task_struct *pendowner = rt_mutex_owner(lock); struct rt_mutex_waiter *next; + unsigned long flags; if (!rt_mutex_owner_pending(lock)) return 0; @@ -294,9 +298,9 @@ static inline int try_to_steal_lock(stru if (pendowner == current) return 1; - spin_lock(&pendowner->pi_lock); + spin_lock_irqsave(&pendowner->pi_lock, flags); if (current->prio >= pendowner->prio) { - spin_unlock(&pendowner->pi_lock); + spin_unlock_irqrestore(&pendowner->pi_lock, flags); return 0; } @@ -306,7 +310,7 @@ static inline int try_to_steal_lock(stru * priority. 
*/ if (likely(!rt_mutex_has_waiters(lock))) { - spin_unlock(&pendowner->pi_lock); + spin_unlock_irqrestore(&pendowner->pi_lock, flags); return 1; } @@ -314,7 +318,7 @@ static inline int try_to_steal_lock(stru next = rt_mutex_top_waiter(lock); plist_del(&next->pi_list_entry, &pendowner->pi_waiters); __rt_mutex_adjust_prio(pendowner); - spin_unlock(&pendowner->pi_lock); + spin_unlock_irqrestore(&pendowner->pi_lock, flags); /* * We are going to steal the lock and a waiter was @@ -331,10 +335,10 @@ static inline int try_to_steal_lock(stru * might be current: */ if (likely(next->task != current)) { - spin_lock(&current->pi_lock); + spin_lock_irqsave(&current->pi_lock, flags); plist_add(&next->pi_list_entry, &current->pi_waiters); __rt_mutex_adjust_prio(current); - spin_unlock(&current->pi_lock); + spin_unlock_irqrestore(&current->pi_lock, flags); } return 1; } @@ -398,8 +402,9 @@ static int task_blocks_on_rt_mutex(struc struct rt_mutex_waiter *top_waiter = waiter; task_t *owner = rt_mutex_owner(lock); int boost = 0, res; + unsigned long flags; - spin_lock(&current->pi_lock); + spin_lock_irqsave(&current->pi_lock, flags); __rt_mutex_adjust_prio(current); waiter->task = current; waiter->lock = lock; @@ -413,10 +418,10 @@ static int task_blocks_on_rt_mutex(struc current->pi_blocked_on = waiter; - spin_unlock(&current->pi_lock); + spin_unlock_irqrestore(&current->pi_lock, flags); if (waiter == rt_mutex_top_waiter(lock)) { - spin_lock(&owner->pi_lock); + spin_lock_irqsave(&owner->pi_lock, flags); plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); plist_add(&waiter->pi_list_entry, &owner->pi_waiters); @@ -425,15 +430,15 @@ static int task_blocks_on_rt_mutex(struc boost = 1; get_task_struct(owner); } - spin_unlock(&owner->pi_lock); + spin_unlock_irqrestore(&owner->pi_lock, flags); } else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) { - spin_lock(&owner->pi_lock); + spin_lock_irqsave(&owner->pi_lock, flags); if (owner->pi_blocked_on) { boost = 1; get_task_struct(owner); } - spin_unlock(&owner->pi_lock); + 
spin_unlock_irqrestore(&owner->pi_lock, flags); } if (!boost) return 0; @@ -460,8 +465,9 @@ static void wakeup_next_waiter(struct rt { struct rt_mutex_waiter *waiter; struct task_struct *pendowner; + unsigned long flags; - spin_lock(&current->pi_lock); + spin_lock_irqsave(&current->pi_lock, flags); waiter = rt_mutex_top_waiter(lock); plist_del(&waiter->list_entry, &lock->wait_list); @@ -478,7 +484,7 @@ static void wakeup_next_waiter(struct rt rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING); - spin_unlock(&current->pi_lock); + spin_unlock_irqrestore(&current->pi_lock, flags); /* * Clear the pi_blocked_on variable and enqueue a possible @@ -487,7 +493,7 @@ static void wakeup_next_waiter(struct rt * waiter with higher priority than pending-owner->normal_prio * is blocked on the unboosted (pending) owner. */ - spin_lock(&pendowner->pi_lock); + spin_lock_irqsave(&pendowner->pi_lock, flags); WARN_ON(!pendowner->pi_blocked_on); WARN_ON(pendowner->pi_blocked_on != waiter); @@ -501,7 +507,7 @@ static void wakeup_next_waiter(struct rt next = rt_mutex_top_waiter(lock); plist_add(&next->pi_list_entry, &pendowner->pi_waiters); } - spin_unlock(&pendowner->pi_lock); + spin_unlock_irqrestore(&pendowner->pi_lock, flags); wake_up_process(pendowner); } @@ -517,16 +523,17 @@ static void remove_waiter(struct rt_mute int first = (waiter == rt_mutex_top_waiter(lock)); int boost = 0; task_t *owner = rt_mutex_owner(lock); + unsigned long flags; - spin_lock(&current->pi_lock); + spin_lock_irqsave(&current->pi_lock, flags); plist_del(&waiter->list_entry, &lock->wait_list); waiter->task = NULL; current->pi_blocked_on = NULL; - spin_unlock(&current->pi_lock); + spin_unlock_irqrestore(&current->pi_lock, flags); if (first && owner != current) { - spin_lock(&owner->pi_lock); + spin_lock_irqsave(&owner->pi_lock, flags); plist_del(&waiter->pi_list_entry, &owner->pi_waiters); @@ -542,7 +549,7 @@ static void remove_waiter(struct rt_mute boost = 1; get_task_struct(owner); } - spin_unlock(&owner->pi_lock); + 
spin_unlock_irqrestore(&owner->pi_lock, flags); } WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); diff -puN kernel/sched.c~pi-futex-patchset-v4-fix kernel/sched.c --- devel/kernel/sched.c~pi-futex-patchset-v4-fix 2006-05-19 16:01:34.000000000 -0700 +++ devel-akpm/kernel/sched.c 2006-05-19 16:01:34.000000000 -0700 @@ -358,6 +358,25 @@ static inline void finish_lock_switch(ru #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ /* + * __task_rq_lock - lock the runqueue a given task resides on. + * Must be called interrupts disabled. + */ +static inline runqueue_t *__task_rq_lock(task_t *p) + __acquires(rq->lock) +{ + struct runqueue *rq; + +repeat_lock_task: + rq = task_rq(p); + spin_lock(&rq->lock); + if (unlikely(rq != task_rq(p))) { + spin_unlock(&rq->lock); + goto repeat_lock_task; + } + return rq; +} + +/* * task_rq_lock - lock the runqueue a given task resides on and disable * interrupts. Note the ordering: we can safely lookup the task_rq without * explicitly disabling preemption. @@ -378,6 +397,12 @@ repeat_lock_task: return rq; } +static inline void __task_rq_unlock(runqueue_t *rq) + __releases(rq->lock) +{ + spin_unlock(&rq->lock); +} + static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags) __releases(rq->lock) { @@ -4043,17 +4068,17 @@ recheck: * make sure no PI-waiters arrive (or leave) while we are * changing the priority of the task: */ - spin_lock(&p->pi_lock); + spin_lock_irqsave(&p->pi_lock, flags); /* * To be able to change p->policy safely, the apropriate * runqueue lock must be held. 
*/ - rq = task_rq_lock(p, &flags); + rq = __task_rq_lock(p); /* recheck policy now with rq lock held */ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { policy = oldpolicy = -1; - task_rq_unlock(rq, &flags); - spin_unlock(&p->pi_lock); + __task_rq_unlock(rq); + spin_unlock_irqrestore(&p->pi_lock, flags); goto recheck; } array = p->array; @@ -4074,8 +4099,8 @@ recheck: } else if (TASK_PREEMPTS_CURR(p, rq)) resched_task(rq->curr); } - task_rq_unlock(rq, &flags); - spin_unlock(&p->pi_lock); + __task_rq_unlock(rq); + spin_unlock_irqrestore(&p->pi_lock, flags); return 0; } @@ -6708,8 +6733,8 @@ void normalize_rt_tasks(void) if (!rt_task(p)) continue; - spin_lock(&p->pi_lock); - rq = task_rq_lock(p, &flags); + spin_lock_irqsave(&p->pi_lock, flags); + rq = __task_rq_lock(p); array = p->array; if (array) @@ -6720,8 +6745,8 @@ void normalize_rt_tasks(void) resched_task(rq->curr); } - task_rq_unlock(rq, &flags); - spin_unlock(&p->pi_lock); + __task_rq_unlock(rq); + spin_unlock_irqrestore(&p->pi_lock, flags); } read_unlock_irq(&tasklist_lock); } _ Patches currently in -mm which might be from mingo@xxxxxxx are git-acpi.patch fix-drivers-mfd-ucb1x00-corec-irq-probing-bug.patch git-infiniband.patch git-netdev-all.patch fix-for-serial-uart-lockup.patch swapless-pm-add-r-w-migration-entries-fix.patch i386-break-out-of-recursion-in-stackframe-walk.patch x86-re-enable-generic-numa.patch vdso-randomize-the-i386-vdso-by-moving-it-into-a-vma.patch vdso-randomize-the-i386-vdso-by-moving-it-into-a-vma-tidy.patch vdso-randomize-the-i386-vdso-by-moving-it-into-a-vma-arch_vma_name-fix.patch work-around-ppc64-bootup-bug-by-making-mutex-debugging-save-restore-irqs.patch kernel-kernel-cpuc-to-mutexes.patch cond-resched-might-sleep-fix.patch define-__raw_get_cpu_var-and-use-it.patch ide-cd-end-of-media-error-fix.patch spin-rwlock-init-cleanups.patch time-clocksource-infrastructure.patch sched-comment-bitmap-size-accounting.patch sched-fix-interactive-ceiling-code.patch 
sched-implement-smpnice.patch sched-protect-calculation-of-max_pull-from-integer-wrap.patch sched-store-weighted-load-on-up.patch sched-add-discrete-weighted-cpu-load-function.patch sched-prevent-high-load-weight-tasks-suppressing-balancing.patch sched-improve-stability-of-smpnice-load-balancing.patch sched-improve-smpnice-load-balancing-when-load-per-task.patch smpnice-dont-consider-sched-groups-which-are-lightly-loaded-for-balancing.patch smpnice-dont-consider-sched-groups-which-are-lightly-loaded-for-balancing-fix.patch sched-modify-move_tasks-to-improve-load-balancing-outcomes.patch sched-avoid-unnecessarily-moving-highest-priority-task-move_tasks.patch sched-avoid-unnecessarily-moving-highest-priority-task-move_tasks-fix-2.patch sched_domain-handle-kmalloc-failure.patch sched_domain-handle-kmalloc-failure-fix.patch sched_domain-dont-use-gfp_atomic.patch sched_domain-use-kmalloc_node.patch sched_domain-allocate-sched_group-structures-dynamically.patch sched-add-above-background-load-function.patch mm-implement-swap-prefetching-fix.patch pi-futex-patchset-v4-fix.patch rtmutex-remove-buggy-bug_on-in-pi-boosting-code.patch futex-pi-enforce-waiter-bit-when-owner-died-is-detected.patch rtmutex-debug-printk-correct-task-information.patch futex-pi-make-use-of-restart_block-when-interrupted.patch document-futex-pi-design.patch futex_requeue-optimization.patch reiser4.patch reiser4-spin-rwlock-init-cleanups.patch genirq-rename-desc-handler-to-desc-chip.patch genirq-rename-desc-handler-to-desc-chip-power-fix.patch genirq-rename-desc-handler-to-desc-chip-ia64-fix.patch genirq-rename-desc-handler-to-desc-chip-ia64-fix-2.patch genirq-sem2mutex-probe_sem-probing_active.patch genirq-cleanup-merge-irq_affinity-into-irq_desc.patch genirq-cleanup-remove-irq_descp.patch genirq-cleanup-remove-fastcall.patch genirq-cleanup-misc-code-cleanups.patch genirq-cleanup-reduce-irq_desc_t-use-mark-it-obsolete.patch genirq-cleanup-include-linux-irqh.patch 
genirq-cleanup-merge-irq_dir-smp_affinity_entry-into-irq_desc.patch genirq-cleanup-merge-pending_irq_cpumask-into-irq_desc.patch genirq-cleanup-turn-arch_has_irq_per_cpu-into-config_irq_per_cpu.patch genirq-debug-better-debug-printout-in-enable_irq.patch genirq-add-retrigger-irq-op-to-consolidate-hw_irq_resend.patch genirq-doc-comment-include-linux-irqh-structures.patch genirq-doc-handle_irq_event-and-__do_irq-comments.patch genirq-cleanup-no_irq_type-cleanups.patch genirq-doc-add-design-documentation.patch genirq-add-genirq-sw-irq-retrigger.patch genirq-add-irq_noprobe-support.patch genirq-add-irq_norequest-support.patch genirq-add-irq_noautoen-support.patch genirq-update-copyrights.patch genirq-core.patch genirq-add-irq-chip-support.patch genirq-add-handle_bad_irq.patch genirq-add-irq-wake-power-management-support.patch genirq-add-sa_trigger-support.patch genirq-cleanup-no_irq_type-no_irq_chip-rename.patch genirq-convert-the-x86_64-architecture-to-irq-chips.patch genirq-convert-the-i386-architecture-to-irq-chips.patch genirq-convert-the-i386-architecture-to-irq-chips-fix-2.patch genirq-more-verbose-debugging-on-unexpected-irq-vectors.patch detect-atomic-counter-underflows.patch debug-shared-irqs.patch make-frame_pointer-default=y.patch mutex-subsystem-synchro-test-module.patch vdso-print-fatal-signals.patch vdso-improve-print_fatal_signals-support-by-adding-memory-maps.patch - To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html