The patch titled

     fix robust PI-futexes to be properly unlocked on unexpected exit

has been added to the -mm tree.  Its filename is

     fix-robust-pi-futexes-to-be-properly-unlocked-on-unexpected-exit.patch

See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt
to find out what to do about this

------------------------------------------------------
Subject: fix robust PI-futexes to be properly unlocked on unexpected exit
From: Ingo Molnar <mingo@xxxxxxx>

Fix robust PI-futexes so that they are properly unlocked on unexpected
exit.  For this the kernel has to know whether a futex is a PI or a
non-PI one, because the semantics are different.  Since the space in the
relevant glibc data structures is extremely scarce, the best solution is
to encode the 'PI' information in bit 0 of the robust-list pointer.
Existing (non-PI) glibc robust futexes always have this bit zero, so the
ABI is kept.  New glibc versions with PI-robust-futexes will set this bit.

Further fixes:

Signed-off-by: Ingo Molnar <mingo@xxxxxxx>
Signed-off-by: Ulrich Drepper <drepper@xxxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
---

 include/linux/futex.h |    3 -
 kernel/futex.c        |   91 +++++++++++++++++++++++++++-------------
 kernel/futex_compat.c |   34 ++++++++++----
 3 files changed, 89 insertions(+), 39 deletions(-)

diff -puN include/linux/futex.h~fix-robust-pi-futexes-to-be-properly-unlocked-on-unexpected-exit include/linux/futex.h
--- a/include/linux/futex.h~fix-robust-pi-futexes-to-be-properly-unlocked-on-unexpected-exit
+++ a/include/linux/futex.h
@@ -96,7 +96,8 @@ struct robust_list_head {
 long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout,
 	      u32 __user *uaddr2, u32 val2, u32 val3);
 
-extern int handle_futex_death(u32 __user *uaddr, struct task_struct *curr);
+extern int
+handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
 
 #ifdef CONFIG_FUTEX
 extern void exit_robust_list(struct task_struct *curr);
diff -puN kernel/futex.c~fix-robust-pi-futexes-to-be-properly-unlocked-on-unexpected-exit kernel/futex.c
--- a/kernel/futex.c~fix-robust-pi-futexes-to-be-properly-unlocked-on-unexpected-exit
+++ a/kernel/futex.c
@@ -495,10 +495,13 @@ lookup_pi_state(u32 uval, struct futex_h
 	}
 
 	/*
-	 * We are the first waiter - try to look up the real owner and
-	 * attach the new pi_state to it:
+	 * We are the first waiter - try to look up the real owner and attach
+	 * the new pi_state to it, but bail out when the owner died bit is set
+	 * and TID = 0:
 	 */
 	pid = uval & FUTEX_TID_MASK;
+	if (!pid && (uval & FUTEX_OWNER_DIED))
+		return -ESRCH;
 	p = futex_find_get_task(pid);
 	if (!p)
 		return -ESRCH;
@@ -579,16 +582,17 @@ static int wake_futex_pi(u32 __user *uad
 	 * kept enabled while there is PI state around. We must also
 	 * preserve the owner died bit.)
 	 */
-	newval = (uval & FUTEX_OWNER_DIED) | FUTEX_WAITERS | new_owner->pid;
-
-	inc_preempt_count();
-	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
-	dec_preempt_count();
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		newval = FUTEX_WAITERS | new_owner->pid;
 
-	if (curval == -EFAULT)
-		return -EFAULT;
-	if (curval != uval)
-		return -EINVAL;
+		inc_preempt_count();
+		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+		dec_preempt_count();
+		if (curval == -EFAULT)
+			return -EFAULT;
+		if (curval != uval)
+			return -EINVAL;
+	}
 
 	spin_lock_irq(&pi_state->owner->pi_lock);
 	WARN_ON(list_empty(&pi_state->list));
@@ -1443,9 +1447,11 @@ retry_locked:
	 * again. If it succeeds then we can return without waking
	 * anyone else up:
	 */
-	inc_preempt_count();
-	uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
-	dec_preempt_count();
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		inc_preempt_count();
+		uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
+		dec_preempt_count();
+	}
 
 	if (unlikely(uval == -EFAULT))
 		goto pi_faulted;
@@ -1478,9 +1484,11 @@ retry_locked:
 	/*
 	 * No waiters - kernel unlocks the futex:
 	 */
-	ret = unlock_futex_pi(uaddr, uval);
-	if (ret == -EFAULT)
-		goto pi_faulted;
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		ret = unlock_futex_pi(uaddr, uval);
+		if (ret == -EFAULT)
+			goto pi_faulted;
+	}
 
 out_unlock:
 	spin_unlock(&hb->lock);
@@ -1699,9 +1707,9 @@ err_unlock:
  * Process a futex-list entry, check whether it's owned by the
  * dying task, and do notification if so:
  */
-int handle_futex_death(u32 __user *uaddr, struct task_struct *curr)
+int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
 {
-	u32 uval, nval;
+	u32 uval, nval, mval;
 
 retry:
 	if (get_user(uval, uaddr))
@@ -1718,21 +1726,45 @@ retry:
 		 * thread-death.) The rest of the cleanup is done in
 		 * userspace.
 		 */
-		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval,
-						     uval | FUTEX_OWNER_DIED);
+		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
+		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
+
 		if (nval == -EFAULT)
 			return -1;
 
 		if (nval != uval)
 			goto retry;
 
-		if (uval & FUTEX_WAITERS)
-			futex_wake(uaddr, 1);
+		/*
+		 * Wake robust non-PI futexes here. The wakeup of
+		 * PI futexes happens in exit_pi_state():
+		 */
+		if (!pi) {
+			if (uval & FUTEX_WAITERS)
+				futex_wake(uaddr, 1);
+		}
 	}
 	return 0;
 }
 
 /*
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ */
+static inline int fetch_robust_entry(struct robust_list __user **entry,
+				     struct robust_list __user **head, int *pi)
+{
+	unsigned long uentry;
+
+	if (get_user(uentry, (unsigned long *)head))
+		return -EFAULT;
+
+	*entry = (void *)(uentry & ~1UL);
+	*pi = uentry & 1;
+
+	return 0;
+}
+
+/*
  * Walk curr->robust_list (very carefully, it's a userspace list!)
  * and mark any locks found there dead, and notify any waiters.
 *
@@ -1742,14 +1774,14 @@ void exit_robust_list(struct task_struct
 {
 	struct robust_list_head __user *head = curr->robust_list;
 	struct robust_list __user *entry, *pending;
-	unsigned int limit = ROBUST_LIST_LIMIT;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
 	unsigned long futex_offset;
 
 	/*
 	 * Fetch the list head (which was registered earlier, via
 	 * sys_set_robust_list()):
 	 */
-	if (get_user(entry, &head->list.next))
+	if (fetch_robust_entry(&entry, &head->list.next, &pi))
 		return;
 	/*
 	 * Fetch the relative futex offset:
@@ -1760,10 +1792,11 @@ void exit_robust_list(struct task_struct
 	 * Fetch any possibly pending lock-add first, and handle it
 	 * if it exists:
 	 */
-	if (get_user(pending, &head->list_op_pending))
+	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
 		return;
+
 	if (pending)
-		handle_futex_death((void *)pending + futex_offset, curr);
+		handle_futex_death((void *)pending + futex_offset, curr, pip);
 
 	while (entry != &head->list) {
 		/*
@@ -1772,12 +1805,12 @@ void exit_robust_list(struct task_struct
 		 */
 		if (entry != pending)
 			if (handle_futex_death((void *)entry + futex_offset,
-						curr))
+						curr, pi))
 				return;
 		/*
 		 * Fetch the next entry in the list:
 		 */
-		if (get_user(entry, &entry->next))
+		if (fetch_robust_entry(&entry, &entry->next, &pi))
 			return;
 		/*
 		 * Avoid excessively long or circular lists:
diff -puN kernel/futex_compat.c~fix-robust-pi-futexes-to-be-properly-unlocked-on-unexpected-exit kernel/futex_compat.c
--- a/kernel/futex_compat.c~fix-robust-pi-futexes-to-be-properly-unlocked-on-unexpected-exit
+++ a/kernel/futex_compat.c
@@ -12,6 +12,23 @@
 
 #include <asm/uaccess.h>
 
+
+/*
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ */
+static inline int
+fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
+		   compat_uptr_t *head, int *pi)
+{
+	if (get_user(*uentry, head))
+		return -EFAULT;
+
+	*entry = compat_ptr((*uentry) & ~1);
+	*pi = (unsigned int)(*uentry) & 1;
+
+	return 0;
+}
+
 /*
  * Walk curr->robust_list (very carefully, it's a userspace list!)
  * and mark any locks found there dead, and notify any waiters.
@@ -22,17 +39,16 @@ void compat_exit_robust_list(struct task
 {
 	struct compat_robust_list_head __user *head = curr->compat_robust_list;
 	struct robust_list __user *entry, *pending;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi;
 	compat_uptr_t uentry, upending;
-	unsigned int limit = ROBUST_LIST_LIMIT;
 	compat_long_t futex_offset;
 
 	/*
 	 * Fetch the list head (which was registered earlier, via
 	 * sys_set_robust_list()):
 	 */
-	if (get_user(uentry, &head->list.next))
+	if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
 		return;
-	entry = compat_ptr(uentry);
 	/*
 	 * Fetch the relative futex offset:
 	 */
@@ -42,11 +58,11 @@ void compat_exit_robust_list(struct task
 	 * Fetch any possibly pending lock-add first, and handle it
 	 * if it exists:
 	 */
-	if (get_user(upending, &head->list_op_pending))
+	if (fetch_robust_entry(&upending, &pending,
+			       &head->list_op_pending, &pi))
 		return;
-	pending = compat_ptr(upending);
 	if (upending)
-		handle_futex_death((void *)pending + futex_offset, curr);
+		handle_futex_death((void *)pending + futex_offset, curr, pi);
 
 	while (compat_ptr(uentry) != &head->list) {
 		/*
@@ -55,15 +71,15 @@ void compat_exit_robust_list(struct task
 		 */
 		if (entry != pending)
 			if (handle_futex_death((void *)entry + futex_offset,
-						curr))
+						curr, pi))
 				return;
 
 		/*
 		 * Fetch the next entry in the list:
 		 */
-		if (get_user(uentry, (compat_uptr_t *)&entry->next))
+		if (fetch_robust_entry(&uentry, &entry,
+				       (compat_uptr_t *)&entry->next, &pi))
 			return;
-		entry = compat_ptr(uentry);
 		/*
 		 * Avoid excessively long or circular lists:
 		 */
_
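To illustrate the encoding convention described above, here is a minimal
user-space sketch of how bit 0 of a robust-list pointer can carry the PI
flag.  The list layout follows the robust-list ABI, but the helper names
(ROBUST_ENTRY_PI, enqueue_robust, decode_robust) are invented for this
example and are not part of glibc or the kernel:

#include <stdint.h>

struct robust_list {
	struct robust_list *next;
};

#define ROBUST_ENTRY_PI	1UL	/* bit 0: the pointed-to lock is PI */

/*
 * Link a lock entry at the head of the list; the pointer leading to
 * it carries the lock's PI bit in bit 0:
 */
static void enqueue_robust(struct robust_list **head,
			   struct robust_list *entry, int pi)
{
	entry->next = *head;
	*head = (struct robust_list *)
		((uintptr_t)entry | (pi ? ROBUST_ENTRY_PI : 0));
}

/*
 * Strip and report the PI bit, as the kernel-side fetch_robust_entry()
 * in the patch above does:
 */
static struct robust_list *decode_robust(struct robust_list *ptr, int *pi)
{
	*pi = (int)((uintptr_t)ptr & 1);
	return (struct robust_list *)((uintptr_t)ptr & ~1UL);
}

Because robust-list entries are at least word-aligned, bit 0 of a valid
entry pointer is otherwise always zero: old (non-PI) binaries never set
it, which is why the ABI is preserved.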
Patches currently in -mm which might be from mingo@xxxxxxx are

genirq-endisable_irq_wake-need-refcounting-too.patch
disable-debugging-version-of-write_lock.patch
git-netdev-all.patch
lockdep-fix-sk_dst_check-deadlock.patch
lockdep-split-the-skb_queue_head_init-lock-class.patch
i386-early-fault-handler.patch
make-touch_nmi_watchdog-imply-touch_softlockup_watchdog-on.patch
make-touch_nmi_watchdog-imply-touch_softlockup_watchdog-on-fix.patch
let-warn_on-warn_on_once-return-the-condition.patch
let-warn_on-warn_on_once-return-the-condition-fix.patch
let-warn_on-warn_on_once-return-the-condition-fix-2.patch
fix-cond_resched-fix.patch
spinlock_debug-dont-recompute-jiffies_per_loop.patch
pi-futex-robust-futex-exit.patch
pi-futex-missing-pi_waiters-plist-initialization.patch
fix-robust-pi-futexes-to-be-properly-unlocked-on-unexpected-exit.patch
inotify-fix-deadlock-found-by-lockdep.patch
lockdep-dont-pull-in-includes-when-lockdep-disabled.patch
reducing-local_bh_enable-disable-overhead-in-irqtrace.patch
reference-rt-mutex-design-in-rtmutexc.patch
ipc-msgc-clean-up-coding-style.patch
sched-force-sbin-init-off-isolated-cpus.patch
sched-add-above-background-load-function.patch
mm-implement-swap-prefetching.patch
sched-cleanup-remove-task_t-convert-to-struct-task_struct-prefetch.patch
genirq-convert-the-x86_64-architecture-to-irq-chips.patch
genirq-convert-the-i386-architecture-to-irq-chips.patch
genirq-irq-convert-the-move_irq-flag-from-a-32bit-word-to-a-single-bit.patch
genirq-irq-add-moved_masked_irq.patch
genirq-x86_64-irq-reenable-migrating-irqs-to-other-cpus.patch
genirq-msi-simplify-msi-enable-and-disable.patch
genirq-msi-make-the-msi-boolean-tests-return-either-0-or-1.patch
genirq-msi-implement-helper-functions-read_msi_msg-and-write_msi_msg.patch
genirq-msi-refactor-the-msi_ops.patch
genirq-msi-simplify-the-msi-irq-limit-policy.patch
genirq-irq-add-a-dynamic-irq-creation-api.patch
genirq-ia64-irq-dynamic-irq-support.patch
genirq-i386-irq-dynamic-irq-support.patch
genirq-x86_64-irq-dynamic-irq-support.patch
genirq-msi-make-the-msi-code-irq-based-and-not-vector-based.patch
genirq-x86_64-irq-move-msi-message-composition-into-io_apicc.patch
genirq-i386-irq-move-msi-message-composition-into-io_apicc.patch
genirq-msi-only-build-msi-apicc-on-ia64.patch
genirq-x86_64-irq-remove-the-msi-assumption-that-irq-==-vector.patch
genirq-i386-irq-remove-the-msi-assumption-that-irq-==-vector.patch
genirq-irq-remove-msi-hacks.patch
genirq-irq-generalize-the-check-for-hardirq_bits.patch
genirq-x86_64-irq-make-the-external-irq-handlers-report-their-vector-not-the-irq-number.patch
genirq-x86_64-irq-make-vector_irq-per-cpu.patch
genirq-x86_64-irq-kill-gsi_irq_sharing.patch
genirq-x86_64-irq-kill-irq-compression.patch
detect-atomic-counter-underflows.patch
debug-shared-irqs.patch
make-frame_pointer-default=y.patch
mutex-subsystem-synchro-test-module.patch
vdso-print-fatal-signals.patch
vdso-improve-print_fatal_signals-support-by-adding-memory-maps.patch

-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html