Commit-ID: 18fca1434417b697bac4936b6c6cecab4a4842de Gitweb: http://git.kernel.org/tip/18fca1434417b697bac4936b6c6cecab4a4842de Author: Thomas Gleixner <tglx@xxxxxxxxxxxxx> AuthorDate: Thu, 11 Jun 2009 23:15:43 +0200 Committer: Thomas Gleixner <tglx@xxxxxxxxxxxxx> CommitDate: Fri, 12 Jun 2009 09:42:28 +0200 futex: Fix the write access fault problem for real commit 64d1304a64 (futex: setup writeable mapping for futex ops which modify user space data) did address only half of the problem of write access faults. The patch was made on two wrong assumptions: 1) access_ok(VERIFY_WRITE,...) would actually check write access. On x86 it does _NOT_. It's a pure address range check. 2) a RW mapped region can not go away under us. That's wrong as well. Nobody can prevent another thread to call mprotect(PROT_READ) on that region where the futex resides. If that call hits between the get_user_pages_fast() verification and the actual write access in the atomic region we are toast again. The solution is to not rely on access_ok and get_user() for any write access related fault on private and shared futexes. Instead we need to go through get_user_pages_fast() in the fault path to avoid any of the above pitfalls. If get_user_pages_fast() returns -EFAULT we know that we can not fix it anymore and need to bail out to user space. Remove a bunch of confusing comments on this issue as well. [@stable: I'm providing a backport as the code has changed significantly ] Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Acked-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: stable@xxxxxxxxxx --- kernel/futex.c | 51 ++++++++++++++++++++++++++++++--------------------- 1 files changed, 30 insertions(+), 21 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 80b5ce7..c41a193 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -284,6 +284,31 @@ void put_futex_key(int fshared, union futex_key *key) drop_futex_key_refs(key); } +/* + * get_user_writeable - get user page and verify RW access + * @uaddr: pointer to faulting user space address + * + * We cannot write to the user space address and get_user just faults + * the page in, but does not tell us whether the mapping is writeable. + * + * We can not rely on access_ok() for private futexes as it is just a + * range check and we can neither rely on get_user_pages() as there + * might be a mprotect(PROT_READ) for that mapping after + * get_user_pages() and before the fault in the atomic write access. + */ +static int get_user_writeable(u32 __user *uaddr) +{ + unsigned long addr = (unsigned long)uaddr; + struct page *page; + int ret; + + ret = get_user_pages_fast(addr, 1, 1, &page); + if (!ret) + put_page(page); + + return ret; +} + /** * futex_top_waiter() - Return the highest priority waiter on a futex * @hb: the hash bucket the futex_q's reside in @@ -896,7 +921,6 @@ retry: retry_private: op_ret = futex_atomic_op_inuser(op, uaddr2); if (unlikely(op_ret < 0)) { - u32 dummy; double_unlock_hb(hb1, hb2); @@ -914,7 +938,7 @@ retry_private: goto out_put_keys; } - ret = get_user(dummy, uaddr2); + ret = get_user_writeable(uaddr2); if (ret) goto out_put_keys; @@ -1204,7 +1228,7 @@ retry_private: double_unlock_hb(hb1, hb2); put_futex_key(fshared, &key2); put_futex_key(fshared, &key1); - ret = get_user(curval2, uaddr2); + ret = get_user_writeable(uaddr2); if (!ret) goto retry; goto out; @@ -1482,7 +1506,7 @@ retry: handle_fault: spin_unlock(q->lock_ptr); - ret = get_user(uval, uaddr); + ret = get_user_writeable(uaddr); spin_lock(q->lock_ptr); @@ -1807,7 +1831,6 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, { struct hrtimer_sleeper timeout, *to = NULL; struct futex_hash_bucket *hb; - u32 uval; struct futex_q q; int res, ret; @@ -1909,16 +1932,9 @@ out: return ret != -EINTR ? ret : -ERESTARTNOINTR; uaddr_faulted: - /* - * We have to r/w *(int __user *)uaddr, and we have to modify it - * atomically. Therefore, if we continue to fault after get_user() - * below, we need to handle the fault ourselves, while still holding - * the mmap_sem. This can occur if the uaddr is under contention as - * we have to drop the mmap_sem in order to call get_user(). - */ queue_unlock(&q, hb); - ret = get_user(uval, uaddr); + ret = get_user_writeable(uaddr); if (ret) goto out_put_key; @@ -2013,17 +2029,10 @@ out: return ret; pi_faulted: - /* - * We have to r/w *(int __user *)uaddr, and we have to modify it - * atomically. Therefore, if we continue to fault after get_user() - * below, we need to handle the fault ourselves, while still holding - * the mmap_sem. This can occur if the uaddr is under contention as - * we have to drop the mmap_sem in order to call get_user(). - */ spin_unlock(&hb->lock); put_futex_key(fshared, &key); - ret = get_user(uval, uaddr); + ret = get_user_writeable(uaddr); if (!ret) goto retry; -- To unsubscribe from this list: send the line "unsubscribe linux-tip-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html