Re: tools/testing/selftests/ptrace/vmaccess.c hangs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 5/28/21 7:20 PM, Vitaly Chikunov wrote:
> Hi,
> 
> Possible ptrace related bug in tools/testing/selftests/ptrace/vmaccess.c -
> it timeout every time I run it on any kernel I try, in vm or bare metal.
> 
> Does it run correctly for anybody?
> 

No, that is an open issue, I wrote the test case in anticipation that all of
my patch series got accepted, but the last patch was not picked up for inclusion
in the linux kernel.

Well, actually I still have the patch, see attached patch file.  This makes
the test case pass.

The idea is that PTRACE_ATTACH returns EAGAIN in this case, and the tracer
calls waitpid if that return value is received, which makes the zombie process die,
and then PTRACE_ATTACH will succeed.

Eric wanted to propose a different solution though.
Eric, have you meanwhile been able to come up with a better solution?


Thanks,
Bernd.


> Example vmaccess from v5.12 source tree run on 5.13.0+rc2 on bare metal
> Supermicro server (AMD EPYC):
> 
>   5.10.35-rt-alt1.rt39:~# /usr/lib/kselftests/ptrace/vmaccess
>   TAP version 13
>   1..2
>   # Starting 2 tests from 1 test cases.
>   #  RUN           global.vmaccess ...
>   #            OK  global.vmaccess
>   ok 1 global.vmaccess
>   #  RUN           global.attach ...
>   # attach: Test terminated by timeout
>   #          FAIL  global.attach
>   not ok 2 global.attach
>   # FAILED: 1 / 2 tests passed.
>   # Totals: pass:1 fail:1 xfail:0 xpass:0 skip:0 error:0
> 
> Just to confirm for the latest kernel, it behaves the same [for drm-tip
> based] on 5.13rc2. Also, just tested on 5.11.21 with the same failure.
> 
> Other ptrace tests pass.
> 
> Thanks,
> 
From c5afacba607c38f4c9d9bf106a071f9737ba1479 Mon Sep 17 00:00:00 2001
From: Bernd Edlinger <bernd.edlinger@xxxxxxxxxx>
Date: Tue, 10 Mar 2020 13:20:57 +0100
Subject: [PATCH 1/2] exec: Fix dead-lock in de_thread with ptrace_attach

This removes the last users of cred_guard_mutex
and replaces it with a new mutex exec_guard_mutex,
and a boolean unsafe_execve_in_progress.

This addresses the case when at least one of the
sibling threads is traced, and therefore the trace
process may dead-lock in ptrace_attach, but de_thread
will need to wait for the tracer to continue execution.

The solution is to detect this situation and make
ptrace_attach and similar functions return -EAGAIN,
but only in a situation where a dead-lock is imminent.

This means this is an API change, but only when the
process is traced while execve happens in a
multi-threaded application.

See tools/testing/selftests/ptrace/vmaccess.c
for a test case that gets fixed by this change.

Signed-off-by: Bernd Edlinger <bernd.edlinger@xxxxxxxxxx>
---
 fs/exec.c                    | 42 ++++++++++++++++++++++++++++++++++--------
 fs/proc/base.c               | 20 ++++++++++++++++++--
 include/linux/sched/signal.h | 13 ++++++++++---
 init/init_task.c             |  2 +-
 kernel/cred.c                |  2 +-
 kernel/fork.c                |  2 +-
 kernel/ptrace.c              | 44 +++++++++++++++++++++++++++++++++++++++++---
 kernel/seccomp.c             | 25 +++++++++++++++++++------
 8 files changed, 125 insertions(+), 25 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 18594f1..6ab93c0 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1040,14 +1040,26 @@ static int de_thread(struct task_struct *tsk)
 	struct signal_struct *sig = tsk->signal;
 	struct sighand_struct *oldsighand = tsk->sighand;
 	spinlock_t *lock = &oldsighand->siglock;
+	struct task_struct *t = tsk;
 
 	if (thread_group_empty(tsk))
 		goto no_thread_group;
 
+	spin_lock_irq(lock);
+	while_each_thread(tsk, t) {
+		if (unlikely(t->ptrace))
+			sig->unsafe_execve_in_progress = true;
+	}
+
+	if (unlikely(sig->unsafe_execve_in_progress)) {
+		spin_unlock_irq(lock);
+		mutex_unlock(&sig->exec_guard_mutex);
+		spin_lock_irq(lock);
+	}
+
 	/*
 	 * Kill all other threads in the thread group.
 	 */
-	spin_lock_irq(lock);
 	if (signal_group_exit(sig)) {
 		/*
 		 * Another group action in progress, just
@@ -1438,7 +1450,10 @@ void setup_new_exec(struct linux_binprm * bprm)
 	 */
 	me->mm->task_size = TASK_SIZE;
 	up_write(&me->signal->exec_update_lock);
-	mutex_unlock(&me->signal->cred_guard_mutex);
+	if (unlikely(me->signal->unsafe_execve_in_progress))
+		mutex_lock(&me->signal->exec_guard_mutex);
+	current->signal->unsafe_execve_in_progress = false;
+	mutex_unlock(&me->signal->exec_guard_mutex);
 }
 EXPORT_SYMBOL(setup_new_exec);
 
@@ -1453,22 +1468,30 @@ void finalize_exec(struct linux_binprm *bprm)
 EXPORT_SYMBOL(finalize_exec);
 
 /*
- * Prepare credentials and lock ->cred_guard_mutex.
+ * Prepare credentials and lock ->exec_guard_mutex.
  * setup_new_exec() commits the new creds and drops the lock.
  * Or, if exec fails before, free_bprm() should release ->cred
  * and unlock.
  */
 static int prepare_bprm_creds(struct linux_binprm *bprm)
 {
-	if (mutex_lock_interruptible(&current->signal->cred_guard_mutex))
+	int ret;
+
+	if (mutex_lock_interruptible(&current->signal->exec_guard_mutex))
 		return -ERESTARTNOINTR;
 
+	ret = -EBUSY;
+	if (unlikely(current->signal->unsafe_execve_in_progress))
+		goto out;
+
 	bprm->cred = prepare_exec_creds();
 	if (likely(bprm->cred))
 		return 0;
 
-	mutex_unlock(&current->signal->cred_guard_mutex);
-	return -ENOMEM;
+	ret = -ENOMEM;
+out:
+	mutex_unlock(&current->signal->exec_guard_mutex);
+	return ret;
 }
 
 static void free_bprm(struct linux_binprm *bprm)
@@ -1479,7 +1502,10 @@ static void free_bprm(struct linux_binprm *bprm)
 	}
 	free_arg_pages(bprm);
 	if (bprm->cred) {
-		mutex_unlock(&current->signal->cred_guard_mutex);
+		if (unlikely(current->signal->unsafe_execve_in_progress))
+			mutex_lock(&current->signal->exec_guard_mutex);
+		current->signal->unsafe_execve_in_progress = false;
+		mutex_unlock(&current->signal->exec_guard_mutex);
 		abort_creds(bprm->cred);
 	}
 	if (bprm->file) {
@@ -1542,7 +1568,7 @@ int bprm_change_interp(const char *interp, struct linux_binprm *bprm)
 
 /*
  * determine how safe it is to execute the proposed program
- * - the caller must hold ->cred_guard_mutex to protect against
+ * - the caller must hold ->exec_guard_mutex to protect against
  *   PTRACE_ATTACH or seccomp thread-sync
  */
 static void check_unsafe_exec(struct linux_binprm *bprm)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3851bfc..fe2833c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2735,14 +2735,30 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
 	}
 
 	/* Guard against adverse ptrace interaction */
-	rv = mutex_lock_interruptible(&current->signal->cred_guard_mutex);
+	rv = mutex_lock_interruptible(&current->signal->exec_guard_mutex);
 	if (rv < 0)
 		goto out_free;
 
+	/*
+	 * BIG FAT WARNING - Fragile code ahead.
+	 * Please do not insert any code between these two
+	 * if statements.  It may happen that execve has to
+	 * release the exec_guard_mutex in order to prevent
+	 * deadlocks.  In that case unsafe_execve_in_progress
+	 * will be set.  If that happens you cannot assume that
+	 * the usual guarantees implied by exec_guard_mutex
+	 * are valid.  Just return -EBUSY in that case and
+	 * unlock the mutex immediately.
+	 */
+	rv = -EBUSY;
+	if (unlikely(current->signal->unsafe_execve_in_progress))
+		goto out_unlock;
+
 	rv = security_setprocattr(PROC_I(inode)->op.lsm,
 				  file->f_path.dentry->d_name.name, page,
 				  count);
-	mutex_unlock(&current->signal->cred_guard_mutex);
+out_unlock:
+	mutex_unlock(&current->signal->exec_guard_mutex);
 out_free:
 	kfree(page);
 out:
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 3f6a0fc..407545a 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -214,6 +214,13 @@ struct signal_struct {
 #endif
 
 	/*
+	 * Set while execve is executing but is *not* holding
+	 * exec_guard_mutex to avoid possible dead-locks.
+	 * Only valid when exec_guard_mutex is held.
+	 */
+	bool unsafe_execve_in_progress;
+
+	/*
 	 * Thread is the potential origin of an oom condition; kill first on
 	 * oom
 	 */
@@ -224,11 +231,11 @@ struct signal_struct {
 	struct mm_struct *oom_mm;	/* recorded mm when the thread group got
 					 * killed by the oom killer */
 
-	struct mutex cred_guard_mutex;	/* guard against foreign influences on
+	struct mutex exec_guard_mutex;	/* guard against foreign influences on
 					 * credential calculations
 					 * (notably. ptrace)
-					 * Deprecated do not use in new code.
-					 * Use exec_update_lock instead.
+					 * Held while execve runs, except when
+					 * a sibling thread is being traced.
 					 */
 	struct rw_semaphore exec_update_lock;	/* Held while task_struct is
 						 * being updated during exec,
diff --git a/init/init_task.c b/init/init_task.c
index 3711cda..b917349 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -25,7 +25,7 @@
 	},
 	.multiprocess	= HLIST_HEAD_INIT,
 	.rlim		= INIT_RLIMITS,
-	.cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex),
+	.exec_guard_mutex = __MUTEX_INITIALIZER(init_signals.exec_guard_mutex),
 	.exec_update_lock = __RWSEM_INITIALIZER(init_signals.exec_update_lock),
 #ifdef CONFIG_POSIX_TIMERS
 	.posix_timers = LIST_HEAD_INIT(init_signals.posix_timers),
diff --git a/kernel/cred.c b/kernel/cred.c
index 421b114..f408024 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -295,7 +295,7 @@ struct cred *prepare_creds(void)
 
 /*
  * Prepare credentials for current to perform an execve()
- * - The caller must hold ->cred_guard_mutex
+ * - The caller must hold ->exec_guard_mutex
  */
 struct cred *prepare_exec_creds(void)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index 426cd0c..5602226 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1601,7 +1601,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->oom_score_adj = current->signal->oom_score_adj;
 	sig->oom_score_adj_min = current->signal->oom_score_adj_min;
 
-	mutex_init(&sig->cred_guard_mutex);
+	mutex_init(&sig->exec_guard_mutex);
 	init_rwsem(&sig->exec_update_lock);
 
 	return 0;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 61db50f..e052586 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -386,9 +386,26 @@ static int ptrace_attach(struct task_struct *task, long request,
 	 * under ptrace.
 	 */
 	retval = -ERESTARTNOINTR;
-	if (mutex_lock_interruptible(&task->signal->cred_guard_mutex))
+	if (mutex_lock_interruptible(&task->signal->exec_guard_mutex))
 		goto out;
 
+	/*
+	 * BIG FAT WARNING - Fragile code ahead.
+	 * Please do not insert any code between these two
+	 * if statements.  It may happen that execve has to
+	 * release the exec_guard_mutex in order to prevent
+	 * deadlocks.  In that case unsafe_execve_in_progress
+	 * will be set.  If that happens you cannot assume that
+	 * the usual guarantees implied by exec_guard_mutex
+	 * are valid.  Just return -EAGAIN in that case and
+	 * unlock the mutex immediately.
+	 * The tracer is expected to call wait(2) and handle
+	 * possible events before calling this API again.
+	 */
+	retval = -EAGAIN;
+	if (unlikely(task->signal->unsafe_execve_in_progress))
+		goto unlock_creds;
+
 	task_lock(task);
 	retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS);
 	task_unlock(task);
@@ -441,7 +458,7 @@ static int ptrace_attach(struct task_struct *task, long request,
 unlock_tasklist:
 	write_unlock_irq(&tasklist_lock);
 unlock_creds:
-	mutex_unlock(&task->signal->cred_guard_mutex);
+	mutex_unlock(&task->signal->exec_guard_mutex);
 out:
 	if (!retval) {
 		/*
@@ -466,10 +483,29 @@ static int ptrace_attach(struct task_struct *task, long request,
  */
 static int ptrace_traceme(void)
 {
-	int ret = -EPERM;
+	int ret;
+
+	if (mutex_lock_interruptible(&current->signal->exec_guard_mutex))
+		return -ERESTARTNOINTR;
+
+	/*
+	 * BIG FAT WARNING - Fragile code ahead.
+	 * Please do not insert any code between these two
+	 * if statements.  It may happen that execve has to
+	 * release the exec_guard_mutex in order to prevent
+	 * deadlocks.  In that case unsafe_execve_in_progress
+	 * will be set.  If that happens you cannot assume that
+	 * the usual guarantees implied by exec_guard_mutex
+	 * are valid.  Just return -EAGAIN in that case and
+	 * unlock the mutex immediately.
+	 */
+	ret = -EAGAIN;
+	if (unlikely(current->signal->unsafe_execve_in_progress))
+		goto unlock_creds;
 
 	write_lock_irq(&tasklist_lock);
 	/* Are we already being traced? */
+	ret = -EPERM;
 	if (!current->ptrace) {
 		ret = security_ptrace_traceme(current->parent);
 		/*
@@ -484,6 +520,8 @@ static int ptrace_traceme(void)
 	}
 	write_unlock_irq(&tasklist_lock);
 
+unlock_creds:
+	mutex_unlock(&current->signal->exec_guard_mutex);
 	return ret;
 }
 
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 1d60fc2..9292b25 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -466,7 +466,7 @@ static int is_ancestor(struct seccomp_filter *parent,
 /**
  * seccomp_can_sync_threads: checks if all threads can be synchronized
  *
- * Expects sighand and cred_guard_mutex locks to be held.
+ * Expects sighand and exec_guard_mutex locks to be held.
  *
  * Returns 0 on success, -ve on error, or the pid of a thread which was
  * either not in the correct seccomp mode or did not have an ancestral
@@ -476,9 +476,22 @@ static inline pid_t seccomp_can_sync_threads(void)
 {
 	struct task_struct *thread, *caller;
 
-	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
+	BUG_ON(!mutex_is_locked(&current->signal->exec_guard_mutex));
 	assert_spin_locked(&current->sighand->siglock);
 
+	/*
+	 * BIG FAT WARNING - Fragile code ahead.
+	 * It may happen that execve has to release the
+	 * exec_guard_mutex in order to prevent deadlocks.
+	 * In that case unsafe_execve_in_progress will be set.
+	 * If that happens you cannot assume that the usual
+	 * guarantees implied by exec_guard_mutex are valid.
+	 * Just return -EAGAIN in that case and unlock the mutex
+	 * immediately.
+	 */
+	if (unlikely(current->signal->unsafe_execve_in_progress))
+		return -EAGAIN;
+
 	/* Validate all threads being eligible for synchronization. */
 	caller = current;
 	for_each_thread(caller, thread) {
@@ -564,7 +577,7 @@ void seccomp_filter_release(struct task_struct *tsk)
 /**
  * seccomp_sync_threads: sets all threads to use current's filter
  *
- * Expects sighand and cred_guard_mutex locks to be held, and for
+ * Expects sighand and exec_guard_mutex locks to be held, and for
  * seccomp_can_sync_threads() to have returned success already
  * without dropping the locks.
  *
@@ -573,7 +586,7 @@ static inline void seccomp_sync_threads(unsigned long flags)
 {
 	struct task_struct *thread, *caller;
 
-	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
+	BUG_ON(!mutex_is_locked(&current->signal->exec_guard_mutex));
 	assert_spin_locked(&current->sighand->siglock);
 
 	/* Synchronize all threads. */
@@ -1825,7 +1838,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
 	 * while another thread is in the middle of calling exec.
 	 */
 	if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
-	    mutex_lock_killable(&current->signal->cred_guard_mutex))
+	    mutex_lock_killable(&current->signal->exec_guard_mutex))
 		goto out_put_fd;
 
 	spin_lock_irq(&current->sighand->siglock);
@@ -1848,7 +1861,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
 out:
 	spin_unlock_irq(&current->sighand->siglock);
 	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
-		mutex_unlock(&current->signal->cred_guard_mutex);
+		mutex_unlock(&current->signal->exec_guard_mutex);
 out_put_fd:
 	if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
 		if (ret) {
-- 
1.9.1


[Index of Archives]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Share Photos]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Samba]     [Device Mapper]

  Powered by Linux