[PATCH 11/15] Signal semantics

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Sukadev Bhattiprolu <sukadev@xxxxxxxxxx>

With support for multiple pid namespaces, each pid namespace has a
separate child reaper and this process needs some special handling
of signals.
	- The child reaper should appear like a normal process to other
	  processes in its ancestor namespaces and so should be killable
	  (or not) in the usual way.

      	- The child reaper should receive, from processes in it's active
         and decendent namespaces, only those signals for which it has
         installed a signal handler.

	- System-wide signals (eg: kill signum -1) from within a child namespace
	  should only affect processes within that namespace and descendant
	  namespaces. They should not be posted to processes in ancestor or
	  sibling namespaces.

	- If the sender of a signal does not have a pid_t in the receiver's
	  namespace (eg: a process in init_pid_ns sends a signal to a process
	  in a descendant namespace), the sender's pid should appear as 0
	  in the signal's siginfo structure.

	- Existing rules for SIGIO delivery still apply and a process can
	  choose any other process in its namespace and descendant namespaces
	  to receive the SIGIO signal.
The following appears to be incorrect in the fcntl() man page for
	  F_SETOWN.

             Sending a signal to  the  owner  process  (group)  specified  by
             F_SETOWN  is  subject  to  the  same  permissions  checks as are
             described for kill(2), where the sending process is the one that
             employs F_SETOWN (but see BUGS below).


	Current behavior is that the SIGIO signal is delivered on behalf of
	the process that caused the event (eg: made data available on the
	file) and not the process that called fcntl().

To implement the above requirements, we:

	- Add a check in check_kill_permission() for a process within a
	  namespace sending the fast-pathed, SIGKILL signal.

	- We use a flag, SIGQUEUE_CINIT, to tell the container-init if
	  a signal posted to its queue is from a process within its own
	  namespace. The flag is set in send_signal() if a process attempts
	  to send a signal to its container-init.

	  The SIGQUEUE_CINIT flag is checked in collect_signal() - if
	  the flag is set,  collect_signal() sets the KERN_SIGINFO_CINIT
	  flag in the kern_siginfo. The KERN_SIGINFO_CINIT flag indicates
	  that the sender is from within the namespace and the container-init
	  can choose to ignore the signal.
If the KERN_SIGINFO_CINIT flag is clear in get_signal_to_deliver(),
	  the signal originated from an ancestor namespace and so the
	  container-init honors the signal.


Note: We currently use two flags, SIGQUEUE_CINIT, KERN_SIGINFO_CINIT to
	avoid modifying 'struct sigqueue'. If 'kern_siginfo' approach is
	feasible, we could use 'kern_siginfo' in sigqueue and eliminate
	SIGQUEUE_CINIT.

Signed-off-by: Sukadev Bhattiprolu <sukadev@xxxxxxxxxx>
Signed-off-by: Pavel Emelyanov <xemul@xxxxxxxxxx>

---

include/linux/pid.h    |    3 ++
include/linux/signal.h | 1 kernel/pid.c | 46 +++++++++++++++++++++++++++++++++++
kernel/signal.c        |   63 ++++++++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 112 insertions(+), 1 deletion(-)

diff -upr linux-2.6.23-rc1-mm1.orig/include/linux/pid.h linux-2.6.23-rc1-mm1-7/include/linux/pid.h
--- linux-2.6.23-rc1-mm1.orig/include/linux/pid.h	2007-07-26 16:34:45.000000000 +0400
+++ linux-2.6.23-rc1-mm1-7/include/linux/pid.h	2007-07-26 16:36:37.000000000 +0400
@@ -71,6 +77,9 @@ extern struct task_struct *FASTCALL(pid_
extern struct task_struct *FASTCALL(get_pid_task(struct pid *pid,
						enum pid_type));

+extern int task_visible_in_pid_ns(struct task_struct *tsk,
+					struct pid_namespace *ns);
+extern int pid_ns_equal(struct task_struct *tsk);
extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type);

/*
diff -upr linux-2.6.23-rc1-mm1.orig/include/linux/signal.h linux-2.6.23-rc1-mm1-7/include/linux/signal.h
--- linux-2.6.23-rc1-mm1.orig/include/linux/signal.h	2007-07-26 16:34:45.000000000 +0400
+++ linux-2.6.23-rc1-mm1-7/include/linux/signal.h	2007-07-26 16:36:37.000000000 +0400
@@ -20,6 +27,7 @@ struct sigqueue {

/* flags values. */
#define SIGQUEUE_PREALLOC	1
+#define SIGQUEUE_CINIT		2

struct sigpending {
	struct list_head list;
diff -upr linux-2.6.23-rc1-mm1.orig/kernel/pid.c linux-2.6.23-rc1-mm1-7/kernel/pid.c
--- linux-2.6.23-rc1-mm1.orig/kernel/pid.c	2007-07-26 16:34:45.000000000 +0400
+++ linux-2.6.23-rc1-mm1-7/kernel/pid.c	2007-07-26 16:36:37.000000000 +0400
@@ -318,6 +355,52 @@ struct task_struct * fastcall pid_task(s
}

/*
+ * Return TRUE if the task @p is visible in the pid namespace @ns
+ *
+ * Note: @p is visible in @ns if the active-pid-ns of @p is either equal to
+ * 	@ns or is a descendant of @ns.
+ *
+ * 	@p is not visible in @ns if active-pid-ns of @p is an ancestor of @ns.
+ * 	Eg: Processes in init-pid-ns are not visible in child pid namespaces.
+ * 	They should not receive any system-wide signals from a child-pid-
+ * 	namespace for instance.
+ */
+int task_visible_in_pid_ns(struct task_struct *p, struct pid_namespace *ns)
+{
+	int i;
+	struct pid *pid = task_pid(p);
+
+	if (!pid)
+		return 0;
+
+	for (i = 0; i <= pid->level; i++) {
+		if (pid->numbers[i].ns == ns)
+			return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(task_visible_in_pid_ns);
+
+/*
+ * Return TRUE if the active pid namespace of @tsk is same as active
+ * pid namespace of 'current'
+ */
+
+static inline struct pid_namespace *pid_active_ns(struct pid *pid)
+{
+	if (pid == NULL)
+		return NULL;
+
+	return pid->numbers[pid->level].ns;
+}
+
+int pid_ns_equal(struct task_struct *tsk)
+{
+	return pid_active_ns(task_pid(tsk)) == pid_active_ns(task_pid(current));
+}
+
+/*
 * Must be called under rcu_read_lock() or with tasklist_lock read-held.
 */
struct task_struct *find_task_by_pid_type_ns(int type, int nr,
diff -upr linux-2.6.23-rc1-mm1.orig/kernel/signal.c linux-2.6.23-rc1-mm1-7/kernel/signal.c
--- linux-2.6.23-rc1-mm1.orig/kernel/signal.c	2007-07-26 16:34:45.000000000 +0400
+++ linux-2.6.23-rc1-mm1-7/kernel/signal.c	2007-07-26 16:36:37.000000000 +0400
@@ -323,6 +325,9 @@ static int collect_signal(int sig, struc
	if (first) {
		list_del_init(&first->list);
		copy_siginfo(info, &first->info);
+		if (first->flags & SIGQUEUE_CINIT)
+			kinfo->flags |= KERN_SIGINFO_CINIT;
+
		__sigqueue_free(first);
		if (!still_pending)
			sigdelset(&list->signal, sig);
@@ -343,6 +348,8 @@ static int collect_signal(int sig, struc
{
	int sig = next_signal(pending, mask);

+	kinfo->flags &= ~KERN_SIGINFO_CINIT;
+
	if (sig) {
		if (current->notifier) {
			if (sigismember(current->notifier_mask, sig)) {
@@ -522,6 +547,20 @@ static int rm_from_queue(unsigned long m
	return 1;
}

+static int deny_signal_to_container_init(struct task_struct *tsk, int sig)
+{
+	/*
+	 * If receiver is the container-init of sender and signal is SIGKILL
+	 * reject it right-away. If signal is any other one, let the container
+	 * init decide (in get_signal_to_deliver()) whether to handle it or
+	 * ignore it.
+	 */
+	if (is_container_init(tsk) && (sig == SIGKILL) && pid_ns_equal(tsk))
+		return -EPERM;
+
+	return 0;
+}
+
/*
 * Bad permissions for sending the signal
 */
@@ -545,6 +584,10 @@ static int check_kill_permission(int sig
	    && !capable(CAP_KILL))
		return error;

+	error = deny_signal_to_container_init(t, sig);
+	if (error)
+		return error;
+
	return security_task_kill(t, info, sig, 0);
}

@@ -659,6 +702,34 @@ static void handle_stop_signal(int sig, }
}

+static void encode_sender_info(struct task_struct *t, struct sigqueue *q)
+{
+	/*
+	 * If sender (i.e 'current') and receiver have the same active
+	 * pid namespace and the receiver is the container-init, set the
+	 * SIGQUEUE_CINIT flag. This tells the container-init that the
+	 * signal originated in its own namespace and so it can choose
+	 * to ignore the signal.
+	 *
+	 * If the receiver is the container-init of a pid namespace,
+	 * but the sender is from an ancestor pid namespace, the
+	 * container-init cannot ignore the signal. So clear the
+	 * SIGQUEUE_CINIT flag in this case.
+	 *
+	 * Also, if the sender does not have a pid_t in the receiver's
+	 * active pid namespace, set si_pid to 0 and pretend it originated
+	 * from the kernel.
+	 */
+	if (pid_ns_equal(t)) {
+		if (is_container_init(t)) {
+			q->flags |= SIGQUEUE_CINIT;
+		}
+	} else {
+		q->info.si_pid = 0;
+		q->info.si_code = SI_KERNEL;
+	}
+}
+
static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
			struct sigpending *signals)
{
@@ -710,6 +781,7 @@ static int send_signal(int sig, struct s
			copy_siginfo(&q->info, info);
			break;
		}
+		encode_sender_info(t, q);
	} else if (!is_si_special(info)) {
		if (sig >= SIGRTMIN && info->si_code != SI_USER)
		/*
@@ -1149,6 +1221,8 @@ EXPORT_SYMBOL_GPL(kill_pid_info_as_uid);
static int kill_something_info(int sig, struct siginfo *info, int pid)
{
	int ret;
+	struct pid_namespace *my_ns = task_active_pid_ns(current);
+
	rcu_read_lock();
	if (!pid) {
		ret = kill_pgrp_info(sig, info, task_pgrp(current));
@@ -1158,6 +1232,13 @@ static int kill_something_info(int sig,
		read_lock(&tasklist_lock);
		for_each_process(p) {
+			/*
+			 * System-wide signals apply only to the sender's
+			 * pid namespace, unless issued from init_pid_ns.
+			 */
+			if (!task_visible_in_pid_ns(p, my_ns))
+				continue;
+
			if (p->pid > 1 && p->tgid != current->tgid) {
				int err = group_send_sig_info(sig, info, p);
				++count;
@@ -1852,7 +1950,7 @@ relock:
		 * within that pid space. It can of course get signals from
		 * its parent pid space.
		 */
-		if (current == task_child_reaper(current))
+		if (kinfo.flags & KERN_SIGINFO_CINIT)
			continue;

		if (sig_kernel_stop(signr)) {

_______________________________________________
Containers mailing list
Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linux-foundation.org/mailman/listinfo/containers

[Index of Archives]     [Cgroups]     [Netdev]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux