[RFC v1 06/17] seccomp: Add the SECCOMP_ADD_CHECKER_GROUP command

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



A new command SECCOMP_ADD_CHECKER_GROUP allows userland seccomp filters
to reference kernel objects with checkers in a batch.

Each checker is autonomous and new ones can easily be added in the
future. There are currently two checkers for path objects:
* SECCOMP_CHECK_FS_LITERAL checks if a string matches a defined path;
* SECCOMP_CHECK_FS_BENEATH checks if the path representation of a string
  is equal or equivalent to a file belonging to a defined path.

These checkers can use a bitmask of flags to match a path:
* SECCOMP_OBJFLAG_FS_DENTRY matches a unique file;
* SECCOMP_OBJFLAG_FS_INODE only matches a file inode (must be used with
  the device flag);
* SECCOMP_OBJFLAG_FS_DEVICE matches the device of a file;
* SECCOMP_OBJFLAG_FS_MOUNT matches the mount point of a file;
* SECCOMP_OBJFLAG_FS_NOFOLLOW does not follow a symlink for the
  initial checker evaluation.

Signed-off-by: Mickaël Salaün <mic@xxxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: David Drysdale <drysdale@xxxxxxxxxx>
Cc: James Morris <james.l.morris@xxxxxxxxxx>
Cc: Kees Cook <keescook@xxxxxxxxxxxx>
Cc: Michael Kerrisk <mtk@xxxxxxxx>
Cc: Paul Moore <pmoore@xxxxxxxxxx>
Cc: Serge E. Hallyn <serge@xxxxxxxxxx>
Cc: Will Drewry <wad@xxxxxxxxxxxx>
---
 include/linux/seccomp.h       |  32 ++++++
 include/uapi/linux/seccomp.h  |  81 ++++++++++++++++
 kernel/seccomp.c              | 221 ++++++++++++++++++++++++++++++++++++++++++
 security/seccomp/Makefile     |   2 +-
 security/seccomp/checker_fs.c | 102 +++++++++++++++++++
 5 files changed, 437 insertions(+), 1 deletion(-)
 create mode 100644 security/seccomp/checker_fs.c

diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 2296e6b2f690..78f5861a0328 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -9,8 +9,10 @@
 
 #include <linux/thread_info.h>
 #include <asm/seccomp.h>
+#include <linux/path.h>
 
 struct seccomp_filter;
+struct seccomp_filter_checker_group;
 /**
  * struct seccomp - the state of a seccomp'ed process
  *
@@ -19,12 +21,20 @@ struct seccomp_filter;
  * @filter: must always point to a valid seccomp-filter or NULL as it is
  *          accessed without locking during system call entry.
  *
+ * @checker_group: an append-only list of argument checkers usable by filters
+ *                 created after the last update.
+ *
  *          @filter must only be accessed from the context of current as there
  *          is no read locking.
  */
 struct seccomp {
 	int mode;
 	struct seccomp_filter *filter;
+
+#ifdef CONFIG_SECURITY_SECCOMP
+	/* @checker_group is only used for filter creation and unique per thread */
+	struct seccomp_filter_checker_group *checker_group;
+#endif
 };
 
 #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
@@ -85,6 +95,28 @@ static inline int seccomp_mode(struct seccomp *s)
 #ifdef CONFIG_SECCOMP_FILTER
 extern void put_seccomp_filter(struct task_struct *tsk);
 extern void get_seccomp_filter(struct task_struct *tsk);
+
+#ifdef CONFIG_SECURITY_SECCOMP
+struct seccomp_filter_object_path {
+	u32 flags;		/* bitmask of SECCOMP_OBJFLAG_FS_* */
+	struct path path;	/* reference dropped with path_put() */
+};
+
+struct seccomp_filter_checker {
+	/* e.g. SECCOMP_CHECK_FS_LITERAL */
+	u32 check;
+	/* e.g. SECCOMP_OBJTYPE_PATH */
+	u32 type;
+	union {	/* member selected by @type */
+		struct seccomp_filter_object_path object_path;
+	};
+};
+
+/* Fill the kernel checker from a userland FS checker; returns 0 on success. */
+long seccomp_set_argcheck_fs(const struct seccomp_checker *,
+			     struct seccomp_filter_checker *);
+#endif /* CONFIG_SECURITY_SECCOMP */
+
 #else  /* CONFIG_SECCOMP_FILTER */
 static inline void put_seccomp_filter(struct task_struct *tsk)
 {
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 0f238a43ff1e..ca7e9343f3d7 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -13,6 +13,7 @@
 /* Valid operations for seccomp syscall. */
 #define SECCOMP_SET_MODE_STRICT	0
 #define SECCOMP_SET_MODE_FILTER	1
+#define SECCOMP_ADD_CHECKER_GROUP	2 /* add a group of checkers */
 
 /* Valid flags for SECCOMP_SET_MODE_FILTER */
 #define SECCOMP_FILTER_FLAG_TSYNC	1
@@ -35,6 +36,25 @@
 #define SECCOMP_RET_ACTION	0x7fff0000U
 #define SECCOMP_RET_DATA	0x0000ffffU
 
+/* Object checks */
+#define SECCOMP_CHECK_FS_LITERAL	1
+#define SECCOMP_CHECK_FS_BENEATH	2
+
+/* Object flags */
+#define SECCOMP_OBJFLAG_FS_DENTRY	(1 << 0)
+#define SECCOMP_OBJFLAG_FS_INODE	(1 << 1)
+#define SECCOMP_OBJFLAG_FS_DEVICE	(1 << 2)
+#define SECCOMP_OBJFLAG_FS_MOUNT	(1 << 3)
+/* Do not follow a symlink when evaluating the argument path (cf. fs/namei.c).
+ * This flag is only used by the seccomp filter, not by the LSM check that
+ * enforces access control. You need to take care of the different path
+ * interpretation per syscall (e.g. rename(2) or open(2) with O_NOFOLLOW).
+ */
+#define SECCOMP_OBJFLAG_FS_NOFOLLOW	(1 << 4)
+
+/* Object types */
+#define SECCOMP_OBJTYPE_PATH		1
+
 /**
  * struct seccomp_data - the format the BPF program executes over.
  * @nr: the system call number
@@ -51,4 +71,65 @@ struct seccomp_data {
 	__u64 args[6];
 };
 
+/* TODO: Add a "at" field (default to AT_FDCWD) */
+struct seccomp_object_path {
+	/* Bitmask of SECCOMP_OBJFLAG_FS_*, e.g. SECCOMP_OBJFLAG_FS_DENTRY */
+	__u32 flags;
+	const char *path;	/* path string of the object to reference */
+};
+
+struct seccomp_checker {
+	__u32 check;	/* e.g. SECCOMP_CHECK_FS_LITERAL */
+	__u32 type;	/* e.g. SECCOMP_OBJTYPE_PATH */
+	/* Must match the checker extra size, if any (0 for @object_path) */
+	unsigned int len;
+	/* Checkers must be pointers to allow future additions */
+	union {
+		const struct seccomp_object_path *object_path;
+	};
+};
+
+#define SECCOMP_MAKE_PATH_DENTRY(_p)	/* dentry flag only */		\
+	{							\
+		.flags = SECCOMP_OBJFLAG_FS_DENTRY,		\
+		.path = _p,					\
+	}
+
+#define SECCOMP_MAKE_PATH_INODE(_p)	/* inode plus its device */	\
+	{							\
+		.flags = SECCOMP_OBJFLAG_FS_INODE |		\
+			SECCOMP_OBJFLAG_FS_DEVICE,		\
+		.path = _p,					\
+	}
+
+#define SECCOMP_MAKE_PATH_MOUNT(_p)	/* mount flag only */		\
+	{							\
+		.flags = SECCOMP_OBJFLAG_FS_MOUNT,		\
+		.path = _p,					\
+	}
+
+#define SECCOMP_MAKE_PATH_ALL(_p)	/* every flag but NOFOLLOW */	\
+	{							\
+		.flags = SECCOMP_OBJFLAG_FS_DENTRY |		\
+			SECCOMP_OBJFLAG_FS_INODE |		\
+			SECCOMP_OBJFLAG_FS_DEVICE |		\
+			SECCOMP_OBJFLAG_FS_MOUNT,		\
+		.path = _p,					\
+	}
+
+#define SECCOMP_MAKE_OBJ_PATH(_c, _p)	/* _c: SECCOMP_CHECK_ suffix */	\
+	{							\
+		.check = SECCOMP_CHECK_##_c,			\
+		.type = SECCOMP_OBJTYPE_PATH,			\
+		.len = 0,					\
+		.object_path = _p,				\
+	}
+
+struct seccomp_checker_group {
+	__u8 version;		/* must be 1 */
+	__u8 id;		/* must be non-zero and not already used */
+	unsigned int len;	/* number of @checkers, from 1 to 64 */
+	const struct seccomp_checker (*checkers)[];
+};
+
 #endif /* _UAPI_LINUX_SECCOMP_H */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 2c94693e4163..0e5471d2891c 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -6,6 +6,8 @@
  * Copyright (C) 2012 Google, Inc.
  * Will Drewry <wad@xxxxxxxxxxxx>
  *
+ * Copyright (C) 2016  Mickaël Salaün <mic@xxxxxxxxxxx>
+ *
  * This defines a simple but solid secure-computing facility.
  *
  * Mode 1 uses a fixed list of allowed system calls.
@@ -60,6 +62,34 @@ struct seccomp_filter {
 	struct bpf_prog *prog;
 };
 
+/* Argument group attached to seccomp filters
+ *
+ * @usage keeps track of the references
+ * @prev links to the previous checker_group
+ * @id is given by userland to easily check a filter statically and not
+ *     leak data from the kernel
+ * @checkers_len is the number of @checkers elements
+ * @checkers contains the checkers
+ *
+ * seccomp_filter_checker_group checkers are organized in a tree linked via the
+ * @prev pointer. For any task, it appears to be a singly-linked list starting
+ * with current->seccomp.filter->checker_group, the most recently added argument
+ * group. All filters created by a process share the argument groups created by
+ * this process until the filter creation but they cannot be changed. However,
+ * multiple argument groups may share a @prev node, which results in a
+ * unidirectional tree existing in memory. They are not inherited through
+ * fork().
+ */
+#ifdef CONFIG_SECURITY_SECCOMP
+struct seccomp_filter_checker_group {
+	atomic_t usage;
+	struct seccomp_filter_checker_group *prev;
+	u8 id;
+	unsigned int checkers_len;
+	struct seccomp_filter_checker checkers[];
+};
+#endif /* CONFIG_SECURITY_SECCOMP */
+
 /* Limit any path through the tree to 256KB worth of instructions. */
 #define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter))
 
@@ -467,6 +497,38 @@ void get_seccomp_filter(struct task_struct *tsk)
 	atomic_inc(&orig->usage);
 }
 
+#ifdef CONFIG_SECURITY_SECCOMP
+/* Release the object referenced by @checker; do not free @checker itself */
+static void put_seccomp_obj(struct seccomp_filter_checker *checker)
+{
+	switch (checker->type) {
+	case SECCOMP_OBJTYPE_PATH:
+		/* Pointer checks done in path_put() */
+		path_put(&checker->object_path.path);
+		break;
+	default:
+		WARN_ON(1);	/* unknown or unset object type */
+	}
+}
+
+/* Drop a reference on @checker_group; free it and then unshared ancestors */
+static void put_seccomp_checker_group(struct seccomp_filter_checker_group *checker_group)
+{
+	unsigned int i;
+	struct seccomp_filter_checker_group *orig = checker_group;
+
+	/* Clean up single-reference branches iteratively. */
+	while (orig && atomic_dec_and_test(&orig->usage)) {
+		struct seccomp_filter_checker_group *freeme = orig;
+
+		for (i = 0; i < freeme->checkers_len; i++)
+			put_seccomp_obj(&freeme->checkers[i]);
+		orig = orig->prev;
+		kfree(freeme);
+	}
+}
+#endif /* CONFIG_SECURITY_SECCOMP */
+
 static inline void seccomp_filter_free(struct seccomp_filter *filter)
 {
 	if (filter) {
@@ -485,6 +547,9 @@ void put_seccomp_filter(struct task_struct *tsk)
 		orig = orig->prev;
 		seccomp_filter_free(freeme);
 	}
+#ifdef CONFIG_SECURITY_SECCOMP
+	put_seccomp_checker_group(tsk->seccomp.checker_group);
+#endif
 }
 
 /**
@@ -813,6 +878,158 @@ static inline long seccomp_set_mode_filter(unsigned int flags,
 }
 #endif
 
+#ifdef CONFIG_SECURITY_SECCOMP
+
+/* Limit checkers number to 64 to be able to show matches with a bitmask. */
+#define SECCOMP_CHECKERS_MAX 64
+
+/* Limit arg group list and their checkers to 256KB. */
+#define SECCOMP_GROUP_CHECKERS_MAX_SIZE (1 << 18)
+
+/* Validate a userland checker group and prepend it to current's group list. */
+static long seccomp_add_checker_group(unsigned int flags, const char __user *group)
+{
+	struct seccomp_checker_group kgroup;
+	struct seccomp_checker (*kcheckers)[], *user_checker;
+	struct seccomp_filter_checker_group *filter_group, *walker;
+	struct seccomp_filter_checker *kernel_obj;
+	unsigned int i;
+	unsigned long group_size, kcheckers_size, full_group_size;
+	long result;
+
+	if (!task_no_new_privs(current) &&
+	    security_capable_noaudit(current_cred(),
+				     current_user_ns(), CAP_SYS_ADMIN) != 0)
+		return -EACCES;
+	if (flags != 0 || !group)
+		return -EINVAL;
+
+#ifdef CONFIG_COMPAT
+	if (is_compat_task()) {
+		struct compat_seccomp_checker_group kgroup32;
+
+		if (copy_from_user(&kgroup32, group, sizeof(kgroup32)))
+			return -EFAULT;
+		kgroup.version = kgroup32.version;
+		kgroup.id = kgroup32.id;
+		kgroup.len = kgroup32.len;
+		kgroup.checkers = compat_ptr(kgroup32.checkers);
+	} else			/* Falls through to the if below */
+#endif /* CONFIG_COMPAT */
+	if (copy_from_user(&kgroup, group, sizeof(kgroup)))
+		return -EFAULT;
+
+	if (kgroup.version != 1)
+		return -EINVAL;
+	/* The group ID 0 means no evaluated checkers */
+	if (kgroup.id == 0)
+		return -EINVAL;
+	if (kgroup.len == 0)
+		return -EINVAL;
+	if (kgroup.len > SECCOMP_CHECKERS_MAX)
+		return -E2BIG;
+
+	/* Validate resulting checker_group ID and length. */
+	group_size = sizeof(*filter_group) +
+		kgroup.len * sizeof(filter_group->checkers[0]);
+	full_group_size = group_size;
+	for (walker = current->seccomp.checker_group;
+			walker; walker = walker->prev) {
+		if (walker->id == kgroup.id)
+			return -EINVAL;
+		/* TODO: add penalty? */
+		full_group_size += sizeof(*walker) +
+			walker->checkers_len * sizeof(walker->checkers[0]);
+	}
+	if (full_group_size > SECCOMP_GROUP_CHECKERS_MAX_SIZE)
+		return -ENOMEM;
+
+	kcheckers_size = kgroup.len * sizeof((*kcheckers)[0]);
+	kcheckers = kmalloc(kcheckers_size, GFP_KERNEL);
+	if (!kcheckers)
+		return -ENOMEM;
+
+#ifdef CONFIG_COMPAT
+	if (is_compat_task()) {
+		unsigned int kcheckers32_size;
+		struct compat_seccomp_checker (*kcheckers32)[];
+
+		kcheckers32_size = kgroup.len * sizeof((*kcheckers32)[0]);
+		kcheckers32 = kmalloc(kcheckers32_size, GFP_KERNEL);
+		if (!kcheckers32) {
+			result = -ENOMEM;
+			goto free_kcheckers;
+		}
+		if (copy_from_user(kcheckers32, kgroup.checkers, kcheckers32_size)) {
+			kfree(kcheckers32);
+			result = -EFAULT;
+			goto free_kcheckers;
+		}
+		for (i = 0; i < kgroup.len; i++) {
+			(*kcheckers)[i].check = (*kcheckers32)[i].check;
+			(*kcheckers)[i].type = (*kcheckers32)[i].type;
+			(*kcheckers)[i].len = (*kcheckers32)[i].len;
+			(*kcheckers)[i].object_path = compat_ptr((*kcheckers32)[i].checker);
+		}
+		kfree(kcheckers32);
+	} else			/* Falls through to the if below */
+#endif /* CONFIG_COMPAT */
+	if (copy_from_user(kcheckers, kgroup.checkers, kcheckers_size)) {
+		result = -EFAULT;
+		goto free_kcheckers;
+	}
+
+	/* Zeroed so that ->prev is NULL and ->checkers can be freed on error */
+	filter_group = kzalloc(group_size, GFP_KERNEL);
+	if (!filter_group) {
+		result = -ENOMEM;
+		goto free_kcheckers;
+	}
+	filter_group->id = kgroup.id;
+	filter_group->checkers_len = kgroup.len;
+	for (i = 0; i < filter_group->checkers_len; i++) {
+		user_checker = &(*kcheckers)[i];
+		kernel_obj = &filter_group->checkers[i];
+		switch (user_checker->check) {
+		case SECCOMP_CHECK_FS_LITERAL:
+		case SECCOMP_CHECK_FS_BENEATH:
+			kernel_obj->check = user_checker->check;
+			result =
+			    seccomp_set_argcheck_fs(user_checker, kernel_obj);
+			if (result)
+				goto free_group;
+			break;
+		default:
+			result = -EINVAL;
+			goto free_group;
+		}
+	}
+
+	atomic_set(&filter_group->usage, 1);
+	filter_group->prev = current->seccomp.checker_group;
+	/* No need to update filter_group->prev->usage because it gets one
+	 * reference from this group but loses one from
+	 * current->seccomp.checker_group.
+	 */
+	current->seccomp.checker_group = filter_group;
+	/* XXX: Return the number of groups? */
+	result = 0;
+	goto free_kcheckers;
+
+free_group:
+	for (i = 0; i < filter_group->checkers_len; i++) {
+		kernel_obj = &filter_group->checkers[i];
+		if (kernel_obj->type)
+			put_seccomp_obj(kernel_obj);
+	}
+	kfree(filter_group);
+
+free_kcheckers:
+	kfree(kcheckers);
+	return result;
+}
+#endif /* CONFIG_SECURITY_SECCOMP */
+
 /* Common entry point for both prctl and syscall. */
 static long do_seccomp(unsigned int op, unsigned int flags,
 		       const char __user *uargs)
@@ -824,6 +1041,10 @@ static long do_seccomp(unsigned int op, unsigned int flags,
 		return seccomp_set_mode_strict();
 	case SECCOMP_SET_MODE_FILTER:
 		return seccomp_set_mode_filter(flags, uargs);
+#ifdef CONFIG_SECURITY_SECCOMP
+	case SECCOMP_ADD_CHECKER_GROUP:
+		return seccomp_add_checker_group(flags, uargs);
+#endif /* CONFIG_SECURITY_SECCOMP */
 	default:
 		return -EINVAL;
 	}
diff --git a/security/seccomp/Makefile b/security/seccomp/Makefile
index f2e848d81138..1ed68b23a922 100644
--- a/security/seccomp/Makefile
+++ b/security/seccomp/Makefile
@@ -1,3 +1,3 @@
 obj-$(CONFIG_SECURITY_SECCOMP) := seccomp.o
 
-seccomp-y := lsm.o
+seccomp-y := lsm.o checker_fs.o
diff --git a/security/seccomp/checker_fs.c b/security/seccomp/checker_fs.c
new file mode 100644
index 000000000000..c11efc892de5
--- /dev/null
+++ b/security/seccomp/checker_fs.c
@@ -0,0 +1,102 @@
+/*
+ * Seccomp Linux Security Module - File System Checkers
+ *
+ * Copyright (C) 2016  Mickaël Salaün <mic@xxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/compat.h>
+#include <linux/namei.h>	/* user_lpath() */
+#include <linux/path.h>
+#include <linux/seccomp.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>	/* copy_from_user() */
+
+#ifdef CONFIG_COMPAT
+/* Layout of struct seccomp_object_path as passed by compat tasks */
+struct compat_seccomp_object_path {
+	__u32 flags;
+	compat_uptr_t path;	/* const char * */
+};
+#endif
+
+static const u32 path_flags_mask_literal =
+	SECCOMP_OBJFLAG_FS_DENTRY |
+	SECCOMP_OBJFLAG_FS_INODE |
+	SECCOMP_OBJFLAG_FS_DEVICE |
+	SECCOMP_OBJFLAG_FS_MOUNT |
+	SECCOMP_OBJFLAG_FS_NOFOLLOW;
+
+static const u32 path_flags_mask_beneath =
+	SECCOMP_OBJFLAG_FS_DENTRY |
+	SECCOMP_OBJFLAG_FS_INODE |
+	SECCOMP_OBJFLAG_FS_DEVICE |	/* FS_INODE is rejected without it */
+	SECCOMP_OBJFLAG_FS_NOFOLLOW;
+
+/* Return true if @flags is empty, insecure or not allowed for @check. */
+static bool wrong_check_flags(u32 check, u32 flags)
+{
+	u32 path_flags_mask;
+
+	/* Do not allow insecure check: inode without device */
+	if ((flags & SECCOMP_OBJFLAG_FS_INODE) &&
+	    !(flags & SECCOMP_OBJFLAG_FS_DEVICE))
+		return true;
+
+	switch (check) {
+	case SECCOMP_CHECK_FS_LITERAL:
+		path_flags_mask = path_flags_mask_literal;
+		break;
+	case SECCOMP_CHECK_FS_BENEATH:
+		path_flags_mask = path_flags_mask_beneath;
+		break;
+	default:
+		WARN_ON(1);
+		return true;
+	}
+	/* Need at least one flag, but only in the allowed mask */
+	return !(flags & path_flags_mask) || (flags & ~path_flags_mask);
+}
+/* Copy the userland path object, validate its flags and resolve its path */
+static long set_argtype_path(const struct seccomp_checker *user_checker,
+			     struct seccomp_filter_checker *kernel_checker)
+{
+	struct seccomp_object_path user_cp;
+
+	/* @len is not used for @object_path */
+	if (user_checker->len != 0)
+		return -EINVAL;
+
+#ifdef CONFIG_COMPAT
+	if (is_compat_task()) {
+		struct compat_seccomp_object_path user_cp32;
+
+		if (copy_from_user(&user_cp32, user_checker->object_path, sizeof(user_cp32)))
+			return -EFAULT;
+		user_cp.flags = user_cp32.flags;
+		user_cp.path = compat_ptr(user_cp32.path);
+	} else			/* Falls through to the if below */
+#endif
+	if (copy_from_user(&user_cp, user_checker->object_path, sizeof(user_cp)))
+		return -EFAULT;
+	/* @kernel_checker->check must already be set by the caller */
+	if (wrong_check_flags(kernel_checker->check, user_cp.flags))
+		return -EINVAL;
+	kernel_checker->object_path.flags = user_cp.flags;
+	/* Do not follow symlinks for objects */
+	return user_lpath(user_cp.path, &kernel_checker->object_path.path);
+}
+/* Record the object type in @kernel_checker and set up the matching object */
+long seccomp_set_argcheck_fs(const struct seccomp_checker *user_checker,
+			     struct seccomp_filter_checker *kernel_checker)
+{
+	switch (user_checker->type) {
+	case SECCOMP_OBJTYPE_PATH:
+		kernel_checker->type = user_checker->type;
+		return set_argtype_path(user_checker, kernel_checker);
+	}
+	return -EINVAL;	/* unsupported object type */
+}
-- 
2.8.0.rc3

--
To unsubscribe from this list: send the line "unsubscribe linux-api" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux