[PATCH] bpf: Add CONFIG_BPF_HELPER_STRICT

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



In container environment, ebpf helpers could be used maliciously to 
   leak information, DOS, even escape from containers. 
   CONFIG_BPF_HELPER_STRICT is as a mitigation of it. 
   Related Link: https://rolandorange.zone/report.html

Signed-off-by: Roland <kernel.pwn@xxxxxxxxxxx>
---
 include/linux/kernel.h     |  7 ++++++
 include/linux/sched.h      |  4 +++-
 include/uapi/linux/bpf.h   |  4 ++++
 include/uapi/linux/sched.h |  4 ++++
 kernel/bpf/Kconfig         |  6 +++++
 kernel/bpf/syscall.c       | 48 +++++++++++++++++++++++++++++++++++---
 kernel/fork.c              | 13 +++++++++++
 kernel/trace/bpf_trace.c   | 29 +++++++++++++++++++++--
 8 files changed, 109 insertions(+), 6 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index fe6efb24d..61f7fcbc9 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -509,3 +509,10 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 	 BUILD_BUG_ON_ZERO((perms) & 2) +					\
 	 (perms))
 #endif
+
+#ifdef CONFIG_BPF_HELPER_STRICT
+# define BPF_PROBE_WRITE_BIT 1
+# define BPF_PROBE_READ_BIT  2
+# define BPF_SEND_SIGNAL_BIT 3
+# define BPF_OVERRIDE_RETURN_BIT 4
+#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ffb6eb55c..a3ec52056 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -760,7 +760,9 @@ struct task_struct {
 	/* Per task flags (PF_*), defined further below: */
 	unsigned int			flags;
 	unsigned int			ptrace;
-
+#ifdef CONFIG_BPF_HELPER_STRICT
+	unsigned int			bpf_helper_bitfield;
+#endif
 #ifdef CONFIG_SMP
 	int				on_cpu;
 	struct __call_single_node	wake_entry;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 51b9aa640..99a90d0f5 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -900,6 +900,7 @@ enum bpf_cmd {
 	BPF_ITER_CREATE,
 	BPF_LINK_DETACH,
 	BPF_PROG_BIND_MAP,
+	BPF_HELPER_BITS_SET,
 };
 
 enum bpf_map_type {
@@ -1326,6 +1327,9 @@ union bpf_attr {
 		 * to using 5 hash functions).
 		 */
 		__u64	map_extra;
+#ifdef CONFIG_BPF_HELPER_STRICT
+		__u32 security_helper_bits;
+#endif
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 3bac0a8ce..c2fd463be 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -43,6 +43,10 @@
  */
 #define CLONE_NEWTIME	0x00000080	/* New time namespace */
 
+#ifdef CONFIG_BPF_HELPER_STRICT
+#define CLONE_BITFIELD	0x00000040	/* set if bpf_helper_bitfield shared between processes */
+#endif
+
 #ifndef __ASSEMBLY__
 /**
  * struct clone_args - arguments for the clone3 syscall
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index 2dfe1079f..c2c2fcf76 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -99,4 +99,10 @@ config BPF_LSM
 
 	  If you are unsure how to answer this question, answer N.
 
+config BPF_HELPER_STRICT
+        bool "Enable BPF HELPER Check bits"
+        depends on BPF_SYSCALL
+        help
+            Enable several check bits for bpf helpers' security improvements.
+
+	     BPF_HELPER_STRICT restricts the function of helpers.
+	     Currently it can be used for restrict override return,
+	     read, write, and send signal.
endmenu # "BPF subsystem"
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7b373a5e8..2f05ea50f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -68,6 +68,45 @@ static const struct bpf_map_ops * const bpf_map_types[] = {
 #undef BPF_LINK_TYPE
 };
 
+#ifdef CONFIG_BPF_HELPER_STRICT
+static __always_inline int HelperWrite(unsigned int bits)
+{
+	return ((unsigned int)bits & BPF_PROBE_WRITE_BIT) != 0;
+}
+static __always_inline int HelperRead(unsigned int bits)
+{
+	return ((unsigned int)bits & BPF_PROBE_READ_BIT) != 0;
+}
+static __always_inline int HelperSendSignal(unsigned int bits)
+{
+	return ((unsigned int)bits & BPF_SEND_SIGNAL_BIT) != 0;
+}
+static __always_inline int HelperOverrideReturn(unsigned int bits)
+{
+	return ((unsigned int)bits & BPF_OVERRIDE_RETURN_BIT) != 0;
+}
+int bpf_set_security_helper_bits(union bpf_attr *attr)
+{
+	int res;
+	unsigned int bits_to_set;
+	unsigned int expected_bpf_helper_bitfield = 0;
+
+	bits_to_set = attr->security_helper_bits;
+
+	if (HelperWrite(bits_to_set))
+		expected_bpf_helper_bitfield |= 1 << BPF_PROBE_WRITE_BIT;
+	if (HelperRead(bits_to_set))
+		expected_bpf_helper_bitfield |= 1 << BPF_PROBE_READ_BIT;
+	if (HelperSendSignal(bits_to_set))
+		expected_bpf_helper_bitfield |= 1 << BPF_SEND_SIGNAL_BIT;
+	if (HelperOverrideReturn(bits_to_set))
+		expected_bpf_helper_bitfield |= 1 << BPF_OVERRIDE_RETURN_BIT;
+
+	current->bpf_helper_bitfield = expected_bpf_helper_bitfield;
+	res = 0;
+	return res;
+}
+#endif
 /*
  * If we're handed a bigger struct than we know of, ensure all the unknown bits
  * are 0 - i.e. new user-space does not rely on any kernel feature extensions
@@ -4913,7 +4952,7 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
 	union bpf_attr attr;
 	bool capable;
 	int err;
-
+
 	capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled;
 
 	/* Intent here is for unprivileged_bpf_disabled to block key object
@@ -4925,7 +4964,7 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
 	 * and other operations.
 	 */
 	if (!capable &&
-	    (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD))
+	    (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD || cmd == BPF_HELPER_BITS_SET))
 		return -EPERM;
 
 	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
@@ -4938,7 +4977,7 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
 	if (copy_from_bpfptr(&attr, uattr, size) != 0)
 		return -EFAULT;
 
-	err = security_bpf(cmd, &attr, size);
+	err = security_bpf(cmd, &attr, size);
 	if (err < 0)
 		return err;
 
@@ -5056,6 +5095,9 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
 	case BPF_PROG_BIND_MAP:
 		err = bpf_prog_bind_map(&attr);
 		break;
+	case BPF_HELPER_BITS_SET:
+		err = bpf_set_security_helper_bits(&attr);
+		break;
 	default:
 		err = -EINVAL;
 		break;
diff --git a/kernel/fork.c b/kernel/fork.c
index 08969f5aa..6168da0b8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1998,6 +1998,10 @@ static __latent_entropy struct task_struct *copy_process(
 	const u64 clone_flags = args->flags;
 	struct nsproxy *nsp = current->nsproxy;
 
+#ifdef CONFIG_BPF_HELPER_STRICT
+	unsigned int bitfield = current->bpf_helper_bitfield;
+#endif
+
 	/*
 	 * Don't allow sharing the root directory with processes in a different
 	 * namespace
@@ -2102,6 +2106,7 @@ static __latent_entropy struct task_struct *copy_process(
 	 */
 	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL;
 
+
 	ftrace_graph_init_task(p);
 
 	rt_mutex_init_task(p);
@@ -2490,6 +2495,14 @@ static __latent_entropy struct task_struct *copy_process(
 
 	copy_oom_score_adj(clone_flags, p);
 
+#ifdef CONFIG_BPF_HELPER_STRICT
+	/* Only if explicit set CLONE_BITFIELD or
+	 * the forked process has both CAP_BPF and CAP_SYS_ADMIN,
+	 * we will set the bitfield
+	 */
+	p->bpf_helper_bitfield = (clone_flags & CLONE_BITFIELD) ? bitfield : 0;
+	if (capable(CAP_BPF) && capable(CAP_SYS_ADMIN))
+		p->bpf_helper_bitfield = bitfield;
+#endif
 	return p;
 
 bad_fork_cancel_cgroup:
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 1ed08967f..5c4232d45 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -39,6 +39,16 @@
 #define bpf_event_rcu_dereference(p)					\
 	rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))
 
+#ifdef CONFIG_BPF_HELPER_STRICT
+static bool check_bpf_bitfield(unsigned int flags)
+{
+	unsigned int bits = current->bpf_helper_bitfield;
+
+	if (!(bits & (1 << flags)))
+		return false;
+
+	return true;
+}
+#endif
+
 #ifdef CONFIG_MODULES
 struct bpf_trace_module {
 	struct module *module;
@@ -145,6 +155,10 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
 #ifdef CONFIG_BPF_KPROBE_OVERRIDE
 BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
 {
+#ifdef CONFIG_BPF_HELPER_STRICT
+	if (unlikely(!check_bpf_bitfield(BPF_OVERRIDE_RETURN_BIT)))
+		return -EPERM;
+#endif
 	regs_set_return_value(regs, rc);
 	override_function_with_return(regs);
 	return 0;
@@ -162,8 +176,8 @@ static const struct bpf_func_proto bpf_override_return_proto = {
 static __always_inline int
 bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr)
 {
-	int ret;
 
+	int ret;
 	ret = copy_from_user_nofault(dst, unsafe_ptr, size);
 	if (unlikely(ret < 0))
 		memset(dst, 0, size);
@@ -287,6 +301,10 @@ const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
 BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
 	   const void *, unsafe_ptr)
 {
+#ifdef CONFIG_BPF_HELPER_STRICT
+	if (unlikely(!check_bpf_bitfield(BPF_PROBE_READ_BIT)))
+		return -EPERM;
+#endif
 	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
 		return bpf_probe_read_user_common(dst, size,
 				(__force void __user *)unsafe_ptr);
@@ -338,7 +356,10 @@ BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
 	 * state, when the task or mm are switched. This is specifically
 	 * required to prevent the use of temporary mm.
 	 */
-
+#ifdef CONFIG_BPF_HELPER_STRICT
+	if (unlikely(!check_bpf_bitfield(BPF_PROBE_WRITE_BIT)))
+		return -EPERM;
+#endif
 	if (unlikely(in_interrupt() ||
 		     current->flags & (PF_KTHREAD | PF_EXITING)))
 		return -EPERM;
@@ -843,6 +864,10 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type)
 	 * permitted in order to send signal to the current
 	 * task.
 	 */
+#ifdef CONFIG_BPF_HELPER_STRICT
+	if (unlikely(!check_bpf_bitfield(BPF_SEND_SIGNAL_BIT)))
+		return -EPERM;
+#endif
 	if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
 		return -EPERM;
 	if (unlikely(!nmi_uaccess_okay()))
-- 
2.25.1




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux