[RFC PATCH bpf-next 2/5] bpf: add get_reg_val helper

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add a helper which reads the value of specified register into memory.

Currently, bpf programs only have access to general-purpose registers
via struct pt_regs. Other registers, like SSE regs %xmm0-15, are
inaccessible, which makes some tracing use cases impossible. For example,
User Statically-Defined Tracing (USDT) probes may use SSE registers to
pass their arguments on x86. While this patch adds support for %xmm0-15
only, the helper is meant to be generic enough to support fetching any
reg.

A useful "value of register" definition for bpf programs is "value of
register before control transfer to kernel". pt_regs gives us this
currently, so it's the default behavior of the new helper. Fetching the
actual _current_ reg value is possible, though, by passing
BPF_GETREG_F_CURRENT flag as part of input.

For SSE regs we try to avoid digging around in task's fpu state by first
reading _current_ value, then checking to see if the state of cpu's
floating point regs matches task's view of them. If so, we can just
return _current_ value.

Further use cases which are straightforward to support, but
unimplemented:
  * using the helper to fetch a general-purpose register value. The
  currently-unused pt_regs parameter exists for this reason.

  * fetching rdtsc (w/ BPF_GETREG_F_CURRENT)

  * other architectures. s390 specifically might benefit from similar
  fpu reg fetching, as the USDT library was recently updated to support
  that architecture.

Signed-off-by: Dave Marchevsky <davemarchevsky@xxxxxx>
---
 include/uapi/linux/bpf.h       |  40 +++++++++
 kernel/trace/bpf_trace.c       | 148 +++++++++++++++++++++++++++++++++
 kernel/trace/bpf_trace.h       |   1 +
 tools/include/uapi/linux/bpf.h |  40 +++++++++
 4 files changed, 229 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 444fe6f1cf35..3ef8f683ed9e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5154,6 +5154,18 @@ union bpf_attr {
  *		if not NULL, is a reference which must be released using its
  *		corresponding release function, or moved into a BPF map before
  *		program exit.
+ *
+ * long bpf_get_reg_val(void *dst, u32 size, u64 getreg_spec, struct pt_regs *regs, struct task_struct *tsk)
+ *	Description
+ *		Store the value of an SSE register specified by *getreg_spec*
+ *		into memory region of size *size* specified by *dst*. *getreg_spec*
+ *		holds a BPF_GETREG enum value in its upper 32 bits and BPF_GETREG_F
+ *		flags in its lower 32 bits, e.g.
+ *		((u64)BPF_GETREG_X86_XMM0 << 32) | BPF_GETREG_F_CURRENT.
+ *	Return
+ *		0 on success
+ *		**-ENOENT** if the system architecture does not have requested reg
+ *		**-EINVAL** if *getreg_spec* is invalid
+ *		**-EINVAL** if *size* != bytes necessary to store requested reg val
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5351,6 +5363,7 @@ union bpf_attr {
 	FN(skb_set_tstamp),		\
 	FN(ima_file_hash),		\
 	FN(kptr_xchg),			\
+	FN(get_reg_val),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -6318,6 +6331,33 @@ struct bpf_perf_event_value {
 	__u64 running;
 };
 
+/* bpf_get_reg_val register enum: selects which register the helper reads.
+ * The selector is passed in the upper 32 bits of the helper's getreg_spec
+ * argument.
+ */
+enum {
+	BPF_GETREG_X86_XMM0 = 0,
+	BPF_GETREG_X86_XMM1,
+	BPF_GETREG_X86_XMM2,
+	BPF_GETREG_X86_XMM3,
+	BPF_GETREG_X86_XMM4,
+	BPF_GETREG_X86_XMM5,
+	BPF_GETREG_X86_XMM6,
+	BPF_GETREG_X86_XMM7,
+	BPF_GETREG_X86_XMM8,
+	BPF_GETREG_X86_XMM9,
+	BPF_GETREG_X86_XMM10,
+	BPF_GETREG_X86_XMM11,
+	BPF_GETREG_X86_XMM12,
+	BPF_GETREG_X86_XMM13,
+	BPF_GETREG_X86_XMM14,
+	BPF_GETREG_X86_XMM15,
+	__MAX_BPF_GETREG, /* one past the last valid selector */
+};
+
+/* bpf_get_reg_val flags, passed in the lower 32 bits of getreg_spec */
+enum {
+	BPF_GETREG_F_NONE = 0, /* default: value before control transfer to kernel */
+	BPF_GETREG_F_CURRENT = (1U << 0), /* value the register holds right now */
+};
+
 enum {
 	BPF_DEVCG_ACC_MKNOD	= (1ULL << 0),
 	BPF_DEVCG_ACC_READ	= (1ULL << 1),
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f15b826f9899..0de7d6b3af5b 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -28,6 +28,10 @@
 
 #include <asm/tlb.h>
 
+#ifdef CONFIG_X86
+#include <asm/fpu/context.h>
+#endif
+
 #include "trace_probe.h"
 #include "trace.h"
 
@@ -1166,6 +1170,148 @@ static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = {
 	.arg1_type	= ARG_PTR_TO_CTX,
 };
 
+#define XMM_REG_SZ 16
+
+#ifdef CONFIG_X86
+/*
+ * getreg_read_xmm_fxsave() - copy a saved XMM register out of a task's
+ * in-memory FPU state.
+ * @reg:  BPF_GETREG_X86_XMM* selector. XMM8-XMM15 are accepted only when
+ *        CONFIG_X86_64 is set.
+ * @tsk:  task whose saved state is read; dereferenced, so must not be NULL.
+ * @data: destination buffer of at least XMM_REG_SZ bytes.
+ *
+ * Everything here is wrapped in CONFIG_X86 because struct fxregs_state and
+ * thread.fpu are x86-only; the sole caller is likewise compiled only on x86.
+ *
+ * Return: 0 on success, -EINVAL if @reg is not an XMM register selector
+ * valid for this configuration.
+ */
+static long getreg_read_xmm_fxsave(u32 reg, struct task_struct *tsk,
+				   void *data)
+{
+	u32 last_xmm = IS_ENABLED(CONFIG_X86_64) ? BPF_GETREG_X86_XMM15 :
+						    BPF_GETREG_X86_XMM7;
+	struct fxregs_state *fxsave;
+	u32 xmm_space_off;
+
+	if (reg < BPF_GETREG_X86_XMM0 || reg > last_xmm)
+		return -EINVAL;
+
+	/* Registers are stored back to back, XMM_REG_SZ bytes apiece. */
+	xmm_space_off = (reg - BPF_GETREG_X86_XMM0) * XMM_REG_SZ;
+
+	fxsave = &tsk->thread.fpu.fpstate->regs.fxsave;
+	memcpy(data, (u8 *)fxsave->xmm_space + xmm_space_off, XMM_REG_SZ);
+	return 0;
+}
+#endif /* CONFIG_X86 */
+
+/* Report whether @reg selects one of BPF_GETREG_X86_XMM0..BPF_GETREG_X86_XMM15. */
+static bool getreg_is_xmm(u32 reg)
+{
+	if (reg < BPF_GETREG_X86_XMM0)
+		return false;
+
+	return reg <= BPF_GETREG_X86_XMM15;
+}
+
+/*
+ * Expands to a switch case that stores the live contents of %xmm<regno> to
+ * the XMM_REG_SZ bytes at 'data'.
+ *
+ * movups is used instead of movdqa: movdqa faults unless its memory operand
+ * is 16-byte aligned, and nothing guarantees that for a BPF-supplied buffer.
+ * movups needs only SSE, i.e. it exists wherever XMM registers do. The memory
+ * operand is typed as a full XMM_REG_SZ-byte object so the compiler knows all
+ * 16 bytes are written, and the asm is volatile because its result depends on
+ * register state the compiler cannot see.
+ *
+ * NOTE(review): there is no CPU feature check here; presumably a 32-bit x86
+ * CPU without SSE would raise #UD on this instruction - confirm.
+ */
+#define __bpf_sse_read(regno)							\
+	case BPF_GETREG_X86_XMM ## regno:					\
+		asm volatile("movups %%xmm" #regno ", %0"			\
+			     : "=m"(*(struct { u8 b[XMM_REG_SZ]; } *)data));	\
+		break
+
+/*
+ * bpf_read_sse_reg() - fetch the value of an XMM register.
+ * @reg:   BPF_GETREG_X86_XMM* selector. XMM8-XMM15 are accepted only when
+ *         CONFIG_X86_64 is set.
+ * @flags: BPF_GETREG_F_* flags. With BPF_GETREG_F_CURRENT the value held by
+ *         the CPU right now is returned and @tsk is not used.
+ * @tsk:   task whose view of the register is wanted. May be NULL only when
+ *         BPF_GETREG_F_CURRENT is set.
+ * @data:  destination buffer of XMM_REG_SZ bytes; no alignment is required.
+ *
+ * The live register is read first. Without BPF_GETREG_F_CURRENT that value is
+ * kept only if fpregs_state_valid() reports that this CPU's FPU registers
+ * hold @tsk's state; otherwise it is overwritten from @tsk's saved fxsave
+ * image.
+ *
+ * Return: 0 on success, -EINVAL for an unsupported @reg or for a NULL @tsk
+ * without BPF_GETREG_F_CURRENT, -ENOENT when not built for x86.
+ */
+static long bpf_read_sse_reg(u32 reg, u32 flags, struct task_struct *tsk,
+			     void *data)
+{
+#ifdef CONFIG_X86
+	unsigned long irq_flags;
+	long err;
+
+	/*
+	 * Only BPF_GETREG_F_CURRENT can be served without a task: every other
+	 * path dereferences @tsk, which the helper prototype allows to be
+	 * NULL.
+	 */
+	if (!tsk && !(flags & BPF_GETREG_F_CURRENT))
+		return -EINVAL;
+
+	switch (reg) {
+	__bpf_sse_read(0);
+	__bpf_sse_read(1);
+	__bpf_sse_read(2);
+	__bpf_sse_read(3);
+	__bpf_sse_read(4);
+	__bpf_sse_read(5);
+	__bpf_sse_read(6);
+	__bpf_sse_read(7);
+#ifdef CONFIG_X86_64
+	__bpf_sse_read(8);
+	__bpf_sse_read(9);
+	__bpf_sse_read(10);
+	__bpf_sse_read(11);
+	__bpf_sse_read(12);
+	__bpf_sse_read(13);
+	__bpf_sse_read(14);
+	__bpf_sse_read(15);
+#endif /* CONFIG_X86_64 */
+	default:
+		return -EINVAL;
+	}
+
+	if (flags & BPF_GETREG_F_CURRENT)
+		return 0;
+
+	/* The CPU registers are @tsk's: the value just read is the answer. */
+	if (fpregs_state_valid(&tsk->thread.fpu, smp_processor_id()))
+		return 0;
+
+	/* Otherwise replace it with the copy saved in @tsk's FPU state. */
+	local_irq_save(irq_flags);
+	err = getreg_read_xmm_fxsave(reg, tsk, data);
+	local_irq_restore(irq_flags);
+	return err;
+#else
+	return -ENOENT;
+#endif /* CONFIG_X86 */
+}
+
+#undef __bpf_sse_read
+
+/*
+ * bpf_get_reg_val() helper: store the value of the register described by
+ * @getreg_spec into @dst.
+ * @dst:         output buffer of @size bytes.
+ * @size:        must equal the size of the requested register.
+ * @getreg_spec: register selector (BPF_GETREG_*) in the upper 32 bits,
+ *               BPF_GETREG_F_* flags in the lower 32 bits.
+ * @regs:        currently unused.
+ * @tsk:         task whose register value is wanted.
+ *
+ * @dst is declared ARG_PTR_TO_UNINIT_MEM, so the program may pass
+ * uninitialized memory and treats it as initialized afterwards; it is
+ * therefore zeroed on every failure path.
+ *
+ * Return: 0 on success, -EINVAL for an unknown register, unknown flag bits or
+ * a wrong @size, -ENOENT if the architecture does not have the register.
+ */
+BPF_CALL_5(get_reg_val, void *, dst, u32, size,
+	   u64, getreg_spec, struct pt_regs *, regs,
+	   struct task_struct *, tsk)
+{
+	u32 reg = (u32)(getreg_spec >> 32);
+	u32 flags = (u32)getreg_spec;
+	long err = -EINVAL;
+
+	/* Reject selectors and flag bits this kernel does not know about. */
+	if (reg >= __MAX_BPF_GETREG || (flags & ~(u32)BPF_GETREG_F_CURRENT))
+		goto out;
+
+	if (getreg_is_xmm(reg)) {
+		/*
+		 * IS_ENABLED() instead of #ifdef keeps the braces balanced on
+		 * every architecture; bpf_read_sse_reg() is defined everywhere.
+		 */
+		if (!IS_ENABLED(CONFIG_X86))
+			err = -ENOENT;
+		else if (size == XMM_REG_SZ)
+			err = bpf_read_sse_reg(reg, flags, tsk, dst);
+	}
+
+out:
+	if (err)
+		memset(dst, 0, size);
+	return err;
+}
+
+BTF_ID_LIST(bpf_get_reg_val_ids) /* BTF ids used by bpf_get_reg_val_proto */
+BTF_ID(struct, pt_regs) /* [0]: struct pt_regs, referenced by arg4 */
+
+static const struct bpf_func_proto bpf_get_reg_val_proto = {
+	.func	= get_reg_val,
+	.ret_type	= RET_INTEGER, /* 0 or a negative errno */
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM, /* dst */
+	.arg2_type	= ARG_CONST_SIZE, /* size of dst */
+	.arg3_type	= ARG_ANYTHING, /* getreg_spec */
+	.arg4_type	= ARG_PTR_TO_BTF_ID_OR_NULL, /* regs, may be NULL */
+	.arg4_btf_id	= &bpf_get_reg_val_ids[0],
+	.arg5_type	= ARG_PTR_TO_BTF_ID_OR_NULL, /* tsk, may be NULL */
+	.arg5_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
+};
+
 static const struct bpf_func_proto *
 bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -1287,6 +1433,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_find_vma_proto;
 	case BPF_FUNC_trace_vprintk:
 		return bpf_get_trace_vprintk_proto();
+	case BPF_FUNC_get_reg_val:
+		return &bpf_get_reg_val_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/kernel/trace/bpf_trace.h b/kernel/trace/bpf_trace.h
index 9acbc11ac7bb..b4b55706c2dd 100644
--- a/kernel/trace/bpf_trace.h
+++ b/kernel/trace/bpf_trace.h
@@ -29,6 +29,7 @@ TRACE_EVENT(bpf_trace_printk,
 
 #undef TRACE_INCLUDE_PATH
 #define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
 #define TRACE_INCLUDE_FILE bpf_trace
 
 #include <trace/define_trace.h>
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 444fe6f1cf35..3ef8f683ed9e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5154,6 +5154,18 @@ union bpf_attr {
  *		if not NULL, is a reference which must be released using its
  *		corresponding release function, or moved into a BPF map before
  *		program exit.
+ *
+ * long bpf_get_reg_val(void *dst, u32 size, u64 getreg_spec, struct pt_regs *regs, struct task_struct *tsk)
+ *	Description
+ *		Store the value of an SSE register specified by *getreg_spec*
+ *		into memory region of size *size* specified by *dst*. *getreg_spec*
+ *		holds a BPF_GETREG enum value in its upper 32 bits and BPF_GETREG_F
+ *		flags in its lower 32 bits, e.g.
+ *		((u64)BPF_GETREG_X86_XMM0 << 32) | BPF_GETREG_F_CURRENT.
+ *	Return
+ *		0 on success
+ *		**-ENOENT** if the system architecture does not have requested reg
+ *		**-EINVAL** if *getreg_spec* is invalid
+ *		**-EINVAL** if *size* != bytes necessary to store requested reg val
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5351,6 +5363,7 @@ union bpf_attr {
 	FN(skb_set_tstamp),		\
 	FN(ima_file_hash),		\
 	FN(kptr_xchg),			\
+	FN(get_reg_val),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -6318,6 +6331,33 @@ struct bpf_perf_event_value {
 	__u64 running;
 };
 
+/* bpf_get_reg_val register enum: selects which register the helper reads.
+ * The selector is passed in the upper 32 bits of the helper's getreg_spec
+ * argument.
+ */
+enum {
+	BPF_GETREG_X86_XMM0 = 0,
+	BPF_GETREG_X86_XMM1,
+	BPF_GETREG_X86_XMM2,
+	BPF_GETREG_X86_XMM3,
+	BPF_GETREG_X86_XMM4,
+	BPF_GETREG_X86_XMM5,
+	BPF_GETREG_X86_XMM6,
+	BPF_GETREG_X86_XMM7,
+	BPF_GETREG_X86_XMM8,
+	BPF_GETREG_X86_XMM9,
+	BPF_GETREG_X86_XMM10,
+	BPF_GETREG_X86_XMM11,
+	BPF_GETREG_X86_XMM12,
+	BPF_GETREG_X86_XMM13,
+	BPF_GETREG_X86_XMM14,
+	BPF_GETREG_X86_XMM15,
+	__MAX_BPF_GETREG, /* one past the last valid selector */
+};
+
+/* bpf_get_reg_val flags, passed in the lower 32 bits of getreg_spec */
+enum {
+	BPF_GETREG_F_NONE = 0, /* default: value before control transfer to kernel */
+	BPF_GETREG_F_CURRENT = (1U << 0), /* value the register holds right now */
+};
+
 enum {
 	BPF_DEVCG_ACC_MKNOD	= (1ULL << 0),
 	BPF_DEVCG_ACC_READ	= (1ULL << 1),
-- 
2.30.2





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux