[RFC PATCH bpf-next] bpf: Introduce bpf_timer

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Alexei Starovoitov <ast@xxxxxxxxxx>

Introduce 'struct bpf_timer' that can be embedded in most BPF map types
and helpers to operate on it:
long bpf_timer_init(struct bpf_timer *timer, void *callback, int flags)
long bpf_timer_mod(struct bpf_timer *timer, u64 msecs)
long bpf_timer_del(struct bpf_timer *timer)

Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx>
---
This is work in progress, but gives an idea on how API will look.
---
 include/linux/bpf.h                           |   1 +
 include/uapi/linux/bpf.h                      |  25 ++++
 kernel/bpf/helpers.c                          | 106 +++++++++++++++++
 kernel/bpf/verifier.c                         | 110 ++++++++++++++++++
 kernel/trace/bpf_trace.c                      |   2 +-
 scripts/bpf_doc.py                            |   2 +
 tools/include/uapi/linux/bpf.h                |  25 ++++
 .../testing/selftests/bpf/prog_tests/timer.c  |  42 +++++++
 tools/testing/selftests/bpf/progs/timer.c     |  53 +++++++++
 9 files changed, 365 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/timer.c
 create mode 100644 tools/testing/selftests/bpf/progs/timer.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9dc44ba97584..18e09cc0c410 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -312,6 +312,7 @@ enum bpf_arg_type {
 	ARG_PTR_TO_FUNC,	/* pointer to a bpf program function */
 	ARG_PTR_TO_STACK_OR_NULL,	/* pointer to stack or NULL */
 	ARG_PTR_TO_CONST_STR,	/* pointer to a null terminated read-only string */
+	ARG_PTR_TO_TIMER,	/* pointer to bpf_timer */
 	__BPF_ARG_TYPE_MAX,
 };
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 418b9b813d65..c95d7854d9fb 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4761,6 +4761,24 @@ union bpf_attr {
  * 		Execute close syscall for given FD.
  * 	Return
  * 		A syscall result.
+ *
+ * long bpf_timer_init(struct bpf_timer *timer, void *callback, int flags)
+ *	Description
+ *		Initialize the timer to call given static function.
+ *	Return
+ *		zero
+ *
+ * long bpf_timer_mod(struct bpf_timer *timer, u64 msecs)
+ *	Description
+ *		Set the timer expiration N msecs from the current time.
+ *	Return
+ *		zero
+ *
+ * long bpf_timer_del(struct bpf_timer *timer)
+ *	Description
+ *		Deactivate the timer.
+ *	Return
+ *		zero
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -4932,6 +4950,9 @@ union bpf_attr {
 	FN(sys_bpf),			\
 	FN(btf_find_by_name_kind),	\
 	FN(sys_close),			\
+	FN(timer_init),			\
+	FN(timer_mod),			\
+	FN(timer_del),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -6038,6 +6059,10 @@ struct bpf_spin_lock {
 	__u32	val;
 };
 
+struct bpf_timer {
+	__u64 opaque;
+};
+
 struct bpf_sysctl {
 	__u32	write;		/* Sysctl is being read (= 0) or written (= 1).
 				 * Allows 1,2,4-byte read, but no write.
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 544773970dbc..8ef0ad23c991 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -985,6 +985,106 @@ const struct bpf_func_proto bpf_snprintf_proto = {
 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
 };
 
+struct bpf_timer_list {
+	struct timer_list tl;
+	struct bpf_map *map;
+	struct bpf_prog *prog;
+	void *callback_fn;
+	void *key;
+	void *value;
+};
+
+static void timer_cb(struct timer_list *timer)
+{
+	struct bpf_timer_list *tl = from_timer(tl, timer, tl);
+	struct bpf_map *map;
+	int ret;
+
+	ret = BPF_CAST_CALL(tl->callback_fn)((u64)(long)tl->map,
+					     (u64)(long)tl->key,
+					     (u64)(long)tl->value, 0, 0);
+	WARN_ON(ret != 0); /* todo: define 0 vs 1 or disallow 1 in the verifier */
+	bpf_prog_put(tl->prog);
+}
+
+BPF_CALL_5(bpf_timer_init, struct bpf_timer *, timer, void *, cb, int, flags,
+	   struct bpf_map *, map, struct bpf_prog *, prog)
+{
+	struct bpf_timer_list *tl;
+
+	if (timer->opaque)
+		return -EBUSY;
+	tl = kcalloc(1, sizeof(*tl), GFP_ATOMIC);
+	if (!tl)
+		return -ENOMEM;
+	tl->callback_fn = cb;
+	tl->value = (void *)timer /* - offset of bpf_timer inside elem */;
+	tl->key = tl->value - round_up(map->key_size, 8);
+	tl->map = map;
+	tl->prog = prog;
+	timer_setup(&tl->tl, timer_cb, 0);
+	timer->opaque = (long)tl;
+	return 0;
+}
+
+const struct bpf_func_proto bpf_timer_init_proto = {
+	.func		= bpf_timer_init,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_TIMER,
+	.arg2_type	= ARG_PTR_TO_FUNC,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_timer_mod, struct bpf_timer *, timer, u64, msecs)
+{
+	struct bpf_timer_list *tl;
+
+	tl = (struct bpf_timer_list *)timer->opaque;
+	if (!tl)
+		return -EINVAL;
+	/* keep the prog alive until callback is invoked */
+	if (!mod_timer(&tl->tl, jiffies + msecs_to_jiffies(msecs))) {
+		/* The timer was inactive.
+		 * Keep the prog alive until callback is invoked
+		 */
+		bpf_prog_inc(tl->prog);
+	}
+	return 0;
+}
+
+const struct bpf_func_proto bpf_timer_mod_proto = {
+	.func		= bpf_timer_mod,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_TIMER,
+	.arg2_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_1(bpf_timer_del, struct bpf_timer *, timer)
+{
+	struct bpf_timer_list *tl;
+
+	tl = (struct bpf_timer_list *)timer->opaque;
+	if (!tl)
+		return -EINVAL;
+	if (del_timer(&tl->tl)) {
+		/* The timer was active,
+		 * drop the prog refcnt, since callback
+		 * will not be invoked.
+		 */
+		bpf_prog_put(tl->prog);
+	}
+	return 0;
+}
+
+const struct bpf_func_proto bpf_timer_del_proto = {
+	.func		= bpf_timer_del,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_TIMER,
+};
+
 const struct bpf_func_proto bpf_get_current_task_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
@@ -1033,6 +1133,12 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_ringbuf_query_proto;
 	case BPF_FUNC_for_each_map_elem:
 		return &bpf_for_each_map_elem_proto;
+	case BPF_FUNC_timer_init:
+		return &bpf_timer_init_proto;
+	case BPF_FUNC_timer_mod:
+		return &bpf_timer_mod_proto;
+	case BPF_FUNC_timer_del:
+		return &bpf_timer_del_proto;
 	default:
 		break;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9189eecb26dd..606c713be60a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4656,6 +4656,35 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
 	return 0;
 }
 
+static int process_timer_func(struct bpf_verifier_env *env, int regno,
+			      struct bpf_call_arg_meta *meta)
+{
+	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+	bool is_const = tnum_is_const(reg->var_off);
+	struct bpf_map *map = reg->map_ptr;
+	u64 val = reg->var_off.value;
+
+	if (!is_const) {
+		verbose(env,
+			"R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
+			regno);
+		return -EINVAL;
+	}
+	if (!map->btf) {
+		verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
+			map->name);
+		return -EINVAL;
+	}
+	if (val) {
+		/* todo: relax this requirement */
+		verbose(env, "bpf_timer field can only be first in the map value element\n");
+		return -EINVAL;
+	}
+	WARN_ON(meta->map_ptr);
+	meta->map_ptr = map;
+	return 0;
+}
+
 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
 {
 	return type == ARG_PTR_TO_MEM ||
@@ -4788,6 +4817,7 @@ static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PER
 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
+static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
 
 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 	[ARG_PTR_TO_MAP_KEY]		= &map_key_value_types,
@@ -4819,6 +4849,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
 	[ARG_PTR_TO_STACK_OR_NULL]	= &stack_ptr_types,
 	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
+	[ARG_PTR_TO_TIMER]		= &timer_types,
 };
 
 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@ -5000,6 +5031,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
 			verbose(env, "verifier internal error\n");
 			return -EFAULT;
 		}
+	} else if (arg_type == ARG_PTR_TO_TIMER) {
+		if (process_timer_func(env, regno, meta))
+			return -EACCES;
 	} else if (arg_type == ARG_PTR_TO_FUNC) {
 		meta->subprogno = reg->subprogno;
 	} else if (arg_type_is_mem_ptr(arg_type)) {
@@ -5742,6 +5776,43 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env,
 	return 0;
 }
 
+static int set_timer_init_callback_state(struct bpf_verifier_env *env,
+					 struct bpf_func_state *caller,
+					 struct bpf_func_state *callee,
+					 int insn_idx)
+{
+	struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
+	struct bpf_map *map_ptr;
+
+	if (bpf_map_ptr_poisoned(insn_aux)) {
+		verbose(env, "bpf_timer_init abusing map_ptr\n");
+		return -EINVAL;
+	}
+
+	map_ptr = BPF_MAP_PTR(insn_aux->map_ptr_state);
+
+	/* bpf_timer_init(struct bpf_timer *timer, void *callback_fn, u64 flags);
+	 * callback_fn(struct bpf_map *map, void *key, void *value);
+	 */
+	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
+	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
+	callee->regs[BPF_REG_1].map_ptr = map_ptr;
+
+	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
+	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
+	callee->regs[BPF_REG_2].map_ptr = map_ptr;
+
+	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
+	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
+	callee->regs[BPF_REG_3].map_ptr = map_ptr;
+
+	/* unused */
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+	callee->in_callback_fn = true;
+	return 0;
+}
+
 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 {
 	struct bpf_verifier_state *state = env->cur_state;
@@ -5837,6 +5908,7 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
 	    func_id != BPF_FUNC_map_pop_elem &&
 	    func_id != BPF_FUNC_map_peek_elem &&
 	    func_id != BPF_FUNC_for_each_map_elem &&
+	    func_id != BPF_FUNC_timer_init &&
 	    func_id != BPF_FUNC_redirect_map)
 		return 0;
 
@@ -6069,6 +6141,13 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 			return -EINVAL;
 	}
 
+	if (func_id == BPF_FUNC_timer_init) {
+		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+					set_timer_init_callback_state);
+		if (err < 0)
+			return -EINVAL;
+	}
+
 	if (func_id == BPF_FUNC_snprintf) {
 		err = check_bpf_snprintf_call(env, regs);
 		if (err < 0)
@@ -12526,6 +12605,37 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			insn      = new_prog->insnsi + i + delta;
 			continue;
 		}
+		if (insn->imm == BPF_FUNC_timer_init) {
+
+			aux = &env->insn_aux_data[i + delta];
+			if (bpf_map_ptr_poisoned(aux)) {
+				verbose(env, "bpf_timer_init abusing map_ptr\n");
+				return -EINVAL;
+			}
+			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
+			{
+				struct bpf_insn ld_addrs[4] = {
+					BPF_LD_IMM64(BPF_REG_4, (long)map_ptr),
+					BPF_LD_IMM64(BPF_REG_5, (long)prog),
+				};
+
+				insn_buf[0] = ld_addrs[0];
+				insn_buf[1] = ld_addrs[1];
+				insn_buf[2] = ld_addrs[2];
+				insn_buf[3] = ld_addrs[3];
+			}
+			insn_buf[4] = *insn;
+			cnt = 5;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			goto patch_call_imm;
+		}
 
 		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
 		 * and other inlining handlers are currently limited to 64 bit
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index d2d7cf6cfe83..453a46c2d732 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1065,7 +1065,7 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_snprintf:
 		return &bpf_snprintf_proto;
 	default:
-		return NULL;
+		return bpf_base_func_proto(func_id);
 	}
 }
 
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index 2d94025b38e9..00ac7b79cddb 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -547,6 +547,7 @@ COMMANDS
             'struct inode',
             'struct socket',
             'struct file',
+            'struct bpf_timer',
     ]
     known_types = {
             '...',
@@ -594,6 +595,7 @@ COMMANDS
             'struct inode',
             'struct socket',
             'struct file',
+            'struct bpf_timer',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 418b9b813d65..c95d7854d9fb 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4761,6 +4761,24 @@ union bpf_attr {
  * 		Execute close syscall for given FD.
  * 	Return
  * 		A syscall result.
+ *
+ * long bpf_timer_init(struct bpf_timer *timer, void *callback, int flags)
+ *	Description
+ *		Initialize the timer to call given static function.
+ *	Return
+ *		zero
+ *
+ * long bpf_timer_mod(struct bpf_timer *timer, u64 msecs)
+ *	Description
+ *		Set the timer expiration N msecs from the current time.
+ *	Return
+ *		zero
+ *
+ * long bpf_timer_del(struct bpf_timer *timer)
+ *	Description
+ *		Deactivate the timer.
+ *	Return
+ *		zero
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -4932,6 +4950,9 @@ union bpf_attr {
 	FN(sys_bpf),			\
 	FN(btf_find_by_name_kind),	\
 	FN(sys_close),			\
+	FN(timer_init),			\
+	FN(timer_mod),			\
+	FN(timer_del),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -6038,6 +6059,10 @@ struct bpf_spin_lock {
 	__u32	val;
 };
 
+struct bpf_timer {
+	__u64 opaque;
+};
+
 struct bpf_sysctl {
 	__u32	write;		/* Sysctl is being read (= 0) or written (= 1).
 				 * Allows 1,2,4-byte read, but no write.
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
new file mode 100644
index 000000000000..6b7a16a54e70
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "timer.skel.h"
+
+static int timer(struct timer *timer_skel)
+{
+	int err, prog_fd;
+	__u32 duration = 0, retval;
+
+	err = timer__attach(timer_skel);
+	if (!ASSERT_OK(err, "timer_attach"))
+		return err;
+
+	prog_fd = bpf_program__fd(timer_skel->progs.test1);
+	err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+				NULL, NULL, &retval, &duration);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(retval, 0, "test_run");
+
+	ASSERT_EQ(timer_skel->data->callback_check, 52, "callback_check1");
+	usleep(50 * 1000); /* 10 msecs should be enough, but give it extra */
+	ASSERT_EQ(timer_skel->data->callback_check, 42, "callback_check2");
+
+	timer__detach(timer_skel);
+	return 0;
+}
+
+void test_timer(void)
+{
+	struct timer *timer_skel = NULL;
+	int err;
+
+	timer_skel = timer__open_and_load();
+	if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
+		goto cleanup;
+
+	err = timer(timer_skel);
+	ASSERT_OK(err, "timer");
+cleanup:
+	timer__destroy(timer_skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
new file mode 100644
index 000000000000..2cf0634f10c9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+struct map_elem {
+	struct bpf_timer timer;
+	int counter;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1000);
+	__type(key, int);
+	__type(value, struct map_elem);
+} hmap SEC(".maps");
+
+__u64 callback_check = 52;
+
+static int timer_cb(struct bpf_map *map, int *key, struct map_elem *val)
+{
+	callback_check--;
+	if (--val->counter)
+		/* re-arm the timer again to execute after 1 msec */
+		bpf_timer_mod(&val->timer, 1);
+	return 0;
+}
+
+int bpf_timer_test(void)
+{
+	struct map_elem *val;
+	int key = 0;
+
+	val = bpf_map_lookup_elem(&hmap, &key);
+	if (val) {
+		bpf_timer_init(&val->timer, timer_cb, 0);
+		bpf_timer_mod(&val->timer, 1);
+	}
+	return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+	struct map_elem val = {};
+	int key = 0;
+
+	val.counter = 10, /* number of times to trigger timer_cb */
+	bpf_map_update_elem(&hmap, &key, &val, 0);
+	return bpf_timer_test();
+}
-- 
2.30.2




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux