On Mon, Jul 11, 2022 at 2:48 PM Delyan Kratunov <delyank@xxxxxx> wrote: > > Similarly to bpf_timer, bpf_delayed_work represents a callback that will > be executed at a later time, in a different execution context. > > Its treatment in maps is practically the same as timers (to a degree > that perhaps calls for refactoring), except releasing the work does not > need to release any resources - we will wait for pending executions in > the program destruction path. > > Signed-off-by: Delyan Kratunov <delyank@xxxxxx> > --- > include/linux/bpf.h | 9 ++++++++- > include/linux/btf.h | 1 + > include/uapi/linux/bpf.h | 8 ++++++++ > kernel/bpf/btf.c | 21 +++++++++++++++++++++ > kernel/bpf/syscall.c | 24 ++++++++++++++++++++++-- > kernel/bpf/verifier.c | 9 +++++++++ > tools/include/uapi/linux/bpf.h | 8 ++++++++ > 7 files changed, 77 insertions(+), 3 deletions(-) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 0edd7d2c0064..ad9d2cfb0411 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -164,7 +164,8 @@ enum { > BPF_MAP_VALUE_OFF_MAX = 8, > BPF_MAP_OFF_ARR_MAX = BPF_MAP_VALUE_OFF_MAX + > 1 + /* for bpf_spin_lock */ > - 1, /* for bpf_timer */ > + 1 + /* for bpf_timer */ > + 1, /* for bpf_delayed_work */ > }; > > enum bpf_kptr_type { > @@ -212,6 +213,7 @@ struct bpf_map { > int spin_lock_off; /* >=0 valid offset, <0 error */ > struct bpf_map_value_off *kptr_off_tab; > int timer_off; /* >=0 valid offset, <0 error */ > + int delayed_work_off; /* >=0 valid offset, <0 error */ > u32 id; > int numa_node; > u32 btf_key_type_id; > @@ -256,6 +258,11 @@ static inline bool map_value_has_timer(const struct bpf_map *map) > return map->timer_off >= 0; > } > > +static inline bool map_value_has_delayed_work(const struct bpf_map *map) > +{ > + return map->delayed_work_off >= 0; > +} > + > static inline bool map_value_has_kptrs(const struct bpf_map *map) > { > return !IS_ERR_OR_NULL(map->kptr_off_tab); > diff --git a/include/linux/btf.h b/include/linux/btf.h > 
index 1bfed7fa0428..2b8f473a6aa0 100644 > --- a/include/linux/btf.h > +++ b/include/linux/btf.h > @@ -132,6 +132,7 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, > u32 expected_offset, u32 expected_size); > int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); > int btf_find_timer(const struct btf *btf, const struct btf_type *t); > +int btf_find_delayed_work(const struct btf *btf, const struct btf_type *t); > struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf, > const struct btf_type *t); > bool btf_type_is_void(const struct btf_type *t); > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index e81362891596..d68fc4f472f1 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -6691,6 +6691,14 @@ struct bpf_dynptr { > __u64 :64; > } __attribute__((aligned(8))); > > +struct bpf_delayed_work { > + __u64 :64; > + __u64 :64; > + __u64 :64; > + __u64 :64; > + __u64 :64; > +} __attribute__((aligned(8))); > + > struct bpf_sysctl { > __u32 write; /* Sysctl is being read (= 0) or written (= 1). > * Allows 1,2,4-byte read, but no write. > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c > index f08037c31dd7..e4ab52cc25fe 100644 > --- a/kernel/bpf/btf.c > +++ b/kernel/bpf/btf.c > @@ -3196,6 +3196,7 @@ enum btf_field_type { > BTF_FIELD_SPIN_LOCK, > BTF_FIELD_TIMER, > BTF_FIELD_KPTR, > + BTF_FIELD_DELAYED_WORK, > }; > > enum { > @@ -3283,6 +3284,7 @@ static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t > switch (field_type) { > case BTF_FIELD_SPIN_LOCK: > case BTF_FIELD_TIMER: > + case BTF_FIELD_DELAYED_WORK: > ret = btf_find_struct(btf, member_type, off, sz, > idx < info_cnt ? 
&info[idx] : &tmp); > if (ret < 0) > @@ -3333,6 +3335,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t, > switch (field_type) { > case BTF_FIELD_SPIN_LOCK: > case BTF_FIELD_TIMER: > + case BTF_FIELD_DELAYED_WORK: > ret = btf_find_struct(btf, var_type, off, sz, > idx < info_cnt ? &info[idx] : &tmp); > if (ret < 0) > @@ -3375,6 +3378,11 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t, > sz = sizeof(struct bpf_timer); > align = __alignof__(struct bpf_timer); > break; > + case BTF_FIELD_DELAYED_WORK: > + name = "bpf_delayed_work"; > + sz = sizeof(struct bpf_delayed_work); > + align = __alignof__(struct bpf_delayed_work); > + break; > case BTF_FIELD_KPTR: > name = NULL; > sz = sizeof(u64); > @@ -3421,6 +3429,19 @@ int btf_find_timer(const struct btf *btf, const struct btf_type *t) > return info.off; > } > > +int btf_find_delayed_work(const struct btf *btf, const struct btf_type *t) > +{ > + struct btf_field_info info; > + int ret; > + > + ret = btf_find_field(btf, t, BTF_FIELD_DELAYED_WORK, &info, 1); > + if (ret < 0) > + return ret; > + if (!ret) > + return -ENOENT; > + return info.off; > +} > + > struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf, > const struct btf_type *t) > { > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > index 7d5af5b99f0d..041972305344 100644 > --- a/kernel/bpf/syscall.c > +++ b/kernel/bpf/syscall.c > @@ -914,10 +914,11 @@ static int bpf_map_alloc_off_arr(struct bpf_map *map) > bool has_spin_lock = map_value_has_spin_lock(map); > bool has_timer = map_value_has_timer(map); > bool has_kptrs = map_value_has_kptrs(map); > + bool has_delayed_work = map_value_has_delayed_work(map); > struct bpf_map_off_arr *off_arr; > u32 i; > > - if (!has_spin_lock && !has_timer && !has_kptrs) { > + if (!has_spin_lock && !has_timer && !has_kptrs && !has_delayed_work) { > map->off_arr = NULL; > return 0; > } > @@ -953,6 +954,13 @@ static int bpf_map_alloc_off_arr(struct bpf_map 
*map) > } > off_arr->cnt += tab->nr_off; > } > + if (has_delayed_work) { > + i = off_arr->cnt; > + > + off_arr->field_off[i] = map->delayed_work_off; > + off_arr->field_sz[i] = sizeof(struct bpf_delayed_work); > + off_arr->cnt++; > + } > > if (off_arr->cnt == 1) > return 0; > @@ -1014,6 +1022,16 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, > return -EOPNOTSUPP; > } > > + map->delayed_work_off = btf_find_delayed_work(btf, value_type); > + if (map_value_has_delayed_work(map)) { > + if (map->map_flags & BPF_F_RDONLY_PROG) > + return -EACCES; > + if (map->map_type != BPF_MAP_TYPE_HASH && > + map->map_type != BPF_MAP_TYPE_LRU_HASH && > + map->map_type != BPF_MAP_TYPE_ARRAY) > + return -EOPNOTSUPP; > + } > + > map->kptr_off_tab = btf_parse_kptrs(btf, value_type); > if (map_value_has_kptrs(map)) { > if (!bpf_capable()) { > @@ -1095,6 +1113,7 @@ static int map_create(union bpf_attr *attr) > > map->spin_lock_off = -EINVAL; > map->timer_off = -EINVAL; > + map->delayed_work_off = -EINVAL; > if (attr->btf_key_type_id || attr->btf_value_type_id || > /* Even the map's value is a kernel's struct, > * the bpf_prog.o must have BTF to begin with > @@ -1863,7 +1882,8 @@ static int map_freeze(const union bpf_attr *attr) > return PTR_ERR(map); > > if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS || > - map_value_has_timer(map) || map_value_has_kptrs(map)) { > + map_value_has_timer(map) || map_value_has_kptrs(map) || > + map_value_has_delayed_work(map)) { Not introduced by you, but shouldn't this condition also test map_value_has_spin_lock()? > fdput(f); > return -ENOTSUPP; > } Also, please check whether bpf_map_mmap() needs to be modified as well. 
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index 2859901ffbe3..9fd311b7a1ff 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -3817,6 +3817,15 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, > return -EACCES; > } > } > + if (map_value_has_delayed_work(map) && src == ACCESS_DIRECT) { > + u32 t = map->delayed_work_off; > + > + if (reg->smin_value + off < t + sizeof(struct bpf_delayed_work) && > + t < reg->umax_value + off + size) { > + verbose(env, "bpf_delayed_work cannot be accessed directly by load/store regno=%d off=%d\n", regno, off); > + return -EACCES; > + } > + } > if (map_value_has_kptrs(map)) { > struct bpf_map_value_off *tab = map->kptr_off_tab; > int i; > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > index e81362891596..d68fc4f472f1 100644 > --- a/tools/include/uapi/linux/bpf.h > +++ b/tools/include/uapi/linux/bpf.h > @@ -6691,6 +6691,14 @@ struct bpf_dynptr { > __u64 :64; > } __attribute__((aligned(8))); > > +struct bpf_delayed_work { > + __u64 :64; > + __u64 :64; > + __u64 :64; > + __u64 :64; > + __u64 :64; > +} __attribute__((aligned(8))); > + > struct bpf_sysctl { > __u32 write; /* Sysctl is being read (= 0) or written (= 1). > * Allows 1,2,4-byte read, but no write. > -- > 2.36.1