By improving the BPF_LINK_UPDATE command of bpf(), it should allow you to conveniently switch between different struct_ops on a single bpf_link. This would enable smoother transitions from one struct_ops to another. The struct_ops maps passing along with BPF_LINK_UPDATE should have the BPF_F_LINK flag. Signed-off-by: Kui-Feng Lee <kuifeng@xxxxxxxx> --- include/linux/bpf.h | 3 +++ include/uapi/linux/bpf.h | 21 +++++++++++---- kernel/bpf/bpf_struct_ops.c | 48 +++++++++++++++++++++++++++++++++- kernel/bpf/syscall.c | 34 ++++++++++++++++++++++++ net/ipv4/bpf_tcp_ca.c | 6 +++++ tools/include/uapi/linux/bpf.h | 21 +++++++++++---- 6 files changed, 122 insertions(+), 11 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2faf01fa3f04..29287a2d8b1b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1476,6 +1476,8 @@ struct bpf_link_ops { void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq); int (*fill_link_info)(const struct bpf_link *link, struct bpf_link_info *info); + int (*update_map)(struct bpf_link *link, struct bpf_map *new_map, + struct bpf_map *old_map); }; struct bpf_tramp_link { @@ -1518,6 +1520,7 @@ struct bpf_struct_ops { void *kdata, const void *udata); int (*reg)(void *kdata); void (*unreg)(void *kdata); + int (*update)(void *kdata, void *old_kdata); int (*validate)(void *kdata); const struct btf_type *type; const struct btf_type *value_type; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 42f40ee083bf..e3d3b5160d26 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1555,12 +1555,23 @@ union bpf_attr { struct { /* struct used by BPF_LINK_UPDATE command */ __u32 link_fd; /* link fd */ - /* new program fd to update link with */ - __u32 new_prog_fd; + union { + /* new program fd to update link with */ + __u32 new_prog_fd; + /* new struct_ops map fd to update link with */ + __u32 new_map_fd; + }; __u32 flags; /* extra flags */ - /* expected link's program fd; is specified only if - * BPF_F_REPLACE flag is set in flags */ - __u32 old_prog_fd; + union { + /* expected link's program fd; is specified only if + * BPF_F_REPLACE flag is set in flags. + */ + __u32 old_prog_fd; + /* expected link's map fd; is specified only + * if BPF_F_REPLACE flag is set. + */ + __u32 old_map_fd; + }; } link_update; struct { diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 5e77d1d4a7f5..71dd5d5d3ce5 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -65,6 +65,8 @@ struct bpf_struct_ops_link { struct bpf_map __rcu *map; }; +static DEFINE_MUTEX(update_mutex); + #define VALUE_PREFIX "bpf_struct_ops_" #define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1) @@ -660,7 +662,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) if (attr->value_size != vt->size) return ERR_PTR(-EINVAL); - if (attr->map_flags & BPF_F_LINK && !st_ops->validate) + if (attr->map_flags & BPF_F_LINK && (!st_ops->validate || !st_ops->update)) return ERR_PTR(-EOPNOTSUPP); t = st_ops->type; @@ -806,10 +808,54 @@ static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link, return 0; } +static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map *new_map, + struct bpf_map *expected_old_map) +{ + struct bpf_struct_ops_map *st_map, *old_st_map; + struct bpf_map *old_map; + struct bpf_struct_ops_link *st_link; + int err = 0; + + st_link = container_of(link, struct bpf_struct_ops_link, link); + st_map = container_of(new_map, struct bpf_struct_ops_map, map); + + if (!bpf_struct_ops_valid_to_reg(new_map)) + return -EINVAL; + + mutex_lock(&update_mutex); + + old_map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex)); + if (expected_old_map && old_map != expected_old_map) { + err = -EINVAL; + goto err_out; + } + + old_st_map = container_of(old_map, struct bpf_struct_ops_map, map); + /* The new and old struct_ops must be the same type. */ + if (st_map->st_ops != old_st_map->st_ops) { + err = -EINVAL; + goto err_out; + } + + err = st_map->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data); + if (err) + goto err_out; + + bpf_map_inc(new_map); + rcu_assign_pointer(st_link->map, new_map); + bpf_map_put(old_map); + +err_out: + mutex_unlock(&update_mutex); + + return err; +} + static const struct bpf_link_ops bpf_struct_ops_map_lops = { .dealloc = bpf_struct_ops_map_link_dealloc, .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo, .fill_link_info = bpf_struct_ops_map_link_fill_link_info, + .update_map = bpf_struct_ops_map_link_update, }; int bpf_struct_ops_link_create(union bpf_attr *attr) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 21f76698875c..b4d758fa5981 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4682,6 +4682,35 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) return ret; } +static int link_update_map(struct bpf_link *link, union bpf_attr *attr) +{ + struct bpf_map *new_map, *old_map = NULL; + int ret; + + new_map = bpf_map_get(attr->link_update.new_map_fd); + if (IS_ERR(new_map)) + return -EINVAL; + + if (attr->link_update.flags & BPF_F_REPLACE) { + old_map = bpf_map_get(attr->link_update.old_map_fd); + if (IS_ERR(old_map)) { + ret = -EINVAL; + goto out_put; + } + } else if (attr->link_update.old_map_fd) { + ret = -EINVAL; + goto out_put; + } + + ret = link->ops->update_map(link, new_map, old_map); + + if (old_map) + bpf_map_put(old_map); +out_put: + bpf_map_put(new_map); + return ret; +} + #define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd static int link_update(union bpf_attr *attr) @@ -4702,6 +4731,11 @@ static int link_update(union bpf_attr *attr) if (IS_ERR(link)) return PTR_ERR(link); + if (link->ops->update_map) { + ret = link_update_map(link, attr); + goto out_put_link; + } + new_prog = bpf_prog_get(attr->link_update.new_prog_fd); if (IS_ERR(new_prog)) { ret = PTR_ERR(new_prog); diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index bbbd5eb94db2..e8b27826283e 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -264,6 +264,11 @@ static void bpf_tcp_ca_unreg(void *kdata) tcp_unregister_congestion_control(kdata); } +static int bpf_tcp_ca_update(void *kdata, void *old_kdata) +{ + return tcp_update_congestion_control(kdata, old_kdata); +} + static int bpf_tcp_ca_validate(void *kdata) { return tcp_validate_congestion_control(kdata); @@ -273,6 +278,7 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = { .verifier_ops = &bpf_tcp_ca_verifier_ops, .reg = bpf_tcp_ca_reg, .unreg = bpf_tcp_ca_unreg, + .update = bpf_tcp_ca_update, .check_member = bpf_tcp_ca_check_member, .init_member = bpf_tcp_ca_init_member, .init = bpf_tcp_ca_init, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9cf1deaf21f2..d6c5a022ae28 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1555,12 +1555,23 @@ union bpf_attr { struct { /* struct used by BPF_LINK_UPDATE command */ __u32 link_fd; /* link fd */ - /* new program fd to update link with */ - __u32 new_prog_fd; + union { + /* new program fd to update link with */ + __u32 new_prog_fd; + /* new struct_ops map fd to update link with */ + __u32 new_map_fd; + }; __u32 flags; /* extra flags */ - /* expected link's program fd; is specified only if - * BPF_F_REPLACE flag is set in flags */ - __u32 old_prog_fd; + union { + /* expected link's program fd; is specified only if + * BPF_F_REPLACE flag is set in flags. + */ + __u32 old_prog_fd; + /* expected link's map fd; is specified only + * if BPF_F_REPLACE flag is set. + */ + __u32 old_map_fd; + }; } link_update; struct { -- 2.34.1