On 3/8/23 14:21, Martin KaFai Lau wrote:
On 3/7/23 4:50 PM, Kui-Feng Lee wrote:
Improve the BPF_LINK_UPDATE command of bpf() so that it can
conveniently switch between different struct_ops on a single
bpf_link. This enables smoother transitions from one struct_ops
to another.
The struct_ops map passed along with BPF_LINK_UPDATE must have the
BPF_F_LINK flag set.
Signed-off-by: Kui-Feng Lee <kuifeng@xxxxxxxx>
---
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 8 ++++--
kernel/bpf/bpf_struct_ops.c | 46 ++++++++++++++++++++++++++++++++++
kernel/bpf/syscall.c | 43 ++++++++++++++++++++++++++++---
tools/include/uapi/linux/bpf.h | 7 +++++-
5 files changed, 98 insertions(+), 7 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index afca6c526fe4..29d555a82bad 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1470,6 +1470,7 @@ struct bpf_link_ops {
void (*show_fdinfo)(const struct bpf_link *link, struct seq_file
*seq);
int (*fill_link_info)(const struct bpf_link *link,
struct bpf_link_info *info);
+ int (*update_map)(struct bpf_link *link, struct bpf_map *new_map);
};
struct bpf_tramp_link {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f9fc7b8af3c4..edef9cf7d596 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1555,8 +1555,12 @@ union bpf_attr {
struct { /* struct used by BPF_LINK_UPDATE command */
__u32 link_fd; /* link fd */
- /* new program fd to update link with */
- __u32 new_prog_fd;
+ union {
+ /* new program fd to update link with */
+ __u32 new_prog_fd;
+ /* new struct_ops map fd to update link with */
+ __u32 new_map_fd;
+ };
__u32 flags; /* extra flags */
/* expected link's program fd; is specified only if
* BPF_F_REPLACE flag is set in flags */
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 5a7e86cf67b5..79e663869e51 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -775,10 +775,56 @@ static int
bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
return 0;
}
+static int bpf_struct_ops_map_link_update(struct bpf_link *link,
struct bpf_map *new_map)
+{
+ struct bpf_struct_ops_value *kvalue;
+ struct bpf_struct_ops_map *st_map, *old_st_map;
+ struct bpf_struct_ops_link *st_link;
+ struct bpf_map *old_map;
+ int err = 0;
+
+ if (new_map->map_type != BPF_MAP_TYPE_STRUCT_OPS ||
+ !(new_map->map_flags & BPF_F_LINK))
+ return -EINVAL;
+
+ mutex_lock(&update_mutex);
+
+ st_link = container_of(link, struct bpf_struct_ops_link, link);
+
+ /* The new and old struct_ops must be the same type. */
+ st_map = container_of(new_map, struct bpf_struct_ops_map, map);
Nit: move the st_link and st_map initialization out of the lock.
Ok
+
+ old_map = st_link->map;
Use rcu_dereference_protected(...) to read st_link->map here.
+ old_st_map = container_of(old_map, struct bpf_struct_ops_map, map);
+ if (st_map->st_ops != old_st_map->st_ops ||
+ /* Pair with smp_store_release() during map_update */
+ smp_load_acquire(&st_map->kvalue.state) !=
BPF_STRUCT_OPS_STATE_READY) {
Nit: test smp_load_acquire(&st_map->kvalue.state) outside of the lock.
Do it together with the new_map checks at the beginning of the function.
Ok
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ kvalue = &st_map->kvalue;
+
+ err = st_map->st_ops->update(kvalue->data, old_st_map->kvalue.data);
+ if (err)
+ goto err_out;
+
+ bpf_map_inc(new_map);
+ rcu_assign_pointer(st_link->map, new_map);
+
+ bpf_map_put(old_map);
+
+err_out:
+ mutex_unlock(&update_mutex);
+
+ return err;
+}
+
static const struct bpf_link_ops bpf_struct_ops_map_lops = {
.dealloc = bpf_struct_ops_map_link_dealloc,
.show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
.fill_link_info = bpf_struct_ops_map_link_fill_link_info,
+ .update_map = bpf_struct_ops_map_link_update,
};
int bpf_struct_ops_link_create(union bpf_attr *attr)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 3a4503987a48..c087dd2e2c08 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4658,6 +4658,30 @@ static int link_create(union bpf_attr *attr,
bpfptr_t uattr)
return ret;
}
+static int link_update_map(struct bpf_link *link, union bpf_attr *attr)
+{
+ struct bpf_map *new_map;
+ int ret = 0;
+
+ new_map = bpf_map_get(attr->link_update.new_map_fd);
+ if (IS_ERR(new_map))
+ return -EINVAL;
+
+ if (new_map->map_type != BPF_MAP_TYPE_STRUCT_OPS) {
This is an unnecessary test. Each individual '.update_map()' should test for
its own map_type, and the new bpf_struct_ops_map_link_update() already does so.
Will remove it.
+ ret = -EINVAL;
+ goto out_put_map;
+ }
+
+ if (link->ops->update_map)
This has just been tested in link_update() before calling
link_update_map().
Sure
+ ret = link->ops->update_map(link, new_map);
+ else
+ ret = -EINVAL;
+
+out_put_map:
+ bpf_map_put(new_map);
+ return ret;
+}
+
#define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd
static int link_update(union bpf_attr *attr)
@@ -4670,14 +4694,25 @@ static int link_update(union bpf_attr *attr)
if (CHECK_ATTR(BPF_LINK_UPDATE))
return -EINVAL;
- flags = attr->link_update.flags;
- if (flags & ~BPF_F_REPLACE)
- return -EINVAL;
-
link = bpf_link_get_from_fd(attr->link_update.link_fd);
if (IS_ERR(link))
return PTR_ERR(link);
+ flags = attr->link_update.flags;
+
+ if (link->ops->update_map) {
+ if (flags) /* always replace the existing one */
+ ret = -EINVAL;
+ else
+ ret = link_update_map(link, attr);
+ goto out_put_link;
+ }
+
+ if (flags & ~BPF_F_REPLACE) {
+ ret = -EINVAL;
+ goto out_put_link;
+ }
+
new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
if (IS_ERR(new_prog)) {
ret = PTR_ERR(new_prog);