On 11/12/2024 7:04 AM, Martin KaFai Lau wrote:
On 11/11/24 4:16 AM, Xu Kuohai wrote:
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index e99fce81e916..d6dd56fc80d8 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -23,7 +23,6 @@ struct bpf_struct_ops_value {
struct bpf_struct_ops_map {
struct bpf_map map;
- struct rcu_head rcu;
Since it needs a respin (more on it later), it will be useful to separate this cleanup as a separate patch in the same patch series.
OK
const struct bpf_struct_ops_desc *st_ops_desc;
/* protect map_update */
struct mutex lock;
@@ -32,6 +31,8 @@ struct bpf_struct_ops_map {
* (in kvalue.data).
*/
struct bpf_link **links;
+ /* ksyms for bpf trampolines */
+ struct bpf_ksym **ksyms;
u32 funcs_cnt;
u32 image_pages_cnt;
/* image_pages is an array of pages that has all the trampolines
@@ -586,6 +587,49 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
return 0;
}
+static void bpf_struct_ops_ksym_init(const char *tname, const char *mname,
+ void *image, unsigned int size,
+ struct bpf_ksym *ksym)
+{
+ snprintf(ksym->name, KSYM_NAME_LEN, "bpf__%s_%s", tname, mname);
+ INIT_LIST_HEAD_RCU(&ksym->lnode);
+ bpf_image_ksym_init(image, size, ksym);
+}
+
+static void bpf_struct_ops_map_ksyms_add(struct bpf_struct_ops_map *st_map)
+{
+ u32 i;
+
+ for (i = 0; i < st_map->funcs_cnt; i++) {
+ if (!st_map->ksyms[i])
+ break;
+ bpf_image_ksym_add(st_map->ksyms[i]);
+ }
+}
+
+static void bpf_struct_ops_map_del_ksyms(struct bpf_struct_ops_map *st_map)
+{
+ u32 i;
+
+ for (i = 0; i < st_map->funcs_cnt; i++) {
+ if (!st_map->ksyms[i])
+ break;
+ bpf_image_ksym_del(st_map->ksyms[i]);
+ }
+}
+
+static void bpf_struct_ops_map_free_ksyms(struct bpf_struct_ops_map *st_map)
+{
+ u32 i;
+
+ for (i = 0; i < st_map->funcs_cnt; i++) {
+ if (!st_map->ksyms[i])
+ break;
+ kfree(st_map->ksyms[i]);
+ st_map->links[i] = NULL;
s/links/ksyms/
pw-bot: cr
Oops, good catch
+ }
+}
+
static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
void *value, u64 flags)
{
@@ -602,6 +646,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
u32 i, trampoline_start, image_off = 0;
void *cur_image = NULL, *image = NULL;
struct bpf_link **plink;
+ struct bpf_ksym **pksym;
+ const char *tname, *mname;
if (flags)
return -EINVAL;
@@ -641,14 +687,18 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
kdata = &kvalue->data;
plink = st_map->links;
+ pksym = st_map->ksyms;
+ tname = btf_name_by_offset(st_map->btf, t->name_off);
module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]);
for_each_member(i, t, member) {
const struct btf_type *mtype, *ptype;
struct bpf_prog *prog;
struct bpf_tramp_link *link;
+ struct bpf_ksym *ksym;
u32 moff;
moff = __btf_member_bit_offset(t, member) / 8;
+ mname = btf_name_by_offset(st_map->btf, member->name_off);
ptype = btf_type_resolve_ptr(st_map->btf, member->type, NULL);
if (ptype == module_type) {
if (*(void **)(udata + moff))
@@ -718,6 +768,14 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
&bpf_struct_ops_link_lops, prog);
*plink++ = &link->link;
+ ksym = kzalloc(sizeof(*ksym), GFP_USER);
link is also using kzalloc but probably both link and ksym allocation should use bpf_map_kzalloc instead. This switch can be done for both together later as a follow up patch.
OK, will do.
+ if (!ksym) {
+ bpf_prog_put(prog);
afaik, this bpf_prog_put is not needed. The bpf_link_init above took the prog ownership and the bpf_struct_ops_map_put_progs() at the error path will take care of it.
Right, good catch
+ err = -ENOMEM;
+ goto reset_unlock;
+ }
+ *pksym = ksym;
nit. Follow the *plink++ style above and does the same *pksym++ here.
OK
+
trampoline_start = image_off;
err = bpf_struct_ops_prepare_trampoline(tlinks, link,
&st_ops->func_models[i],
@@ -737,6 +795,12 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
/* put prog_id to udata */
*(unsigned long *)(udata + moff) = prog->aux->id;
+
+ /* init ksym for this trampoline */
+ bpf_struct_ops_ksym_init(tname, mname,
+ image + trampoline_start,
+ image_off - trampoline_start,
+ *pksym++);
then uses "ksym" here.
}
if (st_ops->validate) {
@@ -785,6 +849,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
*/
reset_unlock:
+ bpf_struct_ops_map_free_ksyms(st_map);
bpf_struct_ops_map_free_image(st_map);
bpf_struct_ops_map_put_progs(st_map);
memset(uvalue, 0, map->value_size);
@@ -792,6 +857,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
unlock:
kfree(tlinks);
mutex_unlock(&st_map->lock);
+ if (!err)
+ bpf_struct_ops_map_ksyms_add(st_map);
return err;
}
@@ -851,7 +918,10 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
if (st_map->links)
bpf_struct_ops_map_put_progs(st_map);
+ if (st_map->ksyms)
+ bpf_struct_ops_map_free_ksyms(st_map);
bpf_map_area_free(st_map->links);
+ bpf_map_area_free(st_map->ksyms);
bpf_struct_ops_map_free_image(st_map);
bpf_map_area_free(st_map->uvalue);
bpf_map_area_free(st_map);
@@ -868,6 +938,9 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
if (btf_is_module(st_map->btf))
module_put(st_map->st_ops_desc->st_ops->owner);
+ if (st_map->ksyms)
This null test should not be needed.
Agree, will remove it.
+ bpf_struct_ops_map_del_ksyms(st_map);
+
/* The struct_ops's function may switch to another struct_ops.
*
* For example, bpf_tcp_cc_x->init() may switch to
@@ -980,7 +1053,11 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
st_map->links =
bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_links *),
NUMA_NO_NODE);
- if (!st_map->uvalue || !st_map->links) {
+
+ st_map->ksyms =
+ bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_ksyms *),
+ NUMA_NO_NODE);
.map_mem_usage at least needs to include the st_map->ksyms[] pointer array. func_cnts should be used instead of btf_type_vlen(vt) for link also in .map_mem_usage.
Right, agree
+ if (!st_map->uvalue || !st_map->links || !st_map->ksyms) {
ret = -ENOMEM;
goto errout_free;
}