On 10/30/23 23:36, Martin KaFai Lau wrote:
On 10/30/23 12:28 PM, thinker.li@xxxxxxxxx wrote:
From: Kui-Feng Lee <thinker.li@xxxxxxxxx>
Replace the static list of struct_ops types with per-btf
struct_ops_tab to
enable dynamic registration.
Both bpf_dummy_ops and bpf_tcp_ca now utilize the registration function
instead of being listed in bpf_struct_ops_types.h.
Cc: netdev@xxxxxxxxxxxxxxx
Signed-off-by: Kui-Feng Lee <thinker.li@xxxxxxxxx>
---
include/linux/bpf.h | 36 ++++++--
include/linux/btf.h | 5 +-
kernel/bpf/bpf_struct_ops.c | 140 +++++++++---------------------
kernel/bpf/bpf_struct_ops_types.h | 12 ---
kernel/bpf/btf.c | 41 ++++++++-
net/bpf/bpf_dummy_struct_ops.c | 14 ++-
net/ipv4/bpf_tcp_ca.c | 16 +++-
7 files changed, 140 insertions(+), 124 deletions(-)
delete mode 100644 kernel/bpf/bpf_struct_ops_types.h
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c993df3cf699..9d7105ff06db 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1644,7 +1644,6 @@ struct bpf_struct_ops_desc {
#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
#define BPF_MODULE_OWNER ((void *)((0xeB9FUL << 2) +
POISON_POINTER_DELTA))
const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf
*btf, u32 type_id);
-void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log);
bool bpf_struct_ops_get(const void *kdata);
void bpf_struct_ops_put(const void *kdata);
int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
@@ -1690,10 +1689,6 @@ static inline const struct bpf_struct_ops_desc
*bpf_struct_ops_find(struct btf *
{
return NULL;
}
-static inline void bpf_struct_ops_init(struct btf *btf,
- struct bpf_verifier_log *log)
-{
-}
static inline bool bpf_try_module_get(const void *data, struct
module *owner)
{
return try_module_get(owner);
@@ -3232,4 +3227,35 @@ static inline bool bpf_is_subprog(const struct
bpf_prog *prog)
return prog->aux->func_idx != 0;
}
+int register_bpf_struct_ops(struct bpf_struct_ops *st_ops);
+
+enum bpf_struct_ops_state {
+ BPF_STRUCT_OPS_STATE_INIT,
+ BPF_STRUCT_OPS_STATE_INUSE,
+ BPF_STRUCT_OPS_STATE_TOBEFREE,
+ BPF_STRUCT_OPS_STATE_READY,
+};
+
+struct bpf_struct_ops_common_value {
+ refcount_t refcnt;
+ enum bpf_struct_ops_state state;
+};
+
+/* bpf_struct_ops_##_name (e.g. bpf_struct_ops_tcp_congestion_ops) is
+ * the map's value exposed to the userspace and its btf-type-id is
+ * stored at the map->btf_vmlinux_value_type_id.
+ *
+ */
+#define DEFINE_STRUCT_OPS_VALUE_TYPE(_name) \
+extern struct bpf_struct_ops bpf_##_name; \
+ \
+struct bpf_struct_ops_##_name { \
+ struct bpf_struct_ops_common_value common; \
+ struct _name data ____cacheline_aligned_in_smp; \
+}
+
+extern int bpf_struct_ops_init(struct bpf_struct_ops_desc *st_ops_desc,
+ struct btf *btf,
+ struct bpf_verifier_log *log);
+
#endif /* _LINUX_BPF_H */
diff --git a/include/linux/btf.h b/include/linux/btf.h
index a8813605f2f6..954536431e0b 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -12,6 +12,8 @@
#include <uapi/linux/bpf.h>
#define BTF_TYPE_EMIT(type) ((void)(type *)0)
+#define BTF_STRUCT_OPS_TYPE_EMIT(type) {((void)(struct type *)0); \
((void)(struct type *)0); is new. Why is it needed?
This is a trick of BTF to force compiler generate type info for
the given type. Without trick, compiler may skip these types if these
type are not used at all in the module. For example, modules usually
don't use value types of struct_ops directly.
+ ((void)(struct bpf_struct_ops_##type *)0); }
#define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val)
/* These need to be macros, as the expressions are used in assembler
input */
@@ -201,6 +203,7 @@ u32 btf_obj_id(const struct btf *btf);
bool btf_is_kernel(const struct btf *btf);
bool btf_is_module(const struct btf *btf);
struct module *btf_try_get_module(const struct btf *btf);
+struct btf *btf_get_module_btf(const struct module *module);
u32 btf_nr_types(const struct btf *btf);
bool btf_member_is_reg_int(const struct btf *btf, const struct
btf_type *s,
const struct btf_member *m,
@@ -575,8 +578,6 @@ static inline bool btf_type_is_struct_ptr(struct
btf *btf, const struct btf_type
struct bpf_struct_ops;
struct bpf_struct_ops_desc;
-struct bpf_struct_ops_desc *
-btf_add_struct_ops(struct btf *btf, struct bpf_struct_ops *st_ops);
const struct bpf_struct_ops_desc *
btf_get_struct_ops(struct btf *btf, u32 *ret_cnt);
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index db2bbba50e38..f3ec72be9c63 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -13,21 +13,8 @@
#include <linux/btf_ids.h>
#include <linux/rcupdate_wait.h>
-enum bpf_struct_ops_state {
- BPF_STRUCT_OPS_STATE_INIT,
- BPF_STRUCT_OPS_STATE_INUSE,
- BPF_STRUCT_OPS_STATE_TOBEFREE,
- BPF_STRUCT_OPS_STATE_READY,
-};
-
-struct bpf_struct_ops_common_value {
- refcount_t refcnt;
- enum bpf_struct_ops_state state;
-};
-#define BPF_STRUCT_OPS_COMMON_VALUE struct
bpf_struct_ops_common_value common
-
struct bpf_struct_ops_value {
- BPF_STRUCT_OPS_COMMON_VALUE;
+ struct bpf_struct_ops_common_value common;
This cleanup is good. It should have been done together in patch 5
instead when refcnt and state were grouped into a new 'struct
bpf_struct_ops_common_value'.
Got it!
char data[] ____cacheline_aligned_in_smp;
};
@@ -72,35 +59,6 @@ static DEFINE_MUTEX(update_mutex);
#define VALUE_PREFIX "bpf_struct_ops_"
#define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1)
-/* bpf_struct_ops_##_name (e.g. bpf_struct_ops_tcp_congestion_ops) is
- * the map's value exposed to the userspace and its btf-type-id is
- * stored at the map->btf_vmlinux_value_type_id.
- *
- */
-#define BPF_STRUCT_OPS_TYPE(_name) \
-extern struct bpf_struct_ops bpf_##_name; \
- \
-struct bpf_struct_ops_##_name { \
- BPF_STRUCT_OPS_COMMON_VALUE; \
- struct _name data ____cacheline_aligned_in_smp; \
-};
-#include "bpf_struct_ops_types.h"
-#undef BPF_STRUCT_OPS_TYPE
-
-enum {
-#define BPF_STRUCT_OPS_TYPE(_name) BPF_STRUCT_OPS_TYPE_##_name,
-#include "bpf_struct_ops_types.h"
-#undef BPF_STRUCT_OPS_TYPE
- __NR_BPF_STRUCT_OPS_TYPE,
-};
-
-static struct bpf_struct_ops_desc bpf_struct_ops[] = {
-#define BPF_STRUCT_OPS_TYPE(_name) \
- [BPF_STRUCT_OPS_TYPE_##_name] = { .st_ops = &bpf_##_name },
-#include "bpf_struct_ops_types.h"
-#undef BPF_STRUCT_OPS_TYPE
-};
-
const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = {
};
@@ -110,13 +68,22 @@ const struct bpf_prog_ops bpf_struct_ops_prog_ops
= {
#endif
};
-static const struct btf_type *module_type;
-static const struct btf_type *common_value_type;
+BTF_ID_LIST(st_ops_ids)
+BTF_ID(struct, module)
+BTF_ID(struct, bpf_struct_ops_common_value)
This should have been done in a separated patch immediately after patch
1. The patch 7 has unrelated changes/cleanups like this and the above
BPF_STRUCT_OPS_COMMON_VALUE which could have been done earlier as
preparation patches instead of packing them together with the main
change here: "switch to dynamic registration". The commit message for
the BTF_ID_LIST changes could be like: "A preparation to completely
retire the bpf_struct_ops_init() function in the latter patch...".
Got it!
+
+enum {
+ idx_module_id,
+ idx_st_ops_common_value_id,
nit. upper case to stay consistent with other similar usages.
+};
+
[ ... ]
+int register_bpf_struct_ops(struct bpf_struct_ops *st_ops)
+{
+ struct bpf_struct_ops_desc *desc;
+ struct bpf_verifier_log *log;
+ struct btf *btf;
+ int err = 0;
+
+ if (st_ops == NULL)
NULL check is not needed. caller will never do that. If it really wanted
to try, other values would have similar effect.
Ok!
+ return -EINVAL;
+
+ btf = btf_get_module_btf(st_ops->owner);
+ if (!btf)
+ return -EINVAL;
+
+ log = kzalloc(sizeof(*log), GFP_KERNEL | __GFP_NOWARN);
+ if (!log) {
+ err = -ENOMEM;
+ goto errout;
+ }
+
+ log->level = BPF_LOG_KERNEL;
+
+ desc = btf_add_struct_ops(btf, st_ops);
+ if (IS_ERR(desc)) {
+ err = PTR_ERR(desc);
+ goto errout;
+ }
+
+ err = bpf_struct_ops_init(desc, btf, log);
When bpf_struct_ops_init() returns err, desc is in btf_struct_ops_tab
but it is in an uninitialized state. May be do the bpf_struct_ops_init()
in btf_add_struct_ops() and only increment struct_ops_tab->cnt when
everything is correct.
agree
+
+errout:
+ kfree(log);
+ btf_put(btf);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(register_bpf_struct_ops);