On 9/30/24 7:49 AM, Alexei Starovoitov wrote:
On Thu, Sep 26, 2024 at 4:45 PM Yonghong Song <yonghong.song@xxxxxxxxx> wrote:
Three private stack states are used to direct jit action:
PSTACK_TREE_NO: do not use private stack
PSTACK_TREE_INTERNAL: adjust frame pointer address (similar to normal stack)
PSTACK_TREE_ROOT: set the frame pointer
Note that for subtree root, even if the root bpf_prog stack size is 0,
PSTACK_TREE_INTERNAL is still used. This is for bpf exception handling.
More details can be found in subsequent jit support and selftest patches.
Signed-off-by: Yonghong Song <yonghong.song@xxxxxxxxx>
---
include/linux/bpf.h | 9 +++++++++
kernel/bpf/core.c | 19 +++++++++++++++++++
kernel/bpf/verifier.c | 30 ++++++++++++++++++++++++++++++
3 files changed, 58 insertions(+)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 156b9516d9f6..8f02d11bd408 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1550,6 +1550,12 @@ struct bpf_prog_aux {
};
};
+enum bpf_pstack_state {
+ PSTACK_TREE_NO,
+ PSTACK_TREE_INTERNAL,
+ PSTACK_TREE_ROOT,
+};
The names could be improved, and 'state' doesn't quite fit, imo.
How about:
enum bpf_priv_stack_mode {
NO_PRIV_STACK,
PRIV_STACK_SUB_PROG,
PRIV_STACK_MAIN_PROG,
};
Since we agreed to use priv_stack instead of pstack, the above
names make sense. Will change.
+
struct bpf_prog {
u16 pages; /* Number of allocated pages */
u16 jited:1, /* Is our filter JIT'ed? */
@@ -1570,15 +1576,18 @@ struct bpf_prog {
pstack_eligible:1; /* Candidate for private stacks */
enum bpf_prog_type type; /* Type of BPF program */
enum bpf_attach_type expected_attach_type; /* For some prog types */
+ enum bpf_pstack_state pstack:2; /* Private stack state */
u32 len; /* Number of filter blocks */
u32 jited_len; /* Size of jited insns in bytes */
u8 tag[BPF_TAG_SIZE];
+ u16 subtree_stack_depth; /* Subtree stack depth if PSTACK_TREE_ROOT prog, 0 otherwise */
All the extra vars can be in prog->aux.
No need to put them in struct bpf_prog.
Will do.
struct bpf_prog_stats __percpu *stats;
int __percpu *active;
unsigned int (*bpf_func)(const void *ctx,
const struct bpf_insn *insn);
struct bpf_prog_aux *aux; /* Auxiliary fields */
struct sock_fprog_kern *orig_prog; /* Original BPF program */
+ void __percpu *private_stack_ptr;
same as this one. prog->aux should be fine.
Will do.
/* Instructions for interpreter */
union {
DECLARE_FLEX_ARRAY(struct sock_filter, insns);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 0727fff6de0e..d6eb052f6631 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1239,6 +1239,7 @@ void __weak bpf_jit_free(struct bpf_prog *fp)
struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
bpf_jit_binary_free(hdr);
+ free_percpu(fp->private_stack_ptr);
WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
}
@@ -2420,6 +2421,24 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
if (*err)
return fp;
+ if (fp->pstack_eligible) {
+ if (!fp->aux->stack_depth) {
+ fp->pstack = PSTACK_TREE_NO;
+ } else {
+ void __percpu *private_stack_ptr;
+
+ fp->pstack = PSTACK_TREE_ROOT;
+ private_stack_ptr =
+ __alloc_percpu_gfp(fp->aux->stack_depth, 8, GFP_KERNEL);
+ if (!private_stack_ptr) {
+ *err = -ENOMEM;
+ return fp;
+ }
+ fp->subtree_stack_depth = fp->aux->stack_depth;
+ fp->private_stack_ptr = private_stack_ptr;
+ }
+ }
+
fp = bpf_int_jit_compile(fp);
bpf_prog_jit_attempt_done(fp);
if (!fp->jited && jit_needed) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 69e17cb22037..9d093e2013ca 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -20060,6 +20060,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog, **func, *tmp;
int i, j, subprog_start, subprog_end = 0, len, subprog;
+ int subtree_top_idx, subtree_stack_depth;
struct bpf_map *map_ptr;
struct bpf_insn *insn;
void *old_bpf_func;
@@ -20138,6 +20139,35 @@ static int jit_subprogs(struct bpf_verifier_env *env)
func[i]->is_func = 1;
func[i]->sleepable = prog->sleepable;
func[i]->aux->func_idx = i;
+
+ subtree_top_idx = env->subprog_info[i].subtree_top_idx;
+ if (env->subprog_info[subtree_top_idx].pstack_eligible) {
+ if (subtree_top_idx == i)
+ func[i]->subtree_stack_depth =
+ env->subprog_info[i].subtree_stack_depth;
+
+ subtree_stack_depth = func[i]->subtree_stack_depth;
+ if (subtree_top_idx != i) {
+ if (env->subprog_info[subtree_top_idx].subtree_stack_depth)
+ func[i]->pstack = PSTACK_TREE_INTERNAL;
+ else
+ func[i]->pstack = PSTACK_TREE_NO;
+ } else if (!subtree_stack_depth) {
+ func[i]->pstack = PSTACK_TREE_INTERNAL;
+ } else {
+ void __percpu *private_stack_ptr;
+
+ func[i]->pstack = PSTACK_TREE_ROOT;
+ private_stack_ptr =
+ __alloc_percpu_gfp(subtree_stack_depth, 8, GFP_KERNEL);
+ if (!private_stack_ptr) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+ func[i]->private_stack_ptr = private_stack_ptr;
+ }
+ }
+
/* Below members will be freed only at prog->aux */
func[i]->aux->btf = prog->aux->btf;
func[i]->aux->func_info = prog->aux->func_info;
--
2.43.5