On Thu, Jul 11, 2019 at 4:53 AM Jiong Wang <jiong.wang@xxxxxxxxxxxxx> wrote: > > > Andrii Nakryiko writes: > > > On Thu, Jul 4, 2019 at 2:32 PM Jiong Wang <jiong.wang@xxxxxxxxxxxxx> wrote: > >> > >> This patch introduces list based bpf insn patching infra to bpf core layer > >> which is lower than verification layer. > >> > >> This layer has bpf insn sequence as the solo input, therefore the tasks > >> to be finished during list linerization is: > >> - copy insn > >> - relocate jumps > >> - relocation line info. > >> > >> Suggested-by: Alexei Starovoitov <ast@xxxxxxxxxx> > >> Suggested-by: Edward Cree <ecree@xxxxxxxxxxxxxx> > >> Signed-off-by: Jiong Wang <jiong.wang@xxxxxxxxxxxxx> > >> --- > >> include/linux/filter.h | 25 +++++ > >> kernel/bpf/core.c | 268 +++++++++++++++++++++++++++++++++++++++++++++++++ > >> 2 files changed, 293 insertions(+) > >> > >> diff --git a/include/linux/filter.h b/include/linux/filter.h > >> index 1fe53e7..1fea68c 100644 > >> --- a/include/linux/filter.h > >> +++ b/include/linux/filter.h > >> @@ -842,6 +842,31 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, > >> const struct bpf_insn *patch, u32 len); > >> int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt); > >> > >> +int bpf_jit_adj_imm_off(struct bpf_insn *insn, int old_idx, int new_idx, > >> + int idx_map[]); > >> + > >> +#define LIST_INSN_FLAG_PATCHED 0x1 > >> +#define LIST_INSN_FLAG_REMOVED 0x2 > >> +struct bpf_list_insn { > >> + struct bpf_insn insn; > >> + struct bpf_list_insn *next; > >> + s32 orig_idx; > >> + u32 flag; > >> +}; > >> + > >> +struct bpf_list_insn *bpf_create_list_insn(struct bpf_prog *prog); > >> +void bpf_destroy_list_insn(struct bpf_list_insn *list); > >> +/* Replace LIST_INSN with new list insns generated from PATCH. */ > >> +struct bpf_list_insn *bpf_patch_list_insn(struct bpf_list_insn *list_insn, > >> + const struct bpf_insn *patch, > >> + u32 len); > >> +/* Pre-patch list_insn with insns inside PATCH, meaning LIST_INSN is not > >> + * touched. New list insns are inserted before it. > >> + */ > >> +struct bpf_list_insn *bpf_prepatch_list_insn(struct bpf_list_insn *list_insn, > >> + const struct bpf_insn *patch, > >> + u32 len); > >> + > >> void bpf_clear_redirect_map(struct bpf_map *map); > >> > >> static inline bool xdp_return_frame_no_direct(void) > >> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c > >> index e2c1b43..e60703e 100644 > >> --- a/kernel/bpf/core.c > >> +++ b/kernel/bpf/core.c > >> @@ -502,6 +502,274 @@ int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt) > >> return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false)); > >> } > >> > >> +int bpf_jit_adj_imm_off(struct bpf_insn *insn, int old_idx, int new_idx, > >> + s32 idx_map[]) > >> +{ > >> + u8 code = insn->code; > >> + s64 imm; > >> + s32 off; > >> + > >> + if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) > >> + return 0; > >> + > >> + if (BPF_CLASS(code) == BPF_JMP && > >> + (BPF_OP(code) == BPF_EXIT || > >> + (BPF_OP(code) == BPF_CALL && insn->src_reg != BPF_PSEUDO_CALL))) > >> + return 0; > >> + > >> + /* BPF to BPF call. */ > >> + if (BPF_OP(code) == BPF_CALL) { > >> + imm = idx_map[old_idx + insn->imm + 1] - new_idx - 1; > >> + if (imm < S32_MIN || imm > S32_MAX) > >> + return -ERANGE; > >> + insn->imm = imm; > >> + return 1; > >> + } > >> + > >> + /* Jump. */ > >> + off = idx_map[old_idx + insn->off + 1] - new_idx - 1; > >> + if (off < S16_MIN || off > S16_MAX) > >> + return -ERANGE; > >> + insn->off = off; > >> + return 0; > >> +} > >> + > >> +void bpf_destroy_list_insn(struct bpf_list_insn *list) > >> +{ > >> + struct bpf_list_insn *elem, *next; > >> + > >> + for (elem = list; elem; elem = next) { > >> + next = elem->next; > >> + kvfree(elem); > >> + } > >> +} > >> + > >> +struct bpf_list_insn *bpf_create_list_insn(struct bpf_prog *prog) > >> +{ > >> + unsigned int idx, len = prog->len; > >> + struct bpf_list_insn *hdr, *prev; > >> + struct bpf_insn *insns; > >> + > >> + hdr = kvzalloc(sizeof(*hdr), GFP_KERNEL); > >> + if (!hdr) > >> + return ERR_PTR(-ENOMEM); > >> + > >> + insns = prog->insnsi; > >> + hdr->insn = insns[0]; > >> + hdr->orig_idx = 1; > >> + prev = hdr; > > > > I'm not sure why you need this "prologue" instead of handling first > > instruction uniformly in for loop below? > > It is because the head of the list doesn't have precessor, so no need of > the prev->next assignment, not could do a check inside the loop to rule the > head out when doing it. yeah, prev = NULL initially. Then if (prev) prev->next = node; Or see my suggestiong about having patchabel_insns_list wrapper struct (in cover letter thread). > > >> + > >> + for (idx = 1; idx < len; idx++) { > >> + struct bpf_list_insn *node = kvzalloc(sizeof(*node), > >> + GFP_KERNEL); > >> + > >> + if (!node) { > >> + /* Destroy what has been allocated. */ > >> + bpf_destroy_list_insn(hdr); > >> + return ERR_PTR(-ENOMEM); > >> + } > >> + node->insn = insns[idx]; > >> + node->orig_idx = idx + 1; > > > > Why orig_idx is 1-based? It's really confusing. > > orig_idx == 0 means one insn is without original insn, means it is an new > insn generated for patching purpose. > > While the LIST_INSN_FLAG_PATCHED in the RFC means one insn in original prog > is patched. > > I had been trying to differenciate above two cases, but yes, they are > confusing and differenciating them might be useless, if an insn in original > prog is patched, all its info could be treated as clobbered and needing > re-calculating or should do conservative assumption. Instruction will be new and not patched only in patch_buffer. Once you add them to the list, they are patched, no? Not sure what's the distinction you are trying to maintain here. > > > > >> + prev->next = node; > >> + prev = node; > >> + } > >> + > >> + return hdr; > >> +} > >> + [...] > >> + > >> + len--; > >> + patch++; > >> + > >> + prev = list_insn; > >> + next = list_insn->next; > >> + for (idx = 0; idx < len; idx++) { > >> + struct bpf_list_insn *node = kvzalloc(sizeof(*node), > >> + GFP_KERNEL); > >> + > >> + if (!node) { > >> + /* Link what's allocated, so list destroyer could > >> + * free them. > >> + */ > >> + prev->next = next; > > > > Why this special handling, if you can just insert element so that list > > is well-formed after each instruction? > > Good idea, just always do "node->next = next", the "prev->next = node" in > next round will fix it. > > > > >> + return ERR_PTR(-ENOMEM); > >> + } > >> + > >> + node->insn = patch[idx]; > >> + prev->next = node; > >> + prev = node; > > > > E.g., > > > > node->next = next; > > prev->next = node; > > prev = node; > > > >> + } > >> + > >> + prev->next = next; > > > > And no need for this either. > > > >> + return prev; > >> +} > >> + > >> +struct bpf_list_insn *bpf_prepatch_list_insn(struct bpf_list_insn *list_insn, > >> + const struct bpf_insn *patch, > >> + u32 len) > > > > prepatch and patch functions should share the same logic. > > > > Prepend is just that - insert all instructions from buffer before current insns. > > Patch -> replace current one with first instriction in a buffer, then > > prepend remaining ones before the next instruction (so patch should > > call info prepend, with adjusted count and array pointer). > > Ack, there indeed has quite a few things to simplify. > > > > >> +{ > >> + struct bpf_list_insn *prev, *node, *begin_node; > >> + u32 idx; > >> + > >> + if (!len) > >> + return list_insn; > >> + > >> + node = kvzalloc(sizeof(*node), GFP_KERNEL); > >> + if (!node) > >> + return ERR_PTR(-ENOMEM); > >> + node->insn = patch[0]; > >> + begin_node = node; > >> + prev = node; > >> + > >> + for (idx = 1; idx < len; idx++) { > >> + node = kvzalloc(sizeof(*node), GFP_KERNEL); > >> + if (!node) { > >> + node = begin_node; > >> + /* Release what's has been allocated. */ > >> + while (node) { > >> + struct bpf_list_insn *next = node->next; > >> + > >> + kvfree(node); > >> + node = next; > >> + } > >> + return ERR_PTR(-ENOMEM); > >> + } > >> + node->insn = patch[idx]; > >> + prev->next = node; > >> + prev = node; > >> + } > >> + > >> + prev->next = list_insn; > >> + return begin_node; > >> +} > >> + > >> void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp) > >> { > >> int i; > >> -- > >> 2.7.4 > >> >