int ninsns;
int prologue_len;
int epilogue_offset;
@@ -85,7 +86,9 @@ static inline int ninsns_rvoff(int ninsns)
struct rv_jit_data {
struct bpf_binary_header *header;
+ struct bpf_binary_header *ro_header;
u8 *image;
+ u8 *ro_image;
struct rv_jit_context ctx;
};
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 0ca4f5c0097c..d77b16338ba2 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -144,7 +144,11 @@ static bool in_auipc_jalr_range(s64 val)
/* Emit fixed-length instructions for address */
static int emit_addr(u8 rd, u64 addr, bool extra_pass, struct rv_jit_context *ctx)
{
- u64 ip = (u64)(ctx->insns + ctx->ninsns);
+ /*
+ * Use the ro_insns(RX) to calculate the offset as the BPF program will
+ * finally run from this memory region.
+ */
+ u64 ip = (u64)(ctx->ro_insns + ctx->ninsns);
s64 off = addr - ip;
s64 upper = (off + (1 << 11)) >> 12;
s64 lower = off & 0xfff;
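The upper/lower split above is the standard auipc+addi decomposition: auipc adds a sign-extended upper immediate (shifted left by 12) to the PC, addi then adds a signed 12-bit immediate, and the (1 << 11) rounding term compensates for the sign extension of the low 12 bits. A standalone sketch of the arithmetic, not part of the patch (it assumes an arithmetic right shift for negative values, as kernel code does):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static void split(int64_t off)
{
	int64_t upper = (off + (1 << 11)) >> 12;	/* auipc immediate */
	int64_t lower = off & 0xfff;			/* addi immediate */

	if (lower >= 2048)	/* addi sign-extends its 12-bit immediate */
		lower -= 4096;
	assert(upper * 4096 + lower == off);
	printf("off=%jd -> auipc %jd, addi %jd\n",
	       (intmax_t)off, (intmax_t)upper, (intmax_t)lower);
}

int main(void)
{
	split(0x12345);
	split(2047);
	split(2048);	/* exercises the (1 << 11) rounding */
	split(-0x7ff);
	return 0;
}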
@@ -465,7 +469,11 @@ static int emit_call(u64 addr, bool fixed_addr, struct rv_jit_context *ctx)
 u64 ip;
- if (addr && ctx->insns) {
- ip = (u64)(long)(ctx->insns + ctx->ninsns);
+ if (addr && ctx->insns && ctx->ro_insns) {
+ /*
+ * Use the ro_insns(RX) to calculate the offset as the BPF
+ * program will finally run from this memory region.
+ */
+ ip = (u64)(long)(ctx->ro_insns + ctx->ninsns);
 off = addr - ip;
 }
@@ -593,12 +602,17 @@ static int add_exception_handler(const struct bpf_insn *insn,
return -EINVAL;
ex = &ctx->prog->aux->extable[ctx->nexentries];
- pc = (unsigned long)&ctx->insns[ctx->ninsns - insn_len];
+ pc = (unsigned long)&ctx->ro_insns[ctx->ninsns - insn_len];
- offset = pc - (long)&ex->insn;
- if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+ /*
+ * This is the relative offset of the instruction that may fault from
+ * the exception table itself. This will be written to the exception
+ * table and if this instruction faults, the destination register will
+ * be set to '0' and the execution will jump to the next instruction.
+ */
+ ins_offset = pc - (long)&ex->insn;
+ if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
return -ERANGE;
- ex->insn = offset;
/*
* Since the extable follows the program, the fixup offset is always
@@ -607,12 +621,25 @@ static int add_exception_handler(const struct bpf_insn *insn,
* bits. We don't need to worry about buildtime or runtime sort
* modifying the upper bits because the table is already sorted, and
* isn't part of the main exception table.
+ *
+ * The fixup_offset is set to the next instruction from the instruction
+ * that may fault. The execution will jump to this after handling the
+ * fault.
 */
- offset = (long)&ex->fixup - (pc + insn_len * sizeof(u16));
- if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
+ fixup_offset = (long)&ex->fixup - (pc + insn_len * sizeof(u16));
+ if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
return -ERANGE;
- ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) |
+ /*
+ * The offsets above have been calculated using the RO buffer but we
+ * need to use the R/W buffer for writes.
+ * Switch ex to the R/W buffer for writing.
+ */
+ ex = (void *)ctx->insns + ((void *)ex - (void *)ctx->ro_insns);
+
+ ex->insn = ins_offset;
+
+ ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
ex->type = EX_TYPE_BPF;
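The pointer rewrite above is the crux of the dual-buffer scheme: ins_offset and fixup_offset are computed against the RX image, where the program will actually run, but the extable entry itself has to be written through the RW alias. A minimal sketch of that translation (rw_base/ro_base stand in for ctx->insns/ctx->ro_insns; the helper names are illustrative, not from the patch):

#include <stdint.h>

/* Map a pointer computed against the RX view onto the RW view; both
 * buffers have identical layout, so only the base differs. */
static void *rx_to_rw(void *rw_base, void *ro_base, void *rx_ptr)
{
	return (char *)rw_base + ((char *)rx_ptr - (char *)ro_base);
}

static void write_u32(void *rw_base, void *ro_base, uint32_t *rx_field,
		      uint32_t val)
{
	uint32_t *rw_field = rx_to_rw(rw_base, ro_base, rx_field);

	*rw_field = val;	/* reaches the RX copy when the image is finalized */
}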
@@ -1006,6 +1033,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
ctx.ninsns = 0;
ctx.insns = NULL;
+ ctx.ro_insns = NULL;
ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr,
flags, &ctx);
if (ret < 0)
return ret;
@@ -1014,7 +1042,15 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
return -EFBIG;
ctx.ninsns = 0;
+ /*
+ * The bpf_int_jit_compile() uses a RW buffer (ctx.insns) to write the
+ * JITed instructions and later copies it to a RX region (ctx.ro_insns).
+ * It also uses ctx.ro_insns to calculate offsets for jumps etc. As the
+ * trampoline image uses the same memory area for writing and execution,
+ * both ctx.insns and ctx.ro_insns can be set to image.
+ */
ctx.insns = image;
+ ctx.ro_insns = image;
ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr,
flags, &ctx);
if (ret < 0)
return ret;
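The two calls to __arch_prepare_bpf_trampoline() rely on the emitter treating a NULL buffer as a pure sizing pass: the first call only counts instructions, the second writes them into image. Paraphrased from the emit helper in arch/riscv/net/bpf_jit.h (details may differ slightly):

static inline void emit(const u32 insn, struct rv_jit_context *ctx)
{
	if (ctx->insns) {
		ctx->insns[ctx->ninsns] = insn;
		ctx->insns[ctx->ninsns + 1] = insn >> 16;
	}
	ctx->ninsns += 2;	/* ninsns counts 16-bit slots either way */
}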
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 7a26a3e1c73c..4c8dffc09368 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -8,6 +8,8 @@
#include <linux/bpf.h>
#include <linux/filter.h>
+#include <linux/memory.h>
+#include <asm/patch.h>
#include "bpf_jit.h"
/* Number of iterations to try until offsets converge. */
@@ -117,16 +119,27 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
sizeof(struct exception_table_entry);
prog_size = sizeof(*ctx->insns) * ctx->ninsns;
- jit_data->header =
- bpf_jit_binary_alloc(prog_size + extable_size,
- &jit_data->image,
- sizeof(u32),
- bpf_fill_ill_insns);
- if (!jit_data->header) {
+ jit_data->ro_header =
+	bpf_jit_binary_pack_alloc(prog_size + extable_size,
+				  &jit_data->ro_image, sizeof(u32),
+				  &jit_data->header, &jit_data->image,
+				  bpf_fill_ill_insns);
+ if (!jit_data->ro_header) {
prog = orig_prog;
goto out_offset;
}
+ /*
+ * Use the image(RW) for writing the JITed instructions. But also save
+ * the ro_image(RX) for calculating the offsets in the image. The RW
+ * image will be later copied to the RX image from where the program
+ * will run. The bpf_jit_binary_pack_finalize() will do this copy in the
+ * final step.
+ */
+ ctx->ro_insns = (u16 *)jit_data->ro_image;
ctx->insns = (u16 *)jit_data->image;
/*
* Now, when the image is allocated, the image can
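The allocator handshake this hunk depends on, in outline. This is a hedged sketch, with size and prog as placeholders; the signatures match include/linux/filter.h around the time of this series:

int err;
struct bpf_binary_header *ro_hdr, *rw_hdr;
u8 *ro_image, *rw_image;

ro_hdr = bpf_jit_binary_pack_alloc(size, &ro_image, sizeof(u32),
				   &rw_hdr, &rw_image, bpf_fill_ill_insns);
/* ... the JIT writes into rw_image, computes offsets against ro_image ... */
err = bpf_jit_binary_pack_finalize(prog, ro_hdr, rw_hdr);
/* finalize copies RW -> RX and frees the RW buffer; on failure it also
 * frees the RX region, which is why the error path below NULLs ro_header */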
@@ -138,14 +151,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
if (i == NR_JIT_ITERATIONS) {
pr_err("bpf-jit: image did not converge in <%d passes!\n", i);
- if (jit_data->header)
- bpf_jit_binary_free(jit_data->header);
prog = orig_prog;
- goto out_offset;
+ goto out_free_hdr;
}
if (extable_size)
- prog->aux->extable = (void *)ctx->insns + prog_size;
+ prog->aux->extable = (void *)ctx->ro_insns + prog_size;
skip_init_ctx:
pass++;
@@ -154,23 +165,35 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
bpf_jit_build_prologue(ctx);
if (build_body(ctx, extra_pass, NULL)) {
- bpf_jit_binary_free(jit_data->header);
prog = orig_prog;
- goto out_offset;
+ goto out_free_hdr;
}
bpf_jit_build_epilogue(ctx);
if (bpf_jit_enable > 1)
bpf_jit_dump(prog->len, prog_size, pass, ctx->insns);
- prog->bpf_func = (void *)ctx->insns;
+ prog->bpf_func = (void *)ctx->ro_insns;
prog->jited = 1;
prog->jited_len = prog_size;
- bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns);
-
if (!prog->is_func || extra_pass) {
- bpf_jit_binary_lock_ro(jit_data->header);
+ if (WARN_ON(bpf_jit_binary_pack_finalize(prog,
+ jit_data->ro_header,
+ jit_data->header))) {
+ /* ro_header has been freed */
+ jit_data->ro_header = NULL;
+ prog = orig_prog;
+ goto out_offset;
+ }
+ /*
+ * The instructions have now been copied to the ROX region from
+ * where they will execute.
+ * Write any modified data cache blocks out to memory and
+ * invalidate the corresponding blocks in the instruction cache.
+ */
+ bpf_flush_icache(jit_data->ro_header, ctx->ro_insns + ctx->ninsns);
for (i = 0; i < prog->len; i++)
ctx->offset[i] = ninsns_rvoff(ctx->offset[i]);
bpf_prog_fill_jited_linfo(prog, ctx->offset);
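On RISC-V the flush itself is a thin wrapper; the substantive change above is only the address range, which now covers the RX region the CPU will fetch from rather than the discarded RW buffer. Paraphrased from arch/riscv/net/bpf_jit.h:

static inline void bpf_flush_icache(void *start, void *end)
{
	flush_icache_range((unsigned long)start, (unsigned long)end);
}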
@@ -185,6 +208,15 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
bpf_jit_prog_release_other(prog, prog == orig_prog ?
tmp : orig_prog);
return prog;
+
+out_free_hdr:
+ if (jit_data->header) {
+ bpf_arch_text_copy(&jit_data->ro_header->size,
+ &jit_data->header->size,
+ sizeof(jit_data->header->size));
+ bpf_jit_binary_pack_free(jit_data->ro_header, jit_data->header);
+ }
+ goto out_offset;
}
u64 bpf_jit_alloc_exec_limit(void)
@@ -204,3 +236,52 @@ void bpf_jit_free_exec(void *addr)
{
return vfree(addr);
}
+
+void *bpf_arch_text_copy(void *dst, void *src, size_t len)
+{
+ int ret;
+
+ mutex_lock(&text_mutex);
+ ret = patch_text_nosync(dst, src, len);
+ mutex_unlock(&text_mutex);
+
+ if (ret)
+ return ERR_PTR(-EINVAL);
+
+ return dst;
+}
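Callers check the ERR_PTR-style return rather than a plain error code; a hedged usage sketch, with rx_dst, rw_src and len as placeholders:

void *ret = bpf_arch_text_copy(rx_dst, rw_src, len);

if (IS_ERR(ret))
	return PTR_ERR(ret);	/* patching the ROX region failed */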
+
+int bpf_arch_text_invalidate(void *dst, size_t len)
+{
+ int ret = 0;