Currently passing alignment greater than 4 to bpf_jit_binary_alloc does
not work: in such cases it aligns only to 4 bytes. However, this is
required on s390, where in order to load a constant from memory in a
large (>512k) BPF program, one must use the lgrl instruction, whose
memory operand must be aligned on an 8-byte boundary.

This patch makes it possible to request an arbitrary power-of-2
alignment from bpf_jit_binary_alloc by allocating extra padding bytes
and aligning the resulting pointer rather than the start offset.

An alternative would be to simply increase the alignment of
bpf_binary_header.image to 8, but this would increase the risk of
wasting a page on arches that don't need it, and would also be
insufficient in case someone needs e.g. 16-byte alignment in the
future.

Signed-off-by: Ilya Leoshkevich <iii@xxxxxxxxxxxxx>
---
 include/linux/filter.h |  6 ++++--
 kernel/bpf/core.c      | 22 +++++++++++++++++-----
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7a6f8f6f1da4..351a31eec24b 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -515,10 +515,12 @@ struct sock_fprog_kern {
 	struct sock_filter	*filter;
 };
 
+/* Some arches need word alignment for their instructions */
+#define BPF_IMAGE_ALIGNMENT 4
+
 struct bpf_binary_header {
 	u32 pages;
-	/* Some arches need word alignment for their instructions */
-	u8 image[] __aligned(4);
+	u8 image[] __aligned(BPF_IMAGE_ALIGNMENT);
 };
 
 struct bpf_prog {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c1fde0303280..75dd3a43ada0 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -31,6 +31,8 @@
 #include <linux/rcupdate.h>
 #include <linux/perf_event.h>
 #include <linux/extable.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
 #include <asm/unaligned.h>
 
 /* Registers */
@@ -812,14 +814,20 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 		     unsigned int alignment,
 		     bpf_jit_fill_hole_t bpf_fill_ill_insns)
 {
+	u32 size, hole, start, pages, padding;
 	struct bpf_binary_header *hdr;
-	u32 size, hole, start, pages;
+
+	WARN_ON_ONCE(!is_power_of_2(alignment));
+	if (alignment <= BPF_IMAGE_ALIGNMENT)
+		padding = 0;
+	else
+		padding = alignment - BPF_IMAGE_ALIGNMENT + 1;
 
 	/* Most of BPF filters are really small, but if some of them
 	 * fill a page, allow at least 128 extra bytes to insert a
 	 * random section of illegal instructions.
 	 */
-	size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
+	size = round_up(proglen + sizeof(*hdr) + padding + 128, PAGE_SIZE);
 	pages = size / PAGE_SIZE;
 
 	if (bpf_jit_charge_modmem(pages))
@@ -834,12 +842,16 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 	bpf_fill_ill_insns(hdr, size);
 
 	hdr->pages = pages;
-	hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
+	hole = min_t(unsigned int,
+		     size - (proglen + sizeof(*hdr) + padding),
 		     PAGE_SIZE - sizeof(*hdr));
-	start = (get_random_int() % hole) & ~(alignment - 1);
+	start = get_random_int() % hole;
 
 	/* Leave a random number of instructions before BPF code. */
-	*image_ptr = &hdr->image[start];
+	if (alignment <= BPF_IMAGE_ALIGNMENT)
+		*image_ptr = &hdr->image[start & ~(alignment - 1)];
+	else
+		*image_ptr = PTR_ALIGN(&hdr->image[start], alignment);
 
 	return hdr;
 }
-- 
2.23.0