On 3/4/22 7:43 PM, Song Liu wrote:
Using HPAGE_PMD_SIZE as the size for bpf_prog_pack is not ideal in some
cases. Specifically, for NUMA systems, __vmalloc_node_range requires
PMD_SIZE * num_online_nodes() to allocate huge pages. Also, if the system
does not support huge pages (i.e., with cmdline option nohugevmalloc), it
is better to use PAGE_SIZE packs.
Add logic to select a proper size for bpf_prog_pack. This solution is not
ideal, as it makes assumptions about the behavior of module_alloc and
__vmalloc_node_range. However, it appears to be the easiest solution as
it doesn't require changes in module_alloc and vmalloc code.
nit: Fixes tag?
Signed-off-by: Song Liu <song@xxxxxxxxxx>
[...]
+static size_t bpf_prog_pack_size = -1;
+
+static inline int bpf_prog_chunk_count(void)
+{
+ WARN_ON_ONCE(bpf_prog_pack_size == -1);
+ return bpf_prog_pack_size / BPF_PROG_CHUNK_SIZE;
+}
+
static DEFINE_MUTEX(pack_mutex);
static LIST_HEAD(pack_list);
static struct bpf_prog_pack *alloc_new_pack(void)
{
struct bpf_prog_pack *pack;
+ size_t size;
+ void *ptr;
- pack = kzalloc(sizeof(*pack) + BITS_TO_BYTES(BPF_PROG_CHUNK_COUNT), GFP_KERNEL);
- if (!pack)
+ if (bpf_prog_pack_size == -1) {
+ /* Test whether we can get huge pages. If not just use
+ * PAGE_SIZE packs.
+ */
+ size = PMD_SIZE * num_online_nodes();
+ ptr = module_alloc(size);
+ if (ptr && is_vm_area_hugepages(ptr)) {
+ bpf_prog_pack_size = size;
+ goto got_ptr;
+ } else {
+ bpf_prog_pack_size = PAGE_SIZE;
+ vfree(ptr);
+ }
+ }
+
+ ptr = module_alloc(bpf_prog_pack_size);
+ if (!ptr)
return NULL;
- pack->ptr = module_alloc(BPF_PROG_PACK_SIZE);
- if (!pack->ptr) {
- kfree(pack);
+got_ptr:
+ pack = kzalloc(struct_size(pack, bitmap, BITS_TO_LONGS(bpf_prog_chunk_count())),
+ GFP_KERNEL);
+ if (!pack) {
+ vfree(ptr);
return NULL;
}
- bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE);
+ pack->ptr = ptr;
+ bitmap_zero(pack->bitmap, bpf_prog_pack_size / BPF_PROG_CHUNK_SIZE);
list_add_tail(&pack->list, &pack_list);
set_vm_flush_reset_perms(pack->ptr);
- set_memory_ro((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
- set_memory_x((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
+ set_memory_ro((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE);
+ set_memory_x((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE);
return pack;
}
@@ -864,7 +886,7 @@ static void *bpf_prog_pack_alloc(u32 size)
unsigned long pos;
void *ptr = NULL;
- if (size > BPF_PROG_MAX_PACK_PROG_SIZE) {
+ if (size > bpf_prog_pack_size) {
size = round_up(size, PAGE_SIZE);
ptr = module_alloc(size);
if (ptr) {
What happens if the /very first/ program requests an allocation size of >PAGE_SIZE? Wouldn't
this result in an OOB write?
The 'size > bpf_prog_pack_size' check is initially skipped because bpf_prog_pack_size is still -1
(which, as a size_t, is the maximum value), but then, if module_alloc() doesn't return a huge page,
we redo the allocation with bpf_prog_pack_size as PAGE_SIZE and return a pointer into this pack?
Thanks,
Daniel