On 8/26/22 4:44 AM, Alexei Starovoitov wrote:
From: Alexei Starovoitov <ast@xxxxxxxxxx>
Introduce sysctl kernel.bpf_force_dyn_alloc to force dynamic allocation in bpf
hash maps. All selftests/bpf should pass with bpf_force_dyn_alloc set to 0 or 1,
and all bpf programs (both sleepable and not) should see no functional
difference. The only observable effect of the sysctl should be improved memory
usage.
Acked-by: Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx>
Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx>
---
 include/linux/filter.h | 2 ++
 kernel/bpf/core.c      | 2 ++
 kernel/bpf/hashtab.c   | 5 +++++
 kernel/bpf/syscall.c   | 9 +++++++++
 4 files changed, 18 insertions(+)
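
For anyone testing this: a minimal sketch of flipping the knob from userspace,
assuming the usual kernel.* -> /proc/sys/kernel/ procfs mapping (the helper
name is mine; note the .mode = 0600 in the sysctl table below, so this needs
root):

  #include <fcntl.h>
  #include <unistd.h>

  /* Hypothetical helper: write "0" or "1" to the sysctl file. */
  static int set_bpf_force_dyn_alloc(int on)
  {
          char c = on ? '1' : '0';
          int fd, ret;

          fd = open("/proc/sys/kernel/bpf_force_dyn_alloc", O_WRONLY);
          if (fd < 0)
                  return -1;
          ret = write(fd, &c, 1) == 1 ? 0 : -1;
          close(fd);
          return ret;
  }
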
diff --git a/include/linux/filter.h b/include/linux/filter.h
index a5f21dc3c432..eb4d4a0c0bde 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1009,6 +1009,8 @@ bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
 }
 #endif
 
+extern int bpf_force_dyn_alloc;
+
 #ifdef CONFIG_BPF_JIT
 extern int bpf_jit_enable;
 extern int bpf_jit_harden;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 639437f36928..a13e78ea4b90 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -533,6 +533,8 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
         bpf_prog_kallsyms_del(fp);
 }
 
+int bpf_force_dyn_alloc __read_mostly;
+
 #ifdef CONFIG_BPF_JIT
 /* All BPF JIT sysctl knobs here. */
 int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 89f26cbddef5..f68a3400939e 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -505,6 +505,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
         bpf_map_init_from_attr(&htab->map, attr);
 
+        if (!lru && bpf_force_dyn_alloc) {
+                prealloc = false;
+                htab->map.map_flags |= BPF_F_NO_PREALLOC;
+        }
+
The rationale is essentially for testing, right? Would be nice to avoid
making this sysctl uapi. It will just confuse users with implementation
details, imho, and then it's hard to remove again.
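
Fwiw, users who want dynamic allocation can already opt in per map today
without a global knob, e.g. with a BTF-defined map roughly like the below
(map name and sizes are made up for illustration):

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  /* Illustrative hash map that opts into dynamic allocation. */
  struct {
          __uint(type, BPF_MAP_TYPE_HASH);
          __uint(max_entries, 1024);
          __uint(map_flags, BPF_F_NO_PREALLOC);
          __type(key, __u32);
          __type(value, __u64);
  } demo_htab SEC(".maps");
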
         if (percpu_lru) {
                 /* ensure each CPU's lru list has >=1 elements.
                  * since we are at it, make each lru list has the same
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 074c901fbb4e..5c631244b63b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -5299,6 +5299,15 @@ static struct ctl_table bpf_syscall_table[] = {
                 .mode = 0644,
                 .proc_handler = bpf_stats_handler,
         },
+        {
+                .procname = "bpf_force_dyn_alloc",
+                .data = &bpf_force_dyn_alloc,
+                .maxlen = sizeof(int),
+                .mode = 0600,
+                .proc_handler = proc_dointvec_minmax,
+                .extra1 = SYSCTL_ZERO,
+                .extra2 = SYSCTL_ONE,
+        },
         { }
 };
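
If the knob does go in, one way to see it take effect: the hashtab.c hunk
above also sets BPF_F_NO_PREALLOC in map->map_flags, so a freshly created
hash map should report the flag. Untested sketch using libbpf's
bpf_map_create()/bpf_obj_get_info_by_fd() (map name and sizes are made up):

  #include <stdio.h>
  #include <unistd.h>
  #include <linux/bpf.h>
  #include <bpf/bpf.h>

  int main(void)
  {
          struct bpf_map_info info = {};
          __u32 len = sizeof(info);
          int fd;

          /* Plain hash map, no explicit flags: 4-byte key, 8-byte value. */
          fd = bpf_map_create(BPF_MAP_TYPE_HASH, "demo", 4, 8, 128, NULL);
          if (fd < 0)
                  return 1;
          if (!bpf_obj_get_info_by_fd(fd, &info, &len))
                  printf("map_flags: 0x%x\n", info.map_flags);
          close(fd);
          return 0;
  }
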