Memory allocated via bpf_map_*alloc() or the bpf memory allocator is
accounted with active vm. We only need to annotate the allocation paths;
the memory is unaccounted automatically when it is freed.

Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx>
---
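[Note: the snippet below only illustrates the annotation pattern this patch
applies at each allocation site; it is not part of the diff itself.
bpf_annotated_alloc_example() is a made-up helper for illustration;
active_vm_item_set() and ACTIVE_VM_BPF come from the active vm
infrastructure introduced earlier in this series. The real map helpers
additionally pick the map's memcg via set_active_memcg().]

/* Sketch of the per-call-site annotation used throughout this patch. */
#include <linux/active_vm.h>
#include <linux/slab.h>

static void *bpf_annotated_alloc_example(size_t size, gfp_t flags, int node)
{
        int old_active_vm;
        void *ptr;

        /* Charge everything allocated from here on to ACTIVE_VM_BPF. */
        old_active_vm = active_vm_item_set(ACTIVE_VM_BPF);
        /* __GFP_ACCOUNT keeps the memcg charge; active vm adds the global stat. */
        ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
        /* Restore whatever item (if any) the task was annotated with before. */
        active_vm_item_set(old_active_vm);

        return ptr;
}

Freeing needs no matching annotation: the accounting is tied to the
allocated pages/objects and is dropped automatically when they are freed,
which is why only the allocation paths are touched below. With the patch
applied, /proc/meminfo gains a line of the form (value illustrative):

        BPF:     1024 kB
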
 fs/proc/meminfo.c         |  3 +++
 include/linux/active_vm.h | 10 +++++-----
 kernel/bpf/memalloc.c     |  5 +++++
 kernel/bpf/ringbuf.c      |  8 ++++++--
 kernel/bpf/syscall.c      | 25 +++++++++++++++++++++++--
 5 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 440960110a42..efe1fbd6a80e 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -16,6 +16,7 @@
 #ifdef CONFIG_CMA
 #include <linux/cma.h>
 #endif
+#include <linux/active_vm.h>
 #include <asm/page.h>
 #include "internal.h"

@@ -159,6 +160,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)

         arch_report_meminfo(m);

+        seq_printf(m, "BPF: %8lu kB\n",
+                   active_vm_item_sum(ACTIVE_VM_BPF) >> 10);
         return 0;
 }

diff --git a/include/linux/active_vm.h b/include/linux/active_vm.h
index 21f9aaca12c4..e26edfb3654e 100644
--- a/include/linux/active_vm.h
+++ b/include/linux/active_vm.h
@@ -2,6 +2,11 @@
 #ifndef __INCLUDE_ACTIVE_VM_H
 #define __INCLUDE_ACTIVE_VM_H

+enum active_vm_item {
+        ACTIVE_VM_BPF = 1,
+        NR_ACTIVE_VM_ITEM = ACTIVE_VM_BPF,
+};
+
 #ifdef CONFIG_ACTIVE_VM
 #include <linux/jump_label.h>
 #include <linux/preempt.h>
@@ -18,11 +23,6 @@ static inline bool active_vm_enabled(void)
         return true;
 }

-enum active_vm_item {
-        DUMMY_ITEM = 1,
-        NR_ACTIVE_VM_ITEM = DUMMY_ITEM,
-};
-
 struct active_vm_stat {
         long stat[NR_ACTIVE_VM_ITEM];
 };
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index ebcc3dd0fa19..403ae0d83241 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -7,6 +7,8 @@
 #include <linux/bpf_mem_alloc.h>
 #include <linux/memcontrol.h>
 #include <asm/local.h>
+#include <linux/page_ext.h>
+#include <linux/active_vm.h>

 /* Any context (including NMI) BPF specific memory allocator.
  *
@@ -165,11 +167,13 @@ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node)
 {
         struct mem_cgroup *memcg = NULL, *old_memcg;
         unsigned long flags;
+        int old_active_vm;
         void *obj;
         int i;

         memcg = get_memcg(c);
         old_memcg = set_active_memcg(memcg);
+        old_active_vm = active_vm_item_set(ACTIVE_VM_BPF);
         for (i = 0; i < cnt; i++) {
                 /*
                  * free_by_rcu is only manipulated by irq work refill_work().
@@ -209,6 +213,7 @@ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node)
                 if (IS_ENABLED(CONFIG_PREEMPT_RT))
                         local_irq_restore(flags);
         }
+        active_vm_item_set(old_active_vm);
         set_active_memcg(old_memcg);
         mem_cgroup_put(memcg);
 }
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 3264bf509c68..7575f078eb34 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -11,6 +11,7 @@
 #include <linux/kmemleak.h>
 #include <uapi/linux/btf.h>
 #include <linux/btf_ids.h>
+#include <linux/active_vm.h>

 #define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE)

@@ -107,16 +108,18 @@ static struct page **bpf_ringbuf_pages_alloc(int nr_meta_pages,
 {
         int nr_pages = nr_meta_pages + nr_data_pages;
         struct page **pages, *page;
+        int old_active_vm;
         int array_size;
         int i;

+        old_active_vm = active_vm_item_set(ACTIVE_VM_BPF);
         array_size = (nr_meta_pages + 2 * nr_data_pages) * sizeof(*pages);
         pages = bpf_map_area_alloc(array_size, numa_node);
         if (!pages)
                 goto err;

         for (i = 0; i < nr_pages; i++) {
-                page = alloc_pages_node(numa_node, flags, 0);
+                page = alloc_pages_node(numa_node, flags | __GFP_ACCOUNT, 0);
                 if (!page) {
                         nr_pages = i;
                         goto err_free_pages;
@@ -125,12 +128,13 @@ static struct page **bpf_ringbuf_pages_alloc(int nr_meta_pages,
                 if (i >= nr_meta_pages)
                         pages[nr_data_pages + i] = page;
         }
-
+        active_vm_item_set(old_active_vm);
         return pages;

 err_free_pages:
         bpf_ringbuf_pages_free(pages, nr_pages);
 err:
+        active_vm_item_set(old_active_vm);
         return NULL;
 }

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c38875d6aea4..92572d4a09fb 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -35,6 +35,8 @@
 #include <linux/rcupdate_trace.h>
 #include <linux/memcontrol.h>
 #include <linux/trace_events.h>
+#include <linux/page_ext.h>
+#include <linux/active_vm.h>

 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
                           (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
@@ -312,11 +314,14 @@ static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
         const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_ACCOUNT;
         unsigned int flags = 0;
         unsigned long align = 1;
+        int old_active_vm;
         void *area;
+        void *ptr;

         if (size >= SIZE_MAX)
                 return NULL;

+        old_active_vm = active_vm_item_set(ACTIVE_VM_BPF);
         /* kmalloc()'ed memory can't be mmap()'ed */
         if (mmapable) {
                 BUG_ON(!PAGE_ALIGNED(size));
@@ -325,13 +330,17 @@ static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
         } else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
                 area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY,
                                     numa_node);
-                if (area != NULL)
+                if (area != NULL) {
+                        active_vm_item_set(old_active_vm);
                         return area;
+                }
         }

-        return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
+        ptr = __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
                         gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL,
                         flags, numa_node, __builtin_return_address(0));
+        active_vm_item_set(old_active_vm);
+        return ptr;
 }

 void *bpf_map_area_alloc(u64 size, int numa_node)
@@ -445,11 +454,14 @@ void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
                            int node)
 {
         struct mem_cgroup *memcg, *old_memcg;
+        int old_active_vm;
         void *ptr;

         memcg = bpf_map_get_memcg(map);
         old_memcg = set_active_memcg(memcg);
+        old_active_vm = active_vm_item_set(ACTIVE_VM_BPF);
         ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
+        active_vm_item_set(old_active_vm);
         set_active_memcg(old_memcg);
         mem_cgroup_put(memcg);

@@ -459,11 +471,14 @@ void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
 void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
 {
         struct mem_cgroup *memcg, *old_memcg;
+        int old_active_vm;
         void *ptr;

         memcg = bpf_map_get_memcg(map);
         old_memcg = set_active_memcg(memcg);
+        old_active_vm = active_vm_item_set(ACTIVE_VM_BPF);
         ptr = kzalloc(size, flags | __GFP_ACCOUNT);
+        active_vm_item_set(old_active_vm);
         set_active_memcg(old_memcg);
         mem_cgroup_put(memcg);

@@ -474,11 +489,14 @@ void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
                        gfp_t flags)
 {
         struct mem_cgroup *memcg, *old_memcg;
+        int old_active_vm;
         void *ptr;

         memcg = bpf_map_get_memcg(map);
         old_memcg = set_active_memcg(memcg);
+        old_active_vm = active_vm_item_set(ACTIVE_VM_BPF);
         ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT);
+        active_vm_item_set(old_active_vm);
         set_active_memcg(old_memcg);
         mem_cgroup_put(memcg);

@@ -490,10 +508,13 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
 {
         struct mem_cgroup *memcg, *old_memcg;
         void __percpu *ptr;
+        int old_active_vm;

         memcg = bpf_map_get_memcg(map);
         old_memcg = set_active_memcg(memcg);
+        old_active_vm = active_vm_item_set(ACTIVE_VM_BPF);
         ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT);
+        active_vm_item_set(old_active_vm);
         set_active_memcg(old_memcg);
         mem_cgroup_put(memcg);
-- 
2.30.1 (Apple Git-130)