Re: [PATCH bpf-next v8 6/6] bpf: Use try_alloc_pages() to allocate pages for bpf needs.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2/13/25 04:35, Alexei Starovoitov wrote:
> From: Alexei Starovoitov <ast@xxxxxxxxxx>
> 
> Use try_alloc_pages() and free_pages_nolock() for BPF needs
> when context doesn't allow using normal alloc_pages.
> This is a prerequisite for further work.
> 
> Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx>
> ---
>  include/linux/bpf.h  |  2 +-
>  kernel/bpf/arena.c   |  5 ++---
>  kernel/bpf/syscall.c | 23 ++++++++++++++++++++---
>  3 files changed, 23 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index f3f50e29d639..e1838a341817 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -2348,7 +2348,7 @@ int  generic_map_delete_batch(struct bpf_map *map,
>  struct bpf_map *bpf_map_get_curr_or_next(u32 *id);
>  struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
>  
> -int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid,
> +int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
>  			unsigned long nr_pages, struct page **page_array);
>  #ifdef CONFIG_MEMCG
>  void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
> diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
> index 0975d7f22544..8ecc62e6b1a2 100644
> --- a/kernel/bpf/arena.c
> +++ b/kernel/bpf/arena.c
> @@ -287,7 +287,7 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
>  		return VM_FAULT_SIGSEGV;
>  
>  	/* Account into memcg of the process that created bpf_arena */
> -	ret = bpf_map_alloc_pages(map, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE, 1, &page);
> +	ret = bpf_map_alloc_pages(map, NUMA_NO_NODE, 1, &page);
>  	if (ret) {
>  		range_tree_set(&arena->rt, vmf->pgoff, 1);
>  		return VM_FAULT_SIGSEGV;
> @@ -465,8 +465,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
>  	if (ret)
>  		goto out_free_pages;
>  
> -	ret = bpf_map_alloc_pages(&arena->map, GFP_KERNEL | __GFP_ZERO,
> -				  node_id, page_cnt, pages);
> +	ret = bpf_map_alloc_pages(&arena->map, node_id, page_cnt, pages);
>  	if (ret)
>  		goto out;
>  
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index c420edbfb7c8..a7af8d0185d0 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -569,7 +569,24 @@ static void bpf_map_release_memcg(struct bpf_map *map)
>  }
>  #endif
>  
> -int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid,
> +static bool can_alloc_pages(void)
> +{
> +	return preempt_count() == 0 && !irqs_disabled() &&
> +		!IS_ENABLED(CONFIG_PREEMPT_RT);
> +}
> +

I see this is new since v6 and wasn't yet discussed (or I missed it?)

I wonder how reliable these preempt/irq_disabled checks are for correctness
purposes, e.g. we don't always have CONFIG_PREEMPT_COUNT enabled? As long
as the callers of bpf_map_alloc_pages() know the context and pass gfp
accordingly, can't we use e.g. gfpflags_allow_blocking() to determine
whether try_alloc_pages() should be used or not?

> +static struct page *__bpf_alloc_page(int nid)
> +{
> +	if (!can_alloc_pages())
> +		return try_alloc_pages(nid, 0);
> +
> +	return alloc_pages_node(nid,
> +				GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT
> +				| __GFP_NOWARN,
> +				0);
> +}
> +
> +int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
>  			unsigned long nr_pages, struct page **pages)
>  {
>  	unsigned long i, j;
> @@ -582,14 +599,14 @@ int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid,
>  	old_memcg = set_active_memcg(memcg);
>  #endif
>  	for (i = 0; i < nr_pages; i++) {
> -		pg = alloc_pages_node(nid, gfp | __GFP_ACCOUNT, 0);
> +		pg = __bpf_alloc_page(nid);
>  
>  		if (pg) {
>  			pages[i] = pg;
>  			continue;
>  		}
>  		for (j = 0; j < i; j++)
> -			__free_page(pages[j]);
> +			free_pages_nolock(pages[j], 0);
>  		ret = -ENOMEM;
>  		break;
>  	}





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux