Re: [PATCH 2/4] mm: support nesting memalloc_use_memcg()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Apr 20, 2020 at 06:39:30PM -0400, Dan Schatzberg wrote:
> The memalloc_use_memcg() function to override the default memcg
> accounting context currently doesn't nest. But the patches to make the
> loop driver cgroup-aware will end up nesting:
> 
> [   98.137605]  alloc_page_buffers+0x210/0x288
> [   98.141799]  __getblk_gfp+0x1d4/0x400
> [   98.145475]  ext4_read_block_bitmap_nowait+0x148/0xbc8
> [   98.150628]  ext4_mb_init_cache+0x25c/0x9b0
> [   98.154821]  ext4_mb_init_group+0x270/0x390
> [   98.159014]  ext4_mb_good_group+0x264/0x270
> [   98.163208]  ext4_mb_regular_allocator+0x480/0x798
> [   98.168011]  ext4_mb_new_blocks+0x958/0x10f8
> [   98.172294]  ext4_ext_map_blocks+0xec8/0x1618
> [   98.176660]  ext4_map_blocks+0x1b8/0x8a0
> [   98.180592]  ext4_writepages+0x830/0xf10
> [   98.184523]  do_writepages+0xb4/0x198
> [   98.188195]  __filemap_fdatawrite_range+0x170/0x1c8
> [   98.193086]  filemap_write_and_wait_range+0x40/0xb0
> [   98.197974]  ext4_punch_hole+0x4a4/0x660
> [   98.201907]  ext4_fallocate+0x294/0x1190
> [   98.205839]  loop_process_work+0x690/0x1100
> [   98.210032]  loop_workfn+0x2c/0x110
> [   98.213529]  process_one_work+0x3e0/0x648
> [   98.217546]  worker_thread+0x70/0x670
> [   98.221217]  kthread+0x1b8/0x1c0
> [   98.224452]  ret_from_fork+0x10/0x18
> 
> where loop_process_work() sets the memcg override to the memcg that
> submitted the IO request, and alloc_page_buffers() sets the override
> to the memcg that instantiated the cache page, which may differ.
> 
> Make memalloc_use_memcg() return the old memcg and convert existing
> users to a stacking model. Delete the unused memalloc_unuse_memcg().
> 
> Signed-off-by: Dan Schatzberg <schatzberg.dan@xxxxxxxxx>

Acked-by: Roman Gushchin <guro@xxxxxx>

One small nit below.

Thanks!

> ---
>  fs/buffer.c                          |  6 +++---
>  fs/notify/fanotify/fanotify.c        |  5 +++--
>  fs/notify/inotify/inotify_fsnotify.c |  5 +++--
>  include/linux/sched/mm.h             | 28 +++++++++-------------------
>  4 files changed, 18 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 599a0bf7257b..e39e05985323 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -851,13 +851,13 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
>  	struct buffer_head *bh, *head;
>  	gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
>  	long offset;
> -	struct mem_cgroup *memcg;
> +	struct mem_cgroup *memcg, *oldmemcg;

I'd rename it to old_memcg.

>  
>  	if (retry)
>  		gfp |= __GFP_NOFAIL;
>  
>  	memcg = get_mem_cgroup_from_page(page);
> -	memalloc_use_memcg(memcg);
> +	oldmemcg = memalloc_use_memcg(memcg);
>  
>  	head = NULL;
>  	offset = PAGE_SIZE;
> @@ -876,7 +876,7 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
>  		set_bh_page(bh, page, offset);
>  	}
>  out:
> -	memalloc_unuse_memcg();
> +	memalloc_use_memcg(oldmemcg);
>  	mem_cgroup_put(memcg);
>  	return head;
>  /*
> diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> index 5435a40f82be..54c787cd6efb 100644
> --- a/fs/notify/fanotify/fanotify.c
> +++ b/fs/notify/fanotify/fanotify.c
> @@ -353,6 +353,7 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
>  	gfp_t gfp = GFP_KERNEL_ACCOUNT;
>  	struct inode *id = fanotify_fid_inode(inode, mask, data, data_type);
>  	const struct path *path = fsnotify_data_path(data, data_type);
> +	struct mem_cgroup *oldmemcg;
>  
>  	/*
>  	 * For queues with unlimited length lost events are not expected and
> @@ -366,7 +367,7 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
>  		gfp |= __GFP_RETRY_MAYFAIL;
>  
>  	/* Whoever is interested in the event, pays for the allocation. */
> -	memalloc_use_memcg(group->memcg);
> +	oldmemcg = memalloc_use_memcg(group->memcg);
>  
>  	if (fanotify_is_perm_event(mask)) {
>  		struct fanotify_perm_event *pevent;
> @@ -451,7 +452,7 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
>  		}
>  	}
>  out:
> -	memalloc_unuse_memcg();
> +	memalloc_use_memcg(oldmemcg);
>  	return event;
>  }
>  
> diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
> index 2ebc89047153..d27c6e83cea6 100644
> --- a/fs/notify/inotify/inotify_fsnotify.c
> +++ b/fs/notify/inotify/inotify_fsnotify.c
> @@ -69,6 +69,7 @@ int inotify_handle_event(struct fsnotify_group *group,
>  	int ret;
>  	int len = 0;
>  	int alloc_len = sizeof(struct inotify_event_info);
> +	struct mem_cgroup *oldmemcg;
>  
>  	if (WARN_ON(fsnotify_iter_vfsmount_mark(iter_info)))
>  		return 0;
> @@ -93,9 +94,9 @@ int inotify_handle_event(struct fsnotify_group *group,
>  	 * trigger OOM killer in the target monitoring memcg as it may have
>  	 * security repercussion.
>  	 */
> -	memalloc_use_memcg(group->memcg);
> +	oldmemcg = memalloc_use_memcg(group->memcg);
>  	event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
> -	memalloc_unuse_memcg();
> +	memalloc_use_memcg(oldmemcg);
>  
>  	if (unlikely(!event)) {
>  		/*
> diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
> index c49257a3b510..95e8bfb0cab1 100644
> --- a/include/linux/sched/mm.h
> +++ b/include/linux/sched/mm.h
> @@ -316,31 +316,21 @@ static inline void memalloc_nocma_restore(unsigned int flags)
>   * __GFP_ACCOUNT allocations till the end of the scope will be charged to the
>   * given memcg.
>   *
> - * NOTE: This function is not nesting safe.
> + * NOTE: This function can nest. Users must save the return value and
> + * reset the previous value after their own charging scope is over
>   */
> -static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
> +static inline struct mem_cgroup *
> +memalloc_use_memcg(struct mem_cgroup *memcg)
>  {
> -	WARN_ON_ONCE(current->active_memcg);
> +	struct mem_cgroup *old = current->active_memcg;
>  	current->active_memcg = memcg;
> -}
> -
> -/**
> - * memalloc_unuse_memcg - Ends the remote memcg charging scope.
> - *
> - * This function marks the end of the remote memcg charging scope started by
> - * memalloc_use_memcg().
> - */
> -static inline void memalloc_unuse_memcg(void)
> -{
> -	current->active_memcg = NULL;
> +	return old;
>  }
>  #else
> -static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
> -{
> -}
> -
> -static inline void memalloc_unuse_memcg(void)
> +static inline struct mem_cgroup *
> +memalloc_use_memcg(struct mem_cgroup *memcg)
>  {
> +	return NULL;
>  }
>  #endif
>  
> -- 
> 2.24.1
> 




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux