Re: [RFC v1] memcg: add memcg lru for page reclaiming

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon 21-10-19 19:56:54, Hillf Danton wrote:
> 
> Currently soft limit reclaim is frozen, see
> Documentation/admin-guide/cgroup-v2.rst for reasons.
> 
> Copying the page lru idea, memcg lru is added for selecting victim
> memcg to reclaim pages from under memory pressure. It now works in
> parallel to slr not only because the latter needs some time to reap
> but the coexistence facilitates it a lot to add the lru in a straight
> forward manner.

This doesn't explain what problem you would like to fix or what feature
you would like to achieve. It also doesn't explain the overall design.

> A lru list paired with a spin lock is added, thanks to the current
> memcg high_work that provides other things it needs, and a couple of
> helpers to add memcg to and pick victim from lru.
> 
> V1 is based on 5.4-rc3.
> 
> Changes since v0
> - add MEMCG_LRU in init/Kconfig
> - drop changes in mm/vmscan.c
> - make memcg lru work in parallel to slr
> 
> Cc: Chris Down <chris@xxxxxxxxxxxxxx>
> Cc: Tejun Heo <tj@xxxxxxxxxx>
> Cc: Roman Gushchin <guro@xxxxxx>
> Cc: Michal Hocko <mhocko@xxxxxxxxxx>
> Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
> Cc: Shakeel Butt <shakeelb@xxxxxxxxxx>
> Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
> Cc: Minchan Kim <minchan@xxxxxxxxxx>
> Cc: Mel Gorman <mgorman@xxxxxxx>
> Signed-off-by: Hillf Danton <hdanton@xxxxxxxx>
> ---
> 
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -843,6 +843,14 @@ config MEMCG
>  	help
>  	  Provides control over the memory footprint of tasks in a cgroup.
>  
> +config MEMCG_LRU
> +	bool
> +	depends on MEMCG
> +	help
> +	  Select victim memcg on lru for page reclaiming.
> +
> +	  Say N if unsure.
> +
>  config MEMCG_SWAP
>  	bool "Swap controller"
>  	depends on MEMCG && SWAP
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -223,6 +223,10 @@ struct mem_cgroup {
>  	/* Upper bound of normal memory consumption range */
>  	unsigned long high;
>  
> +#ifdef CONFIG_MEMCG_LRU
> +	struct list_head lru_node;
> +#endif
> +
>  	/* Range enforcement for interrupt charges */
>  	struct work_struct high_work;
>  
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2338,14 +2338,54 @@ static int memcg_hotplug_cpu_dead(unsign
>  	return 0;
>  }
>  
> +#ifdef CONFIG_MEMCG_LRU
> +static DEFINE_SPINLOCK(memcg_lru_lock);
> +static LIST_HEAD(memcg_lru);	/* a copy of page lru */
> +
> +static void memcg_add_lru(struct mem_cgroup *memcg)
> +{
> +	spin_lock_irq(&memcg_lru_lock);
> +	if (list_empty(&memcg->lru_node))
> +		list_add_tail(&memcg->lru_node, &memcg_lru);
> +	spin_unlock_irq(&memcg_lru_lock);
> +}
> +
> +static struct mem_cgroup *memcg_pick_lru(void)
> +{
> +	struct mem_cgroup *memcg, *next;
> +
> +	spin_lock_irq(&memcg_lru_lock);
> +
> +	list_for_each_entry_safe(memcg, next, &memcg_lru, lru_node) {
> +		list_del_init(&memcg->lru_node);
> +
> +		if (page_counter_read(&memcg->memory) > memcg->high) {
> +			spin_unlock_irq(&memcg_lru_lock);
> +			return memcg;
> +		}
> +	}
> +	spin_unlock_irq(&memcg_lru_lock);
> +
> +	return NULL;
> +}
> +#endif
> +
>  static void reclaim_high(struct mem_cgroup *memcg,
>  			 unsigned int nr_pages,
>  			 gfp_t gfp_mask)
>  {
> +#ifdef CONFIG_MEMCG_LRU
> +	struct mem_cgroup *start = memcg;
> +#endif
>  	do {
>  		if (page_counter_read(&memcg->memory) <= memcg->high)
>  			continue;
>  		memcg_memory_event(memcg, MEMCG_HIGH);
> +		if (IS_ENABLED(CONFIG_MEMCG_LRU))
> +			if (start != memcg) {
> +				memcg_add_lru(memcg);
> +				return;
> +			}
>  		try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
>  	} while ((memcg = parent_mem_cgroup(memcg)));
>  }
> @@ -3158,6 +3198,13 @@ unsigned long mem_cgroup_soft_limit_recl
>  	unsigned long excess;
>  	unsigned long nr_scanned;
>  
> +	if (IS_ENABLED(CONFIG_MEMCG_LRU)) {
> +		struct mem_cgroup *memcg = memcg_pick_lru();
> +		if (memcg)
> +			schedule_work(&memcg->high_work);
> +		return 0;
> +	}
> +
>  	if (order > 0)
>  		return 0;
>  
> @@ -5068,6 +5115,8 @@ static struct mem_cgroup *mem_cgroup_all
>  	if (memcg_wb_domain_init(memcg, GFP_KERNEL))
>  		goto fail;
>  
> +	if (IS_ENABLED(CONFIG_MEMCG_LRU))
> +		INIT_LIST_HEAD(&memcg->lru_node);
>  	INIT_WORK(&memcg->high_work, high_work_func);
>  	memcg->last_scanned_node = MAX_NUMNODES;
>  	INIT_LIST_HEAD(&memcg->oom_notify);
> --
> 

-- 
Michal Hocko
SUSE Labs




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux