Re: [PATCH] mm, memcg: Add memory.transparent_hugepage_disabled

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




> 2020年3月24日 19:00,Michal Hocko <mhocko@xxxxxxxxxx> 写道:
> 
> On Tue 24-03-20 18:31:56, Hui Zhu wrote:
>> /sys/kernel/mm/transparent_hugepage/enabled is the only interface to
>> control if the application can use THP in system level.
>> Sometime, we would not want an application use THP even if
>> transparent_hugepage/enabled is set to "always" or "madvise" because
>> thp may need more cpu and memory resources in some cases.
> 
> Could you specify that sometime by a real usecase in the memcg context
> please?


Thanks for your review.

We use thp+balloon to supply more memory flexibility for vm.
https://lore.kernel.org/linux-mm/1584893097-12317-1-git-send-email-teawater@xxxxxxxxx/
This is another thread that I am working around thp+balloon.

Other applications are already deployed on these machines.  The transparent_hugepage/enabled is set to never because they used to have a lot of THP related performance issues.
And some of them may call madvise thp with itself.

Then even if I set transparent_hugepage/enabled to madvise.  These programs are still at risk of THP-related performance issues.  That is why I need this cgroup thp switch.

> 
>> This commit add a new interface memory.transparent_hugepage_disabled
>> in memcg.
>> When it set to 1, the application inside the cgroup cannot use THP
>> except dax.
> 
> Why should this interface differ from the global semantic. How does it
> relate to the kcompactd. Your patch also doesn't seem to define this
> knob to have hierarchical semantic. Why?
> 

According to my previous description, I didn’t get any need to add transparent_hugepage/enabled like interface.
That is why just add a switch.

What about add a transparent_hugepage/enabled like interface?

Thanks,
Hui

> All that being said, this patch is lacking both proper justification and
> the semantic is dubious to be honest. I have also say that I am not a
> great fan. THP semantic is overly complex already and adding more on top
> would require really strong usecase.
> 
>> Signed-off-by: Hui Zhu <teawaterz@xxxxxxxxxxxxxxxxx>
>> ---
>> include/linux/huge_mm.h    | 18 ++++++++++++++++--
>> include/linux/memcontrol.h |  2 ++
>> mm/memcontrol.c            | 42 ++++++++++++++++++++++++++++++++++++++++++
>> mm/shmem.c                 |  4 ++++
>> 4 files changed, 64 insertions(+), 2 deletions(-)
>> 
>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>> index 5aca3d1..fd81479 100644
>> --- a/include/linux/huge_mm.h
>> +++ b/include/linux/huge_mm.h
>> @@ -91,6 +91,16 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
>> 
>> extern unsigned long transparent_hugepage_flags;
>> 
>> +#ifdef CONFIG_MEMCG
>> +extern bool memcg_transparent_hugepage_disabled(struct vm_area_struct *vma);
>> +#else
>> +static inline bool
>> +memcg_transparent_hugepage_disabled(struct vm_area_struct *vma)
>> +{
>> +	return false;
>> +}
>> +#endif
>> +
>> /*
>>  * to be used on vmas which are known to support THP.
>>  * Use transparent_hugepage_enabled otherwise
>> @@ -106,8 +116,6 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
>> 	if (test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
>> 		return false;
>> 
>> -	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
>> -		return true;
>> 	/*
>> 	 * For dax vmas, try to always use hugepage mappings. If the kernel does
>> 	 * not support hugepages, fsdax mappings will fallback to PAGE_SIZE
>> @@ -117,6 +125,12 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
>> 	if (vma_is_dax(vma))
>> 		return true;
>> 
>> +	if (memcg_transparent_hugepage_disabled(vma))
>> +		return false;
>> +
>> +	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
>> +		return true;
>> +
>> 	if (transparent_hugepage_flags &
>> 				(1 << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
>> 		return !!(vma->vm_flags & VM_HUGEPAGE);
>> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>> index a7a0a1a5..abc3142 100644
>> --- a/include/linux/memcontrol.h
>> +++ b/include/linux/memcontrol.h
>> @@ -320,6 +320,8 @@ struct mem_cgroup {
>> 
>> #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> 	struct deferred_split deferred_split_queue;
>> +
>> +	bool transparent_hugepage_disabled;
>> #endif
>> 
>> 	struct mem_cgroup_per_node *nodeinfo[0];
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index 7a4bd8b..b6d91b6 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -5011,6 +5011,14 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
>> 	if (parent) {
>> 		memcg->swappiness = mem_cgroup_swappiness(parent);
>> 		memcg->oom_kill_disable = parent->oom_kill_disable;
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +		memcg->transparent_hugepage_disabled
>> +			= parent->transparent_hugepage_disabled;
>> +#endif
>> +	} else {
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +		memcg->transparent_hugepage_disabled = false;
>> +#endif
>> 	}
>> 	if (parent && parent->use_hierarchy) {
>> 		memcg->use_hierarchy = true;
>> @@ -6126,6 +6134,24 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
>> 	return nbytes;
>> }
>> 
>> +static u64 transparent_hugepage_disabled_read(struct cgroup_subsys_state *css,
>> +					      struct cftype *cft)
>> +{
>> +	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
>> +
>> +	return memcg->transparent_hugepage_disabled;
>> +}
>> +
>> +static int transparent_hugepage_disabled_write(struct cgroup_subsys_state *css,
>> +					       struct cftype *cft, u64 val)
>> +{
>> +	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
>> +
>> +	memcg->transparent_hugepage_disabled = !!val;
>> +
>> +	return 0;
>> +}
>> +
>> static struct cftype memory_files[] = {
>> 	{
>> 		.name = "current",
>> @@ -6179,6 +6205,12 @@ static struct cftype memory_files[] = {
>> 		.seq_show = memory_oom_group_show,
>> 		.write = memory_oom_group_write,
>> 	},
>> +	{
>> +		.name = "transparent_hugepage_disabled",
>> +		.flags = CFTYPE_NOT_ON_ROOT,
>> +		.read_u64 = transparent_hugepage_disabled_read,
>> +		.write_u64 = transparent_hugepage_disabled_write,
>> +	},
>> 	{ }	/* terminate */
>> };
>> 
>> @@ -6787,6 +6819,16 @@ void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
>> 	refill_stock(memcg, nr_pages);
>> }
>> 
>> +bool memcg_transparent_hugepage_disabled(struct vm_area_struct *vma)
>> +{
>> +	struct mem_cgroup *memcg = get_mem_cgroup_from_mm(vma->vm_mm);
>> +
>> +	if (memcg && memcg->transparent_hugepage_disabled)
>> +		return true;
>> +
>> +	return false;
>> +}
>> +
>> static int __init cgroup_memory(char *s)
>> {
>> 	char *token;
>> diff --git a/mm/shmem.c b/mm/shmem.c
>> index aad3ba7..253b63b 100644
>> --- a/mm/shmem.c
>> +++ b/mm/shmem.c
>> @@ -1810,6 +1810,10 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
>> 		goto alloc_nohuge;
>> 	if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE)
>> 		goto alloc_nohuge;
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +	if (memcg_transparent_hugepage_disabled(vma))
>> +		goto alloc_nohuge;
>> +#endif
>> 	if (shmem_huge == SHMEM_HUGE_FORCE)
>> 		goto alloc_huge;
>> 	switch (sbinfo->huge) {
>> -- 
>> 2.7.4
> 
> -- 
> Michal Hocko
> SUSE Labs




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]     [Monitors]

  Powered by Linux