Re: [PATCH v4 3/6] mm: shmem: add multi-size THP sysfs interface for anonymous shmem

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 2024/6/10 20:23, Daniel Gomez wrote:
Hi Baolin,
On Tue, Jun 04, 2024 at 06:17:47PM +0800, Baolin Wang wrote:
To support the use of mTHP with anonymous shmem, add a new sysfs interface
'shmem_enabled' in the '/sys/kernel/mm/transparent_hugepage/hugepages-kB/'
directory for each mTHP to control whether shmem is enabled for that mTHP,
with a value similar to the top level 'shmem_enabled', which can be set to:
"always", "inherit (to inherit the top level setting)", "within_size", "advise",
"never". An 'inherit' option is added to ensure compatibility with these
global settings, and the options 'force' and 'deny' are dropped, which are
rather testing artifacts from the old ages.

By default, PMD-sized hugepages have enabled="inherit" and all other hugepage
sizes have enabled="never" for '/sys/kernel/mm/transparent_hugepage/hugepages-xxkB/shmem_enabled'.

In addition, if top level value is 'force', then only PMD-sized hugepages
have enabled="inherit", otherwise configuration will be failed and vice versa.
That means now we will avoid using non-PMD sized THP to override the global
huge allocation.

Signed-off-by: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>
---
  Documentation/admin-guide/mm/transhuge.rst | 23 ++++++
  include/linux/huge_mm.h                    | 10 +++
  mm/huge_memory.c                           | 11 +--
  mm/shmem.c                                 | 96 ++++++++++++++++++++++
  4 files changed, 132 insertions(+), 8 deletions(-)

diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index d414d3f5592a..b76d15e408b3 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -332,6 +332,29 @@ deny
  force
      Force the huge option on for all - very useful for testing;
+Shmem can also use "multi-size THP" (mTHP) by adding a new sysfs knob to control
+mTHP allocation: '/sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/shmem_enabled',
+and its value for each mTHP is essentially consistent with the global setting.
+An 'inherit' option is added to ensure compatibility with these global settings.
+Conversely, the options 'force' and 'deny' are dropped, which are rather testing
+artifacts from the old ages.
+always
+    Attempt to allocate <size> huge pages every time we need a new page;
+
+inherit
+    Inherit the top-level "shmem_enabled" value. By default, PMD-sized hugepages
+    have enabled="inherit" and all other hugepage sizes have enabled="never";
+
+never
+    Do not allocate <size> huge pages;
+
+within_size
+    Only allocate <size> huge page if it will be fully within i_size.
+    Also respect fadvise()/madvise() hints;
+
+advise
+    Only allocate <size> huge pages if requested with fadvise()/madvise();
+
  Need of application restart
  ===========================
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 020e2344eb86..fac21548c5de 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -6,6 +6,7 @@
  #include <linux/mm_types.h>
#include <linux/fs.h> /* only for vma_is_dax() */
+#include <linux/kobject.h>
vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
  int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -63,6 +64,7 @@ ssize_t single_hugepage_flag_show(struct kobject *kobj,
  				  struct kobj_attribute *attr, char *buf,
  				  enum transparent_hugepage_flag flag);
  extern struct kobj_attribute shmem_enabled_attr;
+extern struct kobj_attribute thpsize_shmem_enabled_attr;
/*
   * Mask of all large folio orders supported for anonymous THP; all orders up to
@@ -265,6 +267,14 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
  	return __thp_vma_allowable_orders(vma, vm_flags, tva_flags, orders);
  }
+struct thpsize {
+	struct kobject kobj;
+	struct list_head node;
+	int order;
+};
+
+#define to_thpsize(kobj) container_of(kobj, struct thpsize, kobj)
+
  enum mthp_stat_item {
  	MTHP_STAT_ANON_FAULT_ALLOC,
  	MTHP_STAT_ANON_FAULT_FALLBACK,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8e49f402d7c7..1360a1903b66 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -449,14 +449,6 @@ static void thpsize_release(struct kobject *kobj);
  static DEFINE_SPINLOCK(huge_anon_orders_lock);
  static LIST_HEAD(thpsize_list);
-struct thpsize {
-	struct kobject kobj;
-	struct list_head node;
-	int order;
-};
-
-#define to_thpsize(kobj) container_of(kobj, struct thpsize, kobj)
-
  static ssize_t thpsize_enabled_show(struct kobject *kobj,
  				    struct kobj_attribute *attr, char *buf)
  {
@@ -517,6 +509,9 @@ static struct kobj_attribute thpsize_enabled_attr =
static struct attribute *thpsize_attrs[] = {
  	&thpsize_enabled_attr.attr,
+#ifdef CONFIG_SHMEM
+	&thpsize_shmem_enabled_attr.attr,
+#endif
  	NULL,
  };
diff --git a/mm/shmem.c b/mm/shmem.c
index ae358efc397a..643ff7516b4d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -131,6 +131,14 @@ struct shmem_options {
  #define SHMEM_SEEN_QUOTA 32
  };
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static unsigned long huge_anon_shmem_orders_always __read_mostly;
+static unsigned long huge_anon_shmem_orders_madvise __read_mostly;
+static unsigned long huge_anon_shmem_orders_inherit __read_mostly;
+static unsigned long huge_anon_shmem_orders_within_size __read_mostly;
+static DEFINE_SPINLOCK(huge_anon_shmem_orders_lock);
+#endif

Since we are also applying the new sysfs knob controls to tmpfs and anon mm,
should we rename this to get rid of the anon prefix?

Sure. I want to do this in the patch set of mTHP support tmpfs originally, but yes, I can just drop the 'anon' prefix as a preparation.




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux