Signed-off-by: Ryan Roberts <ryan.roberts@xxxxxxx>
---
Documentation/admin-guide/mm/transhuge.rst | 13 +++
include/linux/huge_mm.h | 6 +-
include/linux/pagemap.h | 17 ++-
mm/filemap.c | 6 +-
mm/huge_memory.c | 117 ++++++++++++++++-----
5 files changed, 128 insertions(+), 31 deletions(-)
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index 058485daf186..d4857e457add 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -512,6 +512,19 @@ shmem_fallback_charge
falls back to using small pages even though the allocation was
successful.
+file_alloc
+ is incremented every time a file huge page is successfully
+ allocated.
+
+file_fallback
+ is incremented if a file huge page is attempted to be allocated
+ but fails and instead falls back to using small pages.
+
+file_fallback_charge
+ is incremented if a file huge page cannot be charged and instead
+ falls back to using small pages even though the allocation was
+ successful.
+
split
is incremented every time a huge page is successfully split into
smaller orders. This can happen for a variety of reasons but a
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index cb93b9009ce4..b4fba11976f2 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -117,6 +117,9 @@ enum mthp_stat_item {
MTHP_STAT_SHMEM_ALLOC,
MTHP_STAT_SHMEM_FALLBACK,
MTHP_STAT_SHMEM_FALLBACK_CHARGE,
+ MTHP_STAT_FILE_ALLOC,
+ MTHP_STAT_FILE_FALLBACK,
+ MTHP_STAT_FILE_FALLBACK_CHARGE,
MTHP_STAT_SPLIT,
MTHP_STAT_SPLIT_FAILED,
MTHP_STAT_SPLIT_DEFERRED,
@@ -292,11 +295,10 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
struct thpsize {
struct kobject kobj;
- struct list_head node;
int order;
};
-#define to_thpsize(kobj) container_of(kobj, struct thpsize, kobj)
+#define to_thpsize(_kobj) container_of(_kobj, struct thpsize, kobj)
#define transparent_hugepage_use_zero_page() \
(transparent_hugepage_flags & \
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 6e2f72d03176..f45a1ba6d9b6 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -365,6 +365,7 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
*/
#define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1)
#define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER)
+#define PAGECACHE_LARGE_ORDERS ((BIT(MAX_PAGECACHE_ORDER + 1) - 1) & ~BIT(0))
/**
* mapping_set_large_folios() - Indicate the file supports large folios.
@@ -562,14 +563,26 @@ static inline void *detach_page_private(struct page *page)
}
#ifdef CONFIG_NUMA
-struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order);
+struct folio *__filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order);
#else
-static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
+static inline struct folio *__filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
{
return folio_alloc_noprof(gfp, order);
}
#endif
+static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
+{
+ struct folio *folio;
+
+ folio = __filemap_alloc_folio_noprof(gfp, order);
+
+ if (!folio)
+ count_mthp_stat(order, MTHP_STAT_FILE_FALLBACK);
+
+ return folio;
+}
+
#define filemap_alloc_folio(...) \
alloc_hooks(filemap_alloc_folio_noprof(__VA_ARGS__))
diff --git a/mm/filemap.c b/mm/filemap.c
index 53d5d0410b51..131d514fca29 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -963,6 +963,8 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio,
int ret;
ret = mem_cgroup_charge(folio, NULL, gfp);
+ count_mthp_stat(folio_order(folio),
+ ret ? MTHP_STAT_FILE_FALLBACK_CHARGE : MTHP_STAT_FILE_ALLOC);
if (ret)
return ret;
@@ -990,7 +992,7 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio,
EXPORT_SYMBOL_GPL(filemap_add_folio);
#ifdef CONFIG_NUMA
-struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
+struct folio *__filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
{
int n;
struct folio *folio;
@@ -1007,7 +1009,7 @@ struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
}
return folio_alloc_noprof(gfp, order);
}
-EXPORT_SYMBOL(filemap_alloc_folio_noprof);
+EXPORT_SYMBOL(__filemap_alloc_folio_noprof);
#endif
/*
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f9696c94e211..559553e2a662 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -452,8 +452,9 @@ static const struct attribute_group hugepage_attr_group = {
static void hugepage_exit_sysfs(struct kobject *hugepage_kobj);
static void thpsize_release(struct kobject *kobj);
+static void thpsize_child_release(struct kobject *kobj);
static DEFINE_SPINLOCK(huge_anon_orders_lock);
-static LIST_HEAD(thpsize_list);
+static LIST_HEAD(thpsize_child_list);
static ssize_t thpsize_enabled_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
@@ -537,6 +538,18 @@ static const struct kobj_type thpsize_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
};
+static const struct kobj_type thpsize_child_ktype = {
+ .release = &thpsize_child_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+struct thpsize_child {
+ struct kobject kobj;
+ struct list_head node;
+};
+
+#define to_thpsize_child(_kobj) container_of(_kobj, struct thpsize_child, kobj)
+
DEFINE_PER_CPU(struct mthp_stat, mthp_stats) = {{{0}}};
static unsigned long sum_mthp_stat(int order, enum mthp_stat_item item)
@@ -557,7 +570,7 @@ static unsigned long sum_mthp_stat(int order, enum mthp_stat_item item)
static ssize_t _name##_show(struct kobject *kobj, \
struct kobj_attribute *attr, char *buf) \
{ \
- int order = to_thpsize(kobj)->order; \
+ int order = to_thpsize(kobj->parent)->order; \
\
return sysfs_emit(buf, "%lu\n", sum_mthp_stat(order, _index)); \
} \
@@ -591,41 +604,93 @@ static struct attribute *stats_attrs[] = {
};
static struct attribute_group stats_attr_group = {
- .name = "stats",
.attrs = stats_attrs,
};
-static struct thpsize *thpsize_create(int order, struct kobject *parent)
+DEFINE_MTHP_STAT_ATTR(file_alloc, MTHP_STAT_FILE_ALLOC);
+DEFINE_MTHP_STAT_ATTR(file_fallback, MTHP_STAT_FILE_FALLBACK);
+DEFINE_MTHP_STAT_ATTR(file_fallback_charge, MTHP_STAT_FILE_FALLBACK_CHARGE);
+
+static struct attribute *file_stats_attrs[] = {
+ &file_alloc_attr.attr,
+ &file_fallback_attr.attr,
+ &file_fallback_charge_attr.attr,
+ NULL,
+};
+
+static struct attribute_group file_stats_attr_group = {
+ .attrs = file_stats_attrs,
+};
+
+static int thpsize_create(int order, struct kobject *parent)
{
unsigned long size = (PAGE_SIZE << order) / SZ_1K;
+ struct thpsize_child *stats;
struct thpsize *thpsize;
int ret;
+ /*
+ * Each child object (currently only the "stats" directory) holds a
+ * reference to the top-level thpsize object, so we can drop our ref to
+ * the top-level once stats has been set up. Then we just need to drop a
+ * reference on each child to clean everything up. We can't simply use
+ * the attr group name property for the stats subdirectory because there
+ * may be multiple attribute groups to populate inside stats, and
+ * overlaying them via the name property isn't supported; each attr
+ * group name, if provided, must be unique within the parent directory.
+ */
+
thpsize = kzalloc(sizeof(*thpsize), GFP_KERNEL);
- if (!thpsize)
- return ERR_PTR(-ENOMEM);
+ if (!thpsize) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ thpsize->order = order;
ret = kobject_init_and_add(&thpsize->kobj, &thpsize_ktype, parent,
"hugepages-%lukB", size);
if (ret) {
kfree(thpsize);
- return ERR_PTR(ret);
+ goto err;
}
- ret = sysfs_create_group(&thpsize->kobj, &thpsize_attr_group);
- if (ret) {
+ stats = kzalloc(sizeof(*stats), GFP_KERNEL);
+ if (!stats) {
kobject_put(&thpsize->kobj);
- return ERR_PTR(ret);
+ ret = -ENOMEM;
+ goto err;
}
- ret = sysfs_create_group(&thpsize->kobj, &stats_attr_group);
+ ret = kobject_init_and_add(&stats->kobj, &thpsize_child_ktype,
+ &thpsize->kobj, "stats");
+ kobject_put(&thpsize->kobj);
if (ret) {
- kobject_put(&thpsize->kobj);
- return ERR_PTR(ret);
+ kobject_put(&stats->kobj);
+ goto err;
}
- thpsize->order = order;
- return thpsize;
+ if (BIT(order) & THP_ORDERS_ALL_ANON) {
+ ret = sysfs_create_group(&thpsize->kobj, &thpsize_attr_group);
+ if (ret)
+ goto err_put;
+
+ ret = sysfs_create_group(&stats->kobj, &stats_attr_group);
+ if (ret)
+ goto err_put;
+ }
+
+ if (BIT(order) & PAGECACHE_LARGE_ORDERS) {
+ ret = sysfs_create_group(&stats->kobj, &file_stats_attr_group);
+ if (ret)
+ goto err_put;
+ }
+
+ list_add(&stats->node, &thpsize_child_list);
+ return 0;
+err_put: