The patch titled Subject: mm: tidy up shmem mTHP controls and stats has been added to the -mm mm-unstable branch. Its filename is mm-tidy-up-shmem-mthp-controls-and-stats.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-tidy-up-shmem-mthp-controls-and-stats.patch This patch will later appear in the mm-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Ryan Roberts <ryan.roberts@xxxxxxx> Subject: mm: tidy up shmem mTHP controls and stats Date: Thu, 8 Aug 2024 12:18:47 +0100 Previously we had a situation where shmem mTHP controls and stats were not exposed for some supported sizes and were exposed for some unsupported sizes. So let's clean that up. Anon mTHP can support all large orders [2, PMD_ORDER]. But shmem can support all large orders [1, MAX_PAGECACHE_ORDER]. However, per-size shmem controls and stats were previously being exposed for all the anon mTHP orders, meaning order-1 was not present, and for arm64 64K base pages, orders 12 and 13 were exposed but were not supported internally. Tidy this all up by defining ctrl and stats attribute groups for anon and file separately. Anon ctrl and stats groups are populated for all orders in THP_ORDERS_ALL_ANON and file ctrl and stats groups are populated for all orders in THP_ORDERS_ALL_FILE_DEFAULT. Additionally, create "any" ctrl and stats attribute groups which are populated for all orders in (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE_DEFAULT). swpout stats use this since they apply to anon and shmem. The side-effect of all this is that different hugepage-*kB directories contain different sets of controls and stats, depending on which memory types support that size. This approach is preferred over the alternative, which is to populate dummy controls and stats for memory types that do not support a given size. Link: https://lkml.kernel.org/r/20240808111849.651867-3-ryan.roberts@xxxxxxx Signed-off-by: Ryan Roberts <ryan.roberts@xxxxxxx> Cc: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx> Cc: Barry Song <baohua@xxxxxxxxxx> Cc: David Hildenbrand <david@xxxxxxxxxx> Cc: Gavin Shan <gshan@xxxxxxxxxx> Cc: Hugh Dickins <hughd@xxxxxxxxxx> Cc: Lance Yang <ioworker0@xxxxxxxxx> Cc: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- mm/huge_memory.c | 144 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 114 insertions(+), 30 deletions(-) --- a/mm/huge_memory.c~mm-tidy-up-shmem-mthp-controls-and-stats +++ a/mm/huge_memory.c @@ -459,8 +459,8 @@ static void thpsize_release(struct kobje static DEFINE_SPINLOCK(huge_anon_orders_lock); static LIST_HEAD(thpsize_list); -static ssize_t thpsize_enabled_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +static ssize_t anon_enabled_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { int order = to_thpsize(kobj)->order; const char *output; @@ -477,9 +477,9 @@ static ssize_t thpsize_enabled_show(stru return sysfs_emit(buf, "%s\n", output); } -static ssize_t thpsize_enabled_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) +static ssize_t anon_enabled_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) { int order = to_thpsize(kobj)->order; ssize_t ret = count; @@ -521,19 +521,35 @@ static ssize_t thpsize_enabled_store(str return ret; } -static struct kobj_attribute thpsize_enabled_attr = - __ATTR(enabled, 0644, thpsize_enabled_show, thpsize_enabled_store); +static struct kobj_attribute anon_enabled_attr = + __ATTR(enabled, 0644, anon_enabled_show, anon_enabled_store); -static struct attribute *thpsize_attrs[] = { - &thpsize_enabled_attr.attr, +static struct attribute *anon_ctrl_attrs[] = { + &anon_enabled_attr.attr, + NULL, +}; + +static const struct attribute_group anon_ctrl_attr_grp = { + .attrs = anon_ctrl_attrs, +}; + +static struct attribute *file_ctrl_attrs[] = { #ifdef CONFIG_SHMEM &thpsize_shmem_enabled_attr.attr, #endif NULL, }; -static const struct attribute_group thpsize_attr_group = { - .attrs = thpsize_attrs, +static const struct attribute_group file_ctrl_attr_grp = { + .attrs = file_ctrl_attrs, +}; + +static struct attribute *any_ctrl_attrs[] = { + NULL, +}; + +static const struct attribute_group any_ctrl_attr_grp = { + .attrs = any_ctrl_attrs, }; static const struct kobj_type thpsize_ktype = { @@ -572,64 +588,132 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallbac DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE); DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT); DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK); +#ifdef CONFIG_SHMEM DEFINE_MTHP_STAT_ATTR(shmem_alloc, MTHP_STAT_SHMEM_ALLOC); DEFINE_MTHP_STAT_ATTR(shmem_fallback, MTHP_STAT_SHMEM_FALLBACK); DEFINE_MTHP_STAT_ATTR(shmem_fallback_charge, MTHP_STAT_SHMEM_FALLBACK_CHARGE); +#endif DEFINE_MTHP_STAT_ATTR(split, MTHP_STAT_SPLIT); DEFINE_MTHP_STAT_ATTR(split_failed, MTHP_STAT_SPLIT_FAILED); DEFINE_MTHP_STAT_ATTR(split_deferred, MTHP_STAT_SPLIT_DEFERRED); -static struct attribute *stats_attrs[] = { +static struct attribute *anon_stats_attrs[] = { &anon_fault_alloc_attr.attr, &anon_fault_fallback_attr.attr, &anon_fault_fallback_charge_attr.attr, +#ifndef CONFIG_SHMEM &swpout_attr.attr, &swpout_fallback_attr.attr, - &shmem_alloc_attr.attr, - &shmem_fallback_attr.attr, - &shmem_fallback_charge_attr.attr, +#endif &split_attr.attr, &split_failed_attr.attr, &split_deferred_attr.attr, NULL, }; -static struct attribute_group stats_attr_group = { +static struct attribute_group anon_stats_attr_grp = { .name = "stats", - .attrs = stats_attrs, + .attrs = anon_stats_attrs, }; +static struct attribute *file_stats_attrs[] = { +#ifdef CONFIG_SHMEM + &shmem_alloc_attr.attr, + &shmem_fallback_attr.attr, + &shmem_fallback_charge_attr.attr, +#endif + NULL, +}; + +static struct attribute_group file_stats_attr_grp = { + .name = "stats", + .attrs = file_stats_attrs, +}; + +static struct attribute *any_stats_attrs[] = { +#ifdef CONFIG_SHMEM + &swpout_attr.attr, + &swpout_fallback_attr.attr, +#endif + NULL, +}; + +static struct attribute_group any_stats_attr_grp = { + .name = "stats", + .attrs = any_stats_attrs, +}; + +static int sysfs_add_group(struct kobject *kobj, + const struct attribute_group *grp) +{ + int ret = -ENOENT; + + /* + * If the group is named, try to merge first, assuming the subdirectory + * was already created. This avoids the warning emitted by + * sysfs_create_group() if the directory already exists. + */ + if (grp->name) + ret = sysfs_merge_group(kobj, grp); + if (ret) + ret = sysfs_create_group(kobj, grp); + + return ret; +} + static struct thpsize *thpsize_create(int order, struct kobject *parent) { unsigned long size = (PAGE_SIZE << order) / SZ_1K; struct thpsize *thpsize; - int ret; + int ret = -ENOMEM; thpsize = kzalloc(sizeof(*thpsize), GFP_KERNEL); if (!thpsize) - return ERR_PTR(-ENOMEM); + goto err; + + thpsize->order = order; ret = kobject_init_and_add(&thpsize->kobj, &thpsize_ktype, parent, "hugepages-%lukB", size); if (ret) { kfree(thpsize); - return ERR_PTR(ret); + goto err; } - ret = sysfs_create_group(&thpsize->kobj, &thpsize_attr_group); - if (ret) { - kobject_put(&thpsize->kobj); - return ERR_PTR(ret); + + ret = sysfs_add_group(&thpsize->kobj, &any_ctrl_attr_grp); + if (ret) + goto err_put; + + ret = sysfs_add_group(&thpsize->kobj, &any_stats_attr_grp); + if (ret) + goto err_put; + + if (BIT(order) & THP_ORDERS_ALL_ANON) { + ret = sysfs_add_group(&thpsize->kobj, &anon_ctrl_attr_grp); + if (ret) + goto err_put; + + ret = sysfs_add_group(&thpsize->kobj, &anon_stats_attr_grp); + if (ret) + goto err_put; } - ret = sysfs_create_group(&thpsize->kobj, &stats_attr_group); - if (ret) { - kobject_put(&thpsize->kobj); - return ERR_PTR(ret); + if (BIT(order) & THP_ORDERS_ALL_FILE_DEFAULT) { + ret = sysfs_add_group(&thpsize->kobj, &file_ctrl_attr_grp); + if (ret) + goto err_put; + + ret = sysfs_add_group(&thpsize->kobj, &file_stats_attr_grp); + if (ret) + goto err_put; } - thpsize->order = order; return thpsize; +err_put: + kobject_put(&thpsize->kobj); +err: + return ERR_PTR(ret); } static void thpsize_release(struct kobject *kobj) @@ -669,7 +753,7 @@ static int __init hugepage_init_sysfs(st goto remove_hp_group; } - orders = THP_ORDERS_ALL_ANON; + orders = THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE_DEFAULT; order = highest_order(orders); while (orders) { thpsize = thpsize_create(order, *hugepage_kobj); _ Patches currently in -mm which might be from ryan.roberts@xxxxxxx are mm-cleanup-count_mthp_stat-definition.patch mm-tidy-up-shmem-mthp-controls-and-stats.patch