On 08/08/2024 23:11, Barry Song wrote: > On Thu, Aug 8, 2024 at 11:19 PM Ryan Roberts <ryan.roberts@xxxxxxx> wrote: >> >> Previously we had a situation where shmem mTHP controls and stats were >> not exposed for some supported sizes and were exposed for some >> unsupported sizes. So let's clean that up. >> >> Anon mTHP can support all large orders [2, PMD_ORDER]. But shmem can >> support all large orders [1, MAX_PAGECACHE_ORDER]. However, per-size >> shmem controls and stats were previously being exposed for all the anon >> mTHP orders, meaning order-1 was not present, and for arm64 64K base >> pages, orders 12 and 13 were exposed but were not supported internally. >> >> Tidy this all up by defining ctrl and stats attribute groups for anon >> and file separately. Anon ctrl and stats groups are populated for all >> orders in THP_ORDERS_ALL_ANON and file ctrl and stats groups are >> populated for all orders in THP_ORDERS_ALL_FILE_DEFAULT. >> >> Additionally, create "any" ctrl and stats attribute groups which are >> populated for all orders in (THP_ORDERS_ALL_ANON | >> THP_ORDERS_ALL_FILE_DEFAULT). swpout stats use this since they apply to >> anon and shmem. > > This passage is a bit confusing to me. I'm not sure whether it's about > creating any control and stats for both file and anon, or creating them > separately depending on the situation. However, the previous passage > mentions that file and anon create control and stats separately based > on different orders, and the orders they create might differ. Yes, I'll admit that I added this paragraph for the change I made since the last version, and it's not the clearest thing I've ever written on reflection. The idea is that there are 3 buckets; "anon", "file" and "any". controls/stats in the "anon" bucket are populated for all orders supported by anon memory. Same for file. controls/stats in the "any" bucket are populated for all orders supported by either anon or file. > > But after running your patches, I understood it. For instance, I'm now > seeing 8kB folder that didn't exist before: > > /sys/kernel/mm/transparent_hugepage # ls -l > drwxr-xr-x 3 root root 0 Aug 8 22:01 hugepages-1024kB > ... > drwxr-xr-x 3 root root 0 Aug 8 22:01 hugepages-8kB > drwxr-xr-x 2 root root 0 Aug 8 22:01 khugepaged > ... > > Then, when I entered the 8kB folder, I found 'shmem_enabled' > but not 'enabled' for anon: > /sys/kernel/mm/transparent_hugepage/hugepages-8kB # ls > shmem_enabled stats > > However, in the 16kB folder, I found both: > /sys/kernel/mm/transparent_hugepage/hugepages-16kB # ls > enabled shmem_enabled stats > > In the 8kB 'stats,' I see 'shmem_alloc' but not 'anon_alloc.' Since > both shmem and anon require swapout, I am seeing 'swpout' and > 'swpout_fallback': > > /sys/kernel/mm/transparent_hugepage/hugepages-8kB/stats # ls > shmem_alloc shmem_fallback shmem_fallback_charge swpout > swpout_fallback > > Everything observed seems to meet expectations, so: Yes, what you observe is exactly as intended. It gets a bit tricky if CONFIG_SHMEM is disabled; In this case, some ifdeffery ensures that the swpout stats are declared in the "anon" bucket instead of the "any" bucket. > > Tested-by: Barry Song <baohua@xxxxxxxxxx> Thanks! > >> >> The side-effect of all this is that different hugepage-*kB directories >> contain different sets of controls and stats, depending on which memory >> types support that size. This approach is preferred over the >> alternative, which is to populate dummy controls and stats for memory >> types that do not support a given size. >> >> Signed-off-by: Ryan Roberts <ryan.roberts@xxxxxxx> >> --- >> mm/huge_memory.c | 144 +++++++++++++++++++++++++++++++++++++---------- >> 1 file changed, 114 insertions(+), 30 deletions(-) >> >> diff --git a/mm/huge_memory.c b/mm/huge_memory.c >> index 0c3075ee00012..082d86b7c6c2f 100644 >> --- a/mm/huge_memory.c >> +++ b/mm/huge_memory.c >> @@ -482,8 +482,8 @@ static void thpsize_release(struct kobject *kobj); >> static DEFINE_SPINLOCK(huge_anon_orders_lock); >> static LIST_HEAD(thpsize_list); >> >> -static ssize_t thpsize_enabled_show(struct kobject *kobj, >> - struct kobj_attribute *attr, char *buf) >> +static ssize_t anon_enabled_show(struct kobject *kobj, >> + struct kobj_attribute *attr, char *buf) >> { >> int order = to_thpsize(kobj)->order; >> const char *output; >> @@ -500,9 +500,9 @@ static ssize_t thpsize_enabled_show(struct kobject *kobj, >> return sysfs_emit(buf, "%s\n", output); >> } >> >> -static ssize_t thpsize_enabled_store(struct kobject *kobj, >> - struct kobj_attribute *attr, >> - const char *buf, size_t count) >> +static ssize_t anon_enabled_store(struct kobject *kobj, >> + struct kobj_attribute *attr, >> + const char *buf, size_t count) >> { >> int order = to_thpsize(kobj)->order; >> ssize_t ret = count; >> @@ -544,19 +544,35 @@ static ssize_t thpsize_enabled_store(struct kobject *kobj, >> return ret; >> } >> >> -static struct kobj_attribute thpsize_enabled_attr = >> - __ATTR(enabled, 0644, thpsize_enabled_show, thpsize_enabled_store); >> +static struct kobj_attribute anon_enabled_attr = >> + __ATTR(enabled, 0644, anon_enabled_show, anon_enabled_store); >> >> -static struct attribute *thpsize_attrs[] = { >> - &thpsize_enabled_attr.attr, >> +static struct attribute *anon_ctrl_attrs[] = { >> + &anon_enabled_attr.attr, >> + NULL, >> +}; >> + >> +static const struct attribute_group anon_ctrl_attr_grp = { >> + .attrs = anon_ctrl_attrs, >> +}; >> + >> +static struct attribute *file_ctrl_attrs[] = { >> #ifdef CONFIG_SHMEM >> &thpsize_shmem_enabled_attr.attr, >> #endif >> NULL, >> }; >> >> -static const struct attribute_group thpsize_attr_group = { >> - .attrs = thpsize_attrs, >> +static const struct attribute_group file_ctrl_attr_grp = { >> + .attrs = file_ctrl_attrs, >> +}; >> + >> +static struct attribute *any_ctrl_attrs[] = { >> + NULL, >> +}; >> + >> +static const struct attribute_group any_ctrl_attr_grp = { >> + .attrs = any_ctrl_attrs, >> }; >> >> static const struct kobj_type thpsize_ktype = { >> @@ -595,64 +611,132 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK); >> DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE); >> DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT); >> DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK); >> +#ifdef CONFIG_SHMEM >> DEFINE_MTHP_STAT_ATTR(shmem_alloc, MTHP_STAT_SHMEM_ALLOC); >> DEFINE_MTHP_STAT_ATTR(shmem_fallback, MTHP_STAT_SHMEM_FALLBACK); >> DEFINE_MTHP_STAT_ATTR(shmem_fallback_charge, MTHP_STAT_SHMEM_FALLBACK_CHARGE); >> +#endif >> DEFINE_MTHP_STAT_ATTR(split, MTHP_STAT_SPLIT); >> DEFINE_MTHP_STAT_ATTR(split_failed, MTHP_STAT_SPLIT_FAILED); >> DEFINE_MTHP_STAT_ATTR(split_deferred, MTHP_STAT_SPLIT_DEFERRED); >> >> -static struct attribute *stats_attrs[] = { >> +static struct attribute *anon_stats_attrs[] = { >> &anon_fault_alloc_attr.attr, >> &anon_fault_fallback_attr.attr, >> &anon_fault_fallback_charge_attr.attr, >> +#ifndef CONFIG_SHMEM >> &swpout_attr.attr, >> &swpout_fallback_attr.attr, >> - &shmem_alloc_attr.attr, >> - &shmem_fallback_attr.attr, >> - &shmem_fallback_charge_attr.attr, >> +#endif >> &split_attr.attr, >> &split_failed_attr.attr, >> &split_deferred_attr.attr, >> NULL, >> }; >> >> -static struct attribute_group stats_attr_group = { >> +static struct attribute_group anon_stats_attr_grp = { >> + .name = "stats", >> + .attrs = anon_stats_attrs, >> +}; >> + >> +static struct attribute *file_stats_attrs[] = { >> +#ifdef CONFIG_SHMEM >> + &shmem_alloc_attr.attr, >> + &shmem_fallback_attr.attr, >> + &shmem_fallback_charge_attr.attr, >> +#endif >> + NULL, >> +}; >> + >> +static struct attribute_group file_stats_attr_grp = { >> + .name = "stats", >> + .attrs = file_stats_attrs, >> +}; >> + >> +static struct attribute *any_stats_attrs[] = { >> +#ifdef CONFIG_SHMEM >> + &swpout_attr.attr, >> + &swpout_fallback_attr.attr, >> +#endif >> + NULL, >> +}; >> + >> +static struct attribute_group any_stats_attr_grp = { >> .name = "stats", >> - .attrs = stats_attrs, >> + .attrs = any_stats_attrs, >> }; >> >> +static int sysfs_add_group(struct kobject *kobj, >> + const struct attribute_group *grp) >> +{ >> + int ret = -ENOENT; >> + >> + /* >> + * If the group is named, try to merge first, assuming the subdirectory >> + * was already created. This avoids the warning emitted by >> + * sysfs_create_group() if the directory already exists. >> + */ >> + if (grp->name) >> + ret = sysfs_merge_group(kobj, grp); >> + if (ret) >> + ret = sysfs_create_group(kobj, grp); >> + >> + return ret; >> +} >> + >> static struct thpsize *thpsize_create(int order, struct kobject *parent) >> { >> unsigned long size = (PAGE_SIZE << order) / SZ_1K; >> struct thpsize *thpsize; >> - int ret; >> + int ret = -ENOMEM; >> >> thpsize = kzalloc(sizeof(*thpsize), GFP_KERNEL); >> if (!thpsize) >> - return ERR_PTR(-ENOMEM); >> + goto err; >> + >> + thpsize->order = order; >> >> ret = kobject_init_and_add(&thpsize->kobj, &thpsize_ktype, parent, >> "hugepages-%lukB", size); >> if (ret) { >> kfree(thpsize); >> - return ERR_PTR(ret); >> + goto err; >> } >> >> - ret = sysfs_create_group(&thpsize->kobj, &thpsize_attr_group); >> - if (ret) { >> - kobject_put(&thpsize->kobj); >> - return ERR_PTR(ret); >> + >> + ret = sysfs_add_group(&thpsize->kobj, &any_ctrl_attr_grp); >> + if (ret) >> + goto err_put; >> + >> + ret = sysfs_add_group(&thpsize->kobj, &any_stats_attr_grp); >> + if (ret) >> + goto err_put; >> + >> + if (BIT(order) & THP_ORDERS_ALL_ANON) { >> + ret = sysfs_add_group(&thpsize->kobj, &anon_ctrl_attr_grp); >> + if (ret) >> + goto err_put; >> + >> + ret = sysfs_add_group(&thpsize->kobj, &anon_stats_attr_grp); >> + if (ret) >> + goto err_put; >> } >> >> - ret = sysfs_create_group(&thpsize->kobj, &stats_attr_group); >> - if (ret) { >> - kobject_put(&thpsize->kobj); >> - return ERR_PTR(ret); >> + if (BIT(order) & THP_ORDERS_ALL_FILE_DEFAULT) { >> + ret = sysfs_add_group(&thpsize->kobj, &file_ctrl_attr_grp); >> + if (ret) >> + goto err_put; >> + >> + ret = sysfs_add_group(&thpsize->kobj, &file_stats_attr_grp); >> + if (ret) >> + goto err_put; >> } >> >> - thpsize->order = order; >> return thpsize; >> +err_put: >> + kobject_put(&thpsize->kobj); >> +err: >> + return ERR_PTR(ret); >> } >> >> static void thpsize_release(struct kobject *kobj) >> @@ -692,7 +776,7 @@ static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj) >> goto remove_hp_group; >> } >> >> - orders = THP_ORDERS_ALL_ANON; >> + orders = THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE_DEFAULT; >> order = highest_order(orders); >> while (orders) { >> thpsize = thpsize_create(order, *hugepage_kobj); >> -- >> 2.43.0 >> > > Thanks > Barry