Re: [PATCH v3] mm: add per-order mTHP alloc_success and alloc_fail counters

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Apr 5, 2024 at 3:57 PM Barry Song <21cnbao@xxxxxxxxx> wrote:
>
> On Fri, Apr 5, 2024 at 4:31 AM David Hildenbrand <david@xxxxxxxxxx> wrote:
> >
> > On 04.04.24 09:21, Ryan Roberts wrote:
> > > On 03/04/2024 22:00, Barry Song wrote:
> > >> On Thu, Apr 4, 2024 at 12:48 AM Ryan Roberts <ryan.roberts@xxxxxxx> wrote:
> > >>>
> > >>> On 03/04/2024 09:22, David Hildenbrand wrote:
> > >>>> On 03.04.24 05:55, Barry Song wrote:
> > >>>>> From: Barry Song <v-songbaohua@xxxxxxxx>
> > >>>>>
> > >>>>> Profiling a system blindly with mTHP has become challenging due
> > >>>>> to the lack of visibility into its operations. Presenting the
> > >>>>> success rate of mTHP allocations appears to be pressing need.
> > >>>>>
> > >>>>> Recently, I've been experiencing significant difficulty debugging
> > >>>>> performance improvements and regressions without these figures.
> > >>>>> It's crucial for us to understand the true effectiveness of
> > >>>>> mTHP in real-world scenarios, especially in systems with
> > >>>>> fragmented memory.
> > >>>>>
> > >>>>> This patch sets up the framework for per-order mTHP counters,
> > >>>>> starting with the introduction of anon_alloc_success and
> > >>>>> anon_alloc_fail counters.  Incorporating additional counters
> > >>>>> should now be straightforward as well.
> > >>>>>
> > >>>>> Signed-off-by: Barry Song <v-songbaohua@xxxxxxxx>
> > >>>>> ---
> > >>>>>    -v3:
> > >>>>>    * save some memory as order-0 and order-1 can't be THP, Ryan;
> > >>>>>    * rename to anon_alloc as right now we only support anon to address
> > >>>>>      David's comment;
> > >>>>>    * drop a redundant "else", Ryan
> > >>>>>
> > >>>>>    include/linux/huge_mm.h | 18 ++++++++++++++
> > >>>>>    mm/huge_memory.c        | 54 +++++++++++++++++++++++++++++++++++++++++
> > >>>>>    mm/memory.c             |  2 ++
> > >>>>>    3 files changed, 74 insertions(+)
> > >>>>>
> > >>>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> > >>>>> index e896ca4760f6..5e9af6be9537 100644
> > >>>>> --- a/include/linux/huge_mm.h
> > >>>>> +++ b/include/linux/huge_mm.h
> > >>>>> @@ -70,6 +70,7 @@ extern struct kobj_attribute shmem_enabled_attr;
> > >>>>>     * (which is a limitation of the THP implementation).
> > >>>>>     */
> > >>>>>    #define THP_ORDERS_ALL_ANON    ((BIT(PMD_ORDER + 1) - 1) & ~(BIT(0) | BIT(1)))
> > >>>>> +#define THP_MIN_ORDER        2
> > >>>>>      /*
> > >>>>>     * Mask of all large folio orders supported for file THP.
> > >>>>> @@ -264,6 +265,23 @@ unsigned long thp_vma_allowable_orders(struct
> > >>>>> vm_area_struct *vma,
> > >>>>>                          enforce_sysfs, orders);
> > >>>>>    }
> > >>>>>    +enum thp_event_item {
> > >>>>> +    THP_ANON_ALLOC_SUCCESS,
> > >>>>> +    THP_ANON_ALLOC_FAIL,
> > >>>>> +    NR_THP_EVENT_ITEMS
> > >>>>> +};
> > >>>>
> > >>>> Maybe use a prefix that resembles matches the enum name and is "obviously"
> > >>>> different to the ones in vm_event_item.h, like
> > >>>>
> > >>>> enum thp_event {
> > >>>>      THP_EVENT_ANON_ALLOC_SUCCESS,
> > >>>>      THP_EVENT_ANON_ALLOC_FAIL,
> > >>>>      __THP_EVENT_COUNT,
> > >>>> };
> > >>>
> > >>> FWIW, I'd personally replace "event" with "stat". For me "event" only ever
> > >>> increments, but "stat" can increment and decrement. An event is a type of stat.
> > >>>
> > >>> You are only adding events for now, but we have identified a need for inc/dec
> > >>> stats that will be added in future.
> > >>
> > >> What about the below?
> > >>
> > >> enum thp_stat {
>
> It seems we still need to use enum thp_stat_item rather than thp_stat.
> This follows
> enum zone_stat_item
> enum numa_stat_item
> enum node_stat_item
>
> And most importantly, the below looks much better
>
>        enum thp_stat_item {
>               THP_STAT_ANON_ALLOC,
>               THP_STAT_ANON_ALLOC_FALLBACK,
>               __THP_STAT_COUNT
>        };
>
>        struct thp_state {
>               unsigned long state[PMD_ORDER + 1][__THP_STAT_COUNT];
>        };
>
>        DECLARE_PER_CPU(struct thp_state, thp_states);
>
> than
>
>        enum thp_stat {
>               THP_STAT_ANON_ALLOC,
>               THP_STAT_ANON_ALLOC_FALLBACK,
>               __THP_STAT_COUNT
>        };
>
>        struct thp_state {
>               unsigned long state[PMD_ORDER + 1][__THP_STAT_COUNT];
>        };
>
> > >>     THP_EVENT_ANON_ALLOC,
> > >>     THP_EVENT_ANON_ALLOC_FALLBACK,
> > >>     THP_EVENT_SWPOUT,
> > >>     THP_EVENT_SWPOUT_FALLBACK,
> > >>     ...
> > >>     THP_NR_ANON_PAGES,
> > >>     THP_NR_FILE_PAGES,
> > >
> > > I find this ambiguous; Is it the number of THPs or the number of base pages?
> > >
> > > I think David made the point about incorporating the enum name into the labels
> > > too, so that there can be no namespace confusion. How about:
> > >
> > > <enum>_<type>_<name>
> > >
> > > So:
> > >
> > > THP_STAT_EV_ANON_ALLOC
> > > THP_STAT_EV_ANON_ALLOC_FALLBACK
> > > THP_STAT_EV_ANON_PARTIAL
> > > THP_STAT_EV_SWPOUT
> > > THP_STAT_EV_SWPOUT_FALLBACK
> > > ...
> > > THP_STAT_NR_ANON
> > > THP_STAT_NR_FILE
> > > ...
> > > __THP_STAT_COUNT
> >
> > I'd even drop the "EV". "NR_ANON" vs "ANON_ALLOC" etc. is expressive enough.
>
> ok.

Hi David, Ryan,

I've named everything as follows. Please let me know if you have any further
suggestions before I send the updated version :-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index e896ca4760f6..cc13fa14aa32 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -264,6 +264,23 @@ unsigned long thp_vma_allowable_orders(struct
vm_area_struct *vma,
                                          enforce_sysfs, orders);
 }

+enum thp_stat_item {
+       THP_STAT_ANON_ALLOC,
+       THP_STAT_ANON_ALLOC_FALLBACK,
+       __THP_STAT_COUNT
+};
+
+struct thp_state {
+       unsigned long state[PMD_ORDER + 1][__THP_STAT_COUNT];
+};
+
+DECLARE_PER_CPU(struct thp_state, thp_states);
+
+static inline void count_thp_state(int order, enum thp_stat_item item)
+{
+       this_cpu_inc(thp_states.state[order][item]);
+}
+
 #define transparent_hugepage_use_zero_page()                           \
        (transparent_hugepage_flags &                                   \
         (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9d4b2fbf6872..e704b4408181 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -526,6 +526,46 @@ static const struct kobj_type thpsize_ktype = {
        .sysfs_ops = &kobj_sysfs_ops,
 };

+DEFINE_PER_CPU(struct thp_state, thp_states) = {{{0}}};
+
+static unsigned long sum_thp_states(int order, enum thp_stat_item item)
+{
+       unsigned long sum = 0;
+       int cpu;
+
+       for_each_online_cpu(cpu) {
+               struct thp_state *this = &per_cpu(thp_states, cpu);
+
+               sum += this->state[order][item];
+       }
+
+       return sum;
+}
+
+#define THP_STATE_ATTR(_name, _index)                                  \
+static ssize_t _name##_show(struct kobject *kobj,                      \
+                       struct kobj_attribute *attr, char *buf)         \
+{                                                                      \
+       int order = to_thpsize(kobj)->order;                            \
+                                                                       \
+       return sysfs_emit(buf, "%lu\n", sum_thp_states(order, _index)); \
+}                                                                      \
+static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+
+THP_STATE_ATTR(anon_alloc, THP_STAT_ANON_ALLOC);
+THP_STATE_ATTR(anon_alloc_fallback, THP_STAT_ANON_ALLOC_FALLBACK);

>
> >
> > --
> > Cheers,
> >
> > David / dhildenb

Thanks
Barry





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux