Monitoring both enter and exit for all allocations, and then tracking
down the ones that are slow, would put a very big load on mobile or
embedded devices and consume a lot of battery and CPU. With this patch
we can do useful monitoring on devices in our field tests under real
usage.

On 11/23/2017 01:25 PM, Michal Hocko wrote:
> On Thu 23-11-17 11:43:36, peter.enderborg@xxxxxxxx wrote:
>> From: Peter Enderborg <peter.enderborg@xxxxxxxx>
>>
>> The warning about slow allocations has been removed; this is
>> another way to fetch that information, but you need to
>> enable the trace. The exit tracepoint also reports the
>> number of retries, how long the allocation was stalled,
>> and the failure reason if it failed.
> I think this is just too excessive. We already have a tracepoint for the
> allocation exit. All we need is an entry to have a base to compare with.
> Another usecase would be to measure allocation latency. Information you
> are adding can be (partially) covered by existing tracepoints.
>
>> Signed-off-by: Peter Enderborg <peter.enderborg@xxxxxxxx>
>> ---
>>  include/trace/events/kmem.h | 68 +++++++++++++++++++++++++++++++++++++++++++++
>>  mm/page_alloc.c             | 62 +++++++++++++++++++++++++++++++----------
>>  2 files changed, 116 insertions(+), 14 deletions(-)
>>
>> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
>> index eb57e30..bb882ca 100644
>> --- a/include/trace/events/kmem.h
>> +++ b/include/trace/events/kmem.h
>> @@ -315,6 +315,74 @@ TRACE_EVENT(mm_page_alloc_extfrag,
>>  		__entry->change_ownership)
>>  );
>>
>> +TRACE_EVENT(mm_page_alloc_slowpath_enter,
>> +
>> +	TP_PROTO(int alloc_order,
>> +		nodemask_t *nodemask,
>> +		gfp_t gfp_flags),
>> +
>> +	TP_ARGS(alloc_order, nodemask, gfp_flags),
>> +
>> +	TP_STRUCT__entry(
>> +		__field(int, alloc_order)
>> +		__field(nodemask_t *, nodemask)
>> +		__field(gfp_t, gfp_flags)
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->alloc_order = alloc_order;
>> +		__entry->nodemask = nodemask;
>> +		__entry->gfp_flags = gfp_flags;
>> +	),
>> +
>> +	TP_printk("alloc_order=%d nodemask=%*pbl gfp_flags=%s",
>> +		__entry->alloc_order,
>> +		nodemask_pr_args(__entry->nodemask),
>> +		show_gfp_flags(__entry->gfp_flags))
>> +);
>> +
>> +TRACE_EVENT(mm_page_alloc_slowpath_exit,
>> +
>> +	TP_PROTO(struct page *page,
>> +		int alloc_order,
>> +		nodemask_t *nodemask,
>> +		u64 alloc_start,
>> +		gfp_t gfp_flags,
>> +		int retrys,
>> +		int exit),
>> +
>> +	TP_ARGS(page, alloc_order, nodemask, alloc_start, gfp_flags,
>> +		retrys, exit),
>> +
>> +	TP_STRUCT__entry(__field(struct page *, page)
>> +		__field(int, alloc_order)
>> +		__field(nodemask_t *, nodemask)
>> +		__field(u64, msdelay)
>> +		__field(gfp_t, gfp_flags)
>> +		__field(int, retrys)
>> +		__field(int, exit)
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->page = page;
>> +		__entry->alloc_order = alloc_order;
>> +		__entry->nodemask = nodemask;
>> +		__entry->msdelay = jiffies_to_msecs(jiffies-alloc_start);
>> +		__entry->gfp_flags = gfp_flags;
>> +		__entry->retrys = retrys;
>> +		__entry->exit = exit;
>> +	),
>> +
>> +	TP_printk("page=%p alloc_order=%d nodemask=%*pbl msdelay=%llu gfp_flags=%s retrys=%d exit=%d",
>> +		__entry->page,
>> +		__entry->alloc_order,
>> +		nodemask_pr_args(__entry->nodemask),
>> +		__entry->msdelay,
>> +		show_gfp_flags(__entry->gfp_flags),
>> +		__entry->retrys,
>> +		__entry->exit)
>> +);
>> +
>>  #endif /* _TRACE_KMEM_H */
>>
>>  /* This part must be outside protection */
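A note on "you need to enable the trace" above: these two events should
show up in the kmem group in tracefs like the other kmem tracepoints, so
enabling them from a test harness could look something like the minimal
sketch below (assuming tracefs is mounted at /sys/kernel/debug/tracing;
the paths are derived from the event names and are not part of the patch):

/*
 * Minimal sketch, assuming tracefs is mounted at
 * /sys/kernel/debug/tracing and that the new events land in the
 * "kmem" group like the other kmem tracepoints.
 */
#include <stdio.h>

static int trace_write(const char *path, const char *val)
{
	FILE *fp = fopen(path, "w");

	if (!fp)
		return -1;
	fputs(val, fp);
	fclose(fp);
	return 0;
}

int main(void)
{
	static const char *events[] = {
		"/sys/kernel/debug/tracing/events/kmem/"
			"mm_page_alloc_slowpath_enter/enable",
		"/sys/kernel/debug/tracing/events/kmem/"
			"mm_page_alloc_slowpath_exit/enable",
	};
	unsigned int i;

	for (i = 0; i < sizeof(events) / sizeof(events[0]); i++)
		if (trace_write(events[i], "1"))
			perror(events[i]);
	return 0;
}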
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 48b5b01..bae9cb9 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -104,6 +104,17 @@ DEFINE_PER_CPU(struct work_struct, pcpu_drain);
>>  volatile unsigned long latent_entropy __latent_entropy;
>>  EXPORT_SYMBOL(latent_entropy);
>>  #endif
>> +enum slowpath_exit {
>> +	SLOWPATH_NOZONE = -16,
>> +	SLOWPATH_COMPACT_DEFERRED,
>> +	SLOWPATH_CAN_NOT_DIRECT_RECLAIM,
>> +	SLOWPATH_RECURSION,
>> +	SLOWPATH_NO_RETRY,
>> +	SLOWPATH_COSTLY_ORDER,
>> +	SLOWPATH_OOM_VICTIM,
>> +	SLOWPATH_NO_DIRECT_RECLAIM,
>> +	SLOWPATH_ORDER
>> +};
>>
>>  /*
>>   * Array of node states.
>> @@ -3908,8 +3919,15 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>  	enum compact_result compact_result;
>>  	int compaction_retries;
>>  	int no_progress_loops;
>> +	unsigned long alloc_start = jiffies;
>>  	unsigned int cpuset_mems_cookie;
>>  	int reserve_flags;
>> +	enum slowpath_exit slowpath_exit;
>> +	int retry_count = 0;
>> +
>> +	trace_mm_page_alloc_slowpath_enter(order,
>> +			ac->nodemask,
>> +			gfp_mask);
>>
>>  	/*
>>  	 * In the slowpath, we sanity check order to avoid ever trying to
>> @@ -3919,7 +3937,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>  	 */
>>  	if (order >= MAX_ORDER) {
>>  		WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
>> -		return NULL;
>> +		slowpath_exit = SLOWPATH_ORDER;
>> +		goto fail;
>>  	}
>>
>>  	/*
>> @@ -3951,8 +3970,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>  	 */
>>  	ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
>>  					ac->high_zoneidx, ac->nodemask);
>> -	if (!ac->preferred_zoneref->zone)
>> +	if (!ac->preferred_zoneref->zone) {
>> +		slowpath_exit = SLOWPATH_NOZONE;
>>  		goto nopage;
>> +	}
>>
>>  	if (gfp_mask & __GFP_KSWAPD_RECLAIM)
>>  		wake_all_kswapds(order, ac);
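For post-processing, the exit= field could be mapped back to the names
in enum slowpath_exit above with a small helper along these lines
(hypothetical, not part of the patch; the numeric values follow the
enum, which counts up from SLOWPATH_NOZONE = -16):

#include <stdio.h>

/*
 * Hypothetical post-processing helper: map the exit= value from
 * mm_page_alloc_slowpath_exit back to a name.  The values mirror
 * enum slowpath_exit in the patch; anything else is assumed to be
 * an exit via got_pg without a recorded reason.
 */
static const char *slowpath_exit_name(int exit_code)
{
	switch (exit_code) {
	case -16: return "SLOWPATH_NOZONE";
	case -15: return "SLOWPATH_COMPACT_DEFERRED";
	case -14: return "SLOWPATH_CAN_NOT_DIRECT_RECLAIM";
	case -13: return "SLOWPATH_RECURSION";
	case -12: return "SLOWPATH_NO_RETRY";
	case -11: return "SLOWPATH_COSTLY_ORDER";
	case -10: return "SLOWPATH_OOM_VICTIM";
	case -9:  return "SLOWPATH_NO_DIRECT_RECLAIM";
	case -8:  return "SLOWPATH_ORDER";
	default:  return "unknown (no recorded reason)";
	}
}

int main(void)
{
	/* Example: -15 is what a deferred-compaction exit would log. */
	printf("exit=-15 -> %s\n", slowpath_exit_name(-15));
	return 0;
}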
>> @@ -3998,8 +4019,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>  		 * system, so we fail the allocation instead of entering
>>  		 * direct reclaim.
>>  		 */
>> -		if (compact_result == COMPACT_DEFERRED)
>> +		if (compact_result == COMPACT_DEFERRED) {
>> +			slowpath_exit = SLOWPATH_COMPACT_DEFERRED;
>>  			goto nopage;
>> +		}
>>
>>  		/*
>>  		 * Looks like reclaim/compaction is worth trying, but
>> @@ -4011,6 +4034,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>  	}
>>
>>  retry:
>> +	retry_count++;
>>  	/* Ensure kswapd doesn't accidentally go to sleep as long as we loop */
>>  	if (gfp_mask & __GFP_KSWAPD_RECLAIM)
>>  		wake_all_kswapds(order, ac);
>> @@ -4036,13 +4060,16 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>  		goto got_pg;
>>
>>  	/* Caller is not willing to reclaim, we can't balance anything */
>> -	if (!can_direct_reclaim)
>> +	if (!can_direct_reclaim) {
>> +		slowpath_exit = SLOWPATH_CAN_NOT_DIRECT_RECLAIM;
>>  		goto nopage;
>> +	}
>>
>>  	/* Avoid recursion of direct reclaim */
>> -	if (current->flags & PF_MEMALLOC)
>> +	if (current->flags & PF_MEMALLOC) {
>> +		slowpath_exit = SLOWPATH_RECURSION;
>>  		goto nopage;
>> -
>> +	}
>>  	/* Try direct reclaim and then allocating */
>>  	page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
>>  							&did_some_progress);
>> @@ -4056,16 +4083,18 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>  		goto got_pg;
>>
>>  	/* Do not loop if specifically requested */
>> -	if (gfp_mask & __GFP_NORETRY)
>> +	if (gfp_mask & __GFP_NORETRY) {
>> +		slowpath_exit = SLOWPATH_NO_RETRY;
>>  		goto nopage;
>> -
>> +	}
>>  	/*
>>  	 * Do not retry costly high order allocations unless they are
>>  	 * __GFP_RETRY_MAYFAIL
>>  	 */
>> -	if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
>> +	if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL)) {
>> +		slowpath_exit = SLOWPATH_COSTLY_ORDER;
>>  		goto nopage;
>> -
>> +	}
>>  	if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
>>  				 did_some_progress > 0, &no_progress_loops))
>>  		goto retry;
>> @@ -4095,9 +4124,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>  	/* Avoid allocations with no watermarks from looping endlessly */
>>  	if (tsk_is_oom_victim(current) &&
>>  	    (alloc_flags == ALLOC_OOM ||
>> -	     (gfp_mask & __GFP_NOMEMALLOC)))
>> +	     (gfp_mask & __GFP_NOMEMALLOC))) {
>> +		slowpath_exit = SLOWPATH_OOM_VICTIM;
>>  		goto nopage;
>> -
>> +	}
>>  	/* Retry as long as the OOM killer is making progress */
>>  	if (did_some_progress) {
>>  		no_progress_loops = 0;
>> @@ -4118,9 +4148,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>  	 * All existing users of the __GFP_NOFAIL are blockable, so warn
>>  	 * of any new users that actually require GFP_NOWAIT
>>  	 */
>> -	if (WARN_ON_ONCE(!can_direct_reclaim))
>> +	if (WARN_ON_ONCE(!can_direct_reclaim)) {
>> +		slowpath_exit = SLOWPATH_NO_DIRECT_RECLAIM;
>>  		goto fail;
>> -
>> +	}
>>  	/*
>>  	 * PF_MEMALLOC request from this context is rather bizarre
>>  	 * because we cannot reclaim anything and only can loop waiting
>> @@ -4153,6 +4184,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>>  	warn_alloc(gfp_mask, ac->nodemask,
>>  			"page allocation failure: order:%u", order);
>>  got_pg:
>> +	trace_mm_page_alloc_slowpath_exit(page, order, ac->nodemask,
>> +		alloc_start, gfp_mask, retry_count, slowpath_exit);
>> +
>>  	return page;
>>  }
>>
>> --
>> 2.7.4
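On Michal's latency point: once both events are enabled, time spent in
the slowpath can be estimated from userspace by pairing enter and exit
per task. A deliberately naive sketch (assuming the standard ftrace
trace_pipe line prefix "comm-pid [cpu] flags timestamp: event:" and the
default pid_max; comms containing '-' will confuse this parser):

/*
 * Rough sketch, not part of the patch: pair enter/exit per pid from
 * trace_pipe and print the time each task spent in the slowpath.
 */
#include <stdio.h>
#include <string.h>

#define PID_LIMIT 32768	/* assumes the default pid_max */

int main(void)
{
	static double enter_ts[PID_LIMIT];	/* last enter per pid */
	char line[1024];
	FILE *fp = fopen("/sys/kernel/debug/tracing/trace_pipe", "r");

	if (!fp)
		return 1;

	while (fgets(line, sizeof(line), fp)) {
		char comm[64];
		double ts;
		int pid;

		/* "<comm>-<pid> [cpu] flags <timestamp>: <event>: ..." */
		if (sscanf(line, " %63[^-]-%d [%*d] %*s %lf:",
			   comm, &pid, &ts) != 3)
			continue;
		if (pid < 0 || pid >= PID_LIMIT)
			continue;
		if (strstr(line, "mm_page_alloc_slowpath_enter:")) {
			enter_ts[pid] = ts;
		} else if (strstr(line, "mm_page_alloc_slowpath_exit:") &&
			   enter_ts[pid] > 0) {
			printf("pid %d: %.3f ms in slowpath\n",
			       pid, (ts - enter_ts[pid]) * 1000.0);
			enter_ts[pid] = 0;
		}
	}
	fclose(fp);
	return 0;
}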