On Thu 23-11-17 11:43:36, peter.enderborg@xxxxxxxx wrote:
> From: Peter Enderborg <peter.enderborg@xxxxxxxx>
> 
> The warning of slow allocation has been removed, this is
> a other way to fetch that information. But you need
> to enable the trace. The exit function also returns
> information about the number of retries, how long
> it was stalled and failure reason if that happened.

I think this is just too excessive. We already have a tracepoint for the
allocation exit. All we need is an entry to have a base to compare with.
Another usecase would be to measure allocation latency. Information you
are adding can be (partially) covered by existing tracepoints.

> Signed-off-by: Peter Enderborg <peter.enderborg@xxxxxxxx>
> ---
>  include/trace/events/kmem.h | 68 +++++++++++++++++++++++++++++++++++++++++++++
>  mm/page_alloc.c             | 62 +++++++++++++++++++++++++++++++----------
>  2 files changed, 116 insertions(+), 14 deletions(-)
> 
> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
> index eb57e30..bb882ca 100644
> --- a/include/trace/events/kmem.h
> +++ b/include/trace/events/kmem.h
> @@ -315,6 +315,74 @@ TRACE_EVENT(mm_page_alloc_extfrag,
>  		__entry->change_ownership)
>  );
>  
> +TRACE_EVENT(mm_page_alloc_slowpath_enter,
> +
> +	TP_PROTO(int alloc_order,
> +		nodemask_t *nodemask,
> +		gfp_t gfp_flags),
> +
> +	TP_ARGS(alloc_order, nodemask, gfp_flags),
> +
> +	TP_STRUCT__entry(
> +		__field(int, alloc_order)
> +		__field(nodemask_t *, nodemask)
> +		__field(gfp_t, gfp_flags)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->alloc_order = alloc_order;
> +		__entry->nodemask = nodemask;
> +		__entry->gfp_flags = gfp_flags;
> +	),
> +
> +	TP_printk("alloc_order=%d nodemask=%*pbl gfp_flags=%s",
> +		__entry->alloc_order,
> +		nodemask_pr_args(__entry->nodemask),
> +		show_gfp_flags(__entry->gfp_flags))
> +);
> +
> +TRACE_EVENT(mm_page_alloc_slowpath_exit,
> +
> +	TP_PROTO(struct page *page,
> +		int alloc_order,
> +		nodemask_t *nodemask,
> +		u64 alloc_start,
> +		gfp_t gfp_flags,
> +		int retrys,
> +		int exit),
> +
> +	TP_ARGS(page, alloc_order, nodemask, alloc_start, gfp_flags,
> +		retrys, exit),
> +
> +	TP_STRUCT__entry(__field(struct page *, page)
> +		__field(int, alloc_order)
> +		__field(nodemask_t *, nodemask)
> +		__field(u64, msdelay)
> +		__field(gfp_t, gfp_flags)
> +		__field(int, retrys)
> +		__field(int, exit)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->page = page;
> +		__entry->alloc_order = alloc_order;
> +		__entry->nodemask = nodemask;
> +		__entry->msdelay = jiffies_to_msecs(jiffies-alloc_start);
> +		__entry->gfp_flags = gfp_flags;
> +		__entry->retrys = retrys;
> +		__entry->exit = exit;
> +	),
> +
> +	TP_printk("page=%p alloc_order=%d nodemask=%*pbl msdelay=%llu gfp_flags=%s retrys=%d exit=%d",
> +		__entry->page,
> +		__entry->alloc_order,
> +		nodemask_pr_args(__entry->nodemask),
> +		__entry->msdelay,
> +		show_gfp_flags(__entry->gfp_flags),
> +		__entry->retrys,
> +		__entry->exit)
> +);
> +
>  #endif /* _TRACE_KMEM_H */
>  
>  /* This part must be outside protection */
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 48b5b01..bae9cb9 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -104,6 +104,17 @@ DEFINE_PER_CPU(struct work_struct, pcpu_drain);
>  volatile unsigned long latent_entropy __latent_entropy;
>  EXPORT_SYMBOL(latent_entropy);
>  #endif
> +enum slowpath_exit {
> +	SLOWPATH_NOZONE = -16,
> +	SLOWPATH_COMPACT_DEFERRED,
> +	SLOWPATH_CAN_NOT_DIRECT_RECLAIM,
> +	SLOWPATH_RECURSION,
> +	SLOWPATH_NO_RETRY,
> +	SLOWPATH_COSTLY_ORDER,
> +	SLOWPATH_OOM_VICTIM,
> +	SLOWPATH_NO_DIRECT_RECLAIM,
> +	SLOWPATH_ORDER
> +};
>  
>  /*
>   * Array of node states.
> @@ -3908,8 +3919,15 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  	enum compact_result compact_result;
>  	int compaction_retries;
>  	int no_progress_loops;
> +	unsigned long alloc_start = jiffies;
>  	unsigned int cpuset_mems_cookie;
>  	int reserve_flags;
> +	enum slowpath_exit slowpath_exit;
> +	int retry_count = 0;
> +
> +	trace_mm_page_alloc_slowpath_enter(order,
> +			ac->nodemask,
> +			gfp_mask);
>  
>  	/*
>  	 * In the slowpath, we sanity check order to avoid ever trying to
> @@ -3919,7 +3937,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  	 */
>  	if (order >= MAX_ORDER) {
>  		WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
> -		return NULL;
> +		slowpath_exit = SLOWPATH_ORDER;
> +		goto fail;
>  	}
>  
>  	/*
> @@ -3951,8 +3970,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  	 */
>  	ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
>  					ac->high_zoneidx, ac->nodemask);
> -	if (!ac->preferred_zoneref->zone)
> +	if (!ac->preferred_zoneref->zone) {
> +		slowpath_exit = SLOWPATH_NOZONE;
>  		goto nopage;
> +	}
>  
>  	if (gfp_mask & __GFP_KSWAPD_RECLAIM)
>  		wake_all_kswapds(order, ac);
>  
> @@ -3998,8 +4019,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  	 * system, so we fail the allocation instead of entering
>  	 * direct reclaim.
>  	 */
> -	if (compact_result == COMPACT_DEFERRED)
> +	if (compact_result == COMPACT_DEFERRED) {
> +		slowpath_exit = SLOWPATH_COMPACT_DEFERRED;
>  		goto nopage;
> +	}
>  
>  	/*
>  	 * Looks like reclaim/compaction is worth trying, but
> @@ -4011,6 +4034,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  	}
>  
>  retry:
> +	retry_count++;
>  	/* Ensure kswapd doesn't accidentally go to sleep as long as we loop */
>  	if (gfp_mask & __GFP_KSWAPD_RECLAIM)
>  		wake_all_kswapds(order, ac);
>  
> @@ -4036,13 +4060,16 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  		goto got_pg;
>  
>  	/* Caller is not willing to reclaim, we can't balance anything */
> -	if (!can_direct_reclaim)
> +	if (!can_direct_reclaim) {
> +		slowpath_exit = SLOWPATH_CAN_NOT_DIRECT_RECLAIM;
>  		goto nopage;
> +	}
>  
>  	/* Avoid recursion of direct reclaim */
> -	if (current->flags & PF_MEMALLOC)
> +	if (current->flags & PF_MEMALLOC) {
> +		slowpath_exit = SLOWPATH_RECURSION;
>  		goto nopage;
> -
> +	}
>  	/* Try direct reclaim and then allocating */
>  	page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
>  							&did_some_progress);
> @@ -4056,16 +4083,18 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  		goto got_pg;
>  
>  	/* Do not loop if specifically requested */
> -	if (gfp_mask & __GFP_NORETRY)
> +	if (gfp_mask & __GFP_NORETRY) {
> +		slowpath_exit = SLOWPATH_NO_RETRY;
>  		goto nopage;
> -
> +	}
>  	/*
>  	 * Do not retry costly high order allocations unless they are
>  	 * __GFP_RETRY_MAYFAIL
>  	 */
> -	if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
> +	if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL)) {
> +		slowpath_exit = SLOWPATH_COSTLY_ORDER;
>  		goto nopage;
> -
> +	}
>  	if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
>  				 did_some_progress > 0, &no_progress_loops))
>  		goto retry;
> @@ -4095,9 +4124,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  	/* Avoid allocations with no watermarks from looping endlessly */
>  	if (tsk_is_oom_victim(current) &&
>  	    (alloc_flags == ALLOC_OOM ||
> -	     (gfp_mask & __GFP_NOMEMALLOC)))
> +	     (gfp_mask & __GFP_NOMEMALLOC))) {
> +		slowpath_exit = SLOWPATH_OOM_VICTIM;
>  		goto nopage;
> -
> +	}
>  	/* Retry as long as the OOM killer is making progress */
>  	if (did_some_progress) {
>  		no_progress_loops = 0;
> @@ -4118,9 +4148,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  	 * All existing users of the __GFP_NOFAIL are blockable, so warn
>  	 * of any new users that actually require GFP_NOWAIT
>  	 */
> -	if (WARN_ON_ONCE(!can_direct_reclaim))
> +	if (WARN_ON_ONCE(!can_direct_reclaim)) {
> +		slowpath_exit = SLOWPATH_NO_DIRECT_RECLAIM;
>  		goto fail;
> -
> +	}
>  	/*
>  	 * PF_MEMALLOC request from this context is rather bizarre
>  	 * because we cannot reclaim anything and only can loop waiting
> @@ -4153,6 +4184,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>  	warn_alloc(gfp_mask, ac->nodemask,
>  			"page allocation failure: order:%u", order);
>  got_pg:
> +	trace_mm_page_alloc_slowpath_exit(page, order, ac->nodemask,
> +			alloc_start, gfp_mask, retry_count, slowpath_exit);
> +
>  	return page;
>  }
>  
> -- 
> 2.7.4

-- 
Michal Hocko
SUSE Labs
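
[Editor's note: the following sketch is illustrative only and is not part of the posted patch or of Michal's reply. It shows what the "entry to have a base to compare with" could look like if it carried only minimal state; the event name and reduced argument list are assumptions. Per-allocation latency would then come from pairing this event's ring-buffer timestamp with the existing allocation-exit tracepoint (mm_page_alloc) on the same task, rather than from a computed delay field.]

/*
 * Sketch of a minimal entry-only event (assumed name and arguments).
 * order and gfp_flags are enough to identify the allocation; the stall
 * duration falls out of the trace timestamps once this is matched with
 * the existing mm_page_alloc event, so no msdelay/retry bookkeeping is
 * needed in the tracepoint itself.
 */
TRACE_EVENT(mm_page_alloc_slowpath_enter,

	TP_PROTO(unsigned int order, gfp_t gfp_flags),

	TP_ARGS(order, gfp_flags),

	TP_STRUCT__entry(
		__field(unsigned int, order)
		__field(gfp_t, gfp_flags)
	),

	TP_fast_assign(
		__entry->order = order;
		__entry->gfp_flags = gfp_flags;
	),

	TP_printk("order=%u gfp_flags=%s",
		__entry->order,
		show_gfp_flags(__entry->gfp_flags))
);

With something along these lines enabled together with mm_page_alloc, the time a task spent in the slowpath is simply the timestamp difference between the two events for that task, which covers both the latency-measurement use case and most of what the posted exit tracepoint reports.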