On 5/19/22 17:03, Steven Rostedt wrote:
> On Thu, 19 May 2022 14:35:46 +0300
> Vasily Averin <vvs@xxxxxxxxxx> wrote:
>
>>>> @@ -33,42 +35,46 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>>>>  		__entry->bytes_req	= bytes_req;
>>>>  		__entry->bytes_alloc	= bytes_alloc;
>>>>  		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
>>>> +		__entry->accounted	= (gfp_flags & __GFP_ACCOUNT) ||
>>>> +					  (s && s->flags & SLAB_ACCOUNT);
>>>
>>> Now you could make this even faster in the fast path and save just the
>>> s->flags.
>>>
>>> 	__entry->sflags = s ? s->flags : 0;
>>>
>>>>  	),
>>>>
>>>> -	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
>>>> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
>>>>  		(void *)__entry->call_site,
>>>>  		__entry->ptr,
>>>>  		__entry->bytes_req,
>>>>  		__entry->bytes_alloc,
>>>> -		show_gfp_flags(__entry->gfp_flags))
>>>> +		show_gfp_flags(__entry->gfp_flags),
>>>> +		__entry->accounted ? "true" : "false")
>>>
>>> And then have: "accounted=%s":
>>>
>>> 	(__entry->gfp_flags & __GFP_ACCOUNT) ||
>>> 	(__entry->sflags & SLAB_ACCOUNT) ? "true" : "false"
>>
>> Unfortunately this brings back the sparse warnings about bitwise gfp_t
>> and slab_flags_t casts.
>> Could you please explain why your variant is faster?
>
> Micro-optimization, grant you, but it is faster because it moves some of
> the logic into the slow path (the read side), and takes it out of the fast
> path (the write side).
>
> The idea of tracing is to squeeze out every cycle we can to keep the
> tracing overhead down.
>
> But it's really up to you if you need that. I'm not going to let this be a
> blocker. This is more of an FYI than anything else.

Frankly speaking, I vote for performance with both hands.
However, I would still like to avoid new sparse warnings.
Christoph Hellwig just recently taught me, "never add '__force' before
thinking hard about them", and in this case I would need to use it
three times.

I found that the bitwise typecasts can be avoided by using translation
unions. What do you think about the following trick?

diff --git a/mm/slab.h b/mm/slab.h
index 95eb34174c1b..f676612ca40f 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -882,4 +882,14 @@ void __check_heap_object(const void *ptr, unsigned long n,
 }
 #endif
 
+union gfp_flags_u {
+	unsigned long ulong;
+	gfp_t flags;
+};
+
+union slab_flags_u {
+	unsigned int uint;
+	slab_flags_t sflags;
+};
+
 #endif /* MM_SLAB_H */
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 71c141804222..91632a61e16d 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -13,18 +13,20 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
+		 struct kmem_cache *s,
 		 size_t bytes_req,
 		 size_t bytes_alloc,
 		 gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+	TP_ARGS(call_site, ptr, s, bytes_req, bytes_alloc, gfp_flags),
 
 	TP_STRUCT__entry(
 		__field(	unsigned long,	call_site	)
 		__field(	const void *,	ptr		)
 		__field(	size_t,		bytes_req	)
 		__field(	size_t,		bytes_alloc	)
-		__field(	unsigned long,	gfp_flags	)
+		__field_struct(	union gfp_flags_u,	gfp	)
+		__field_struct(	union slab_flags_u,	s	)
 	),
 
 	TP_fast_assign(
@@ -32,51 +34,57 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 		__entry->ptr		= ptr;
 		__entry->bytes_req	= bytes_req;
 		__entry->bytes_alloc	= bytes_alloc;
-		__entry->gfp_flags	= (__force unsigned long)gfp_flags;
+		__entry->gfp.flags	= gfp_flags;
+		__entry->s.sflags	= s ? s->flags : 0;
 	),
 
-	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s accounted=%s",
 		(void *)__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
-		show_gfp_flags(__entry->gfp_flags))
+		show_gfp_flags(__entry->gfp.ulong),
+		((__entry->gfp.flags & __GFP_ACCOUNT) ||
+		 (__entry->s.sflags & SLAB_ACCOUNT)) ? "true" : "false")
 );

Thank you,
	Vasily Averin
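P.S. In case it helps review, here is a minimal standalone sketch of why
the union avoids the warning while a plain cast does not. The gfp_t
typedef and the attribute macros below are simplified stand-ins for the
real kernel definitions, so this is only meant to be fed to sparse
directly, e.g. "sparse demo.c":

/* demo.c */
#ifdef __CHECKER__
#define __bitwise	__attribute__((bitwise))
#define __force		__attribute__((force))
#else
#define __bitwise
#define __force
#endif

/* Simplified stand-in for the kernel's gfp_t typedef. */
typedef unsigned int __bitwise gfp_t;

/* Same-sized members, so the bits read back exactly as written. */
union gfp_flags_u {
	unsigned int uint;
	gfp_t flags;
};

unsigned int with_cast(gfp_t gfp)
{
	/* sparse: warning: cast from restricted gfp_t */
	return (unsigned int)gfp;
}

unsigned int via_union(gfp_t gfp)
{
	union gfp_flags_u u;

	u.flags = gfp;	/* gfp_t to gfp_t: plain assignment, no cast */
	return u.uint;	/* plain member read, so sparse stays quiet */
}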