On Mon, 12 Feb 2024 at 23:29, Oscar Salvador <osalvador@xxxxxxx> wrote: > > page_owner needs to increment a stack_record refcount when a new allocation > occurs, and decrement it on a free operation. > In order to do that, we need to have a way to get a stack_record from a > handle. > Implement __stack_depot_get_stack_record() which just does that, and make > it public so page_owner can use it. > > Also implement {inc,dec}_stack_record_count() which increments > or decrements on respective allocation and free operations, via > __reset_page_owner() (free operation) and __set_page_owner() (alloc > operation). > > Traversing all stackdepot buckets comes with its own complexity, > plus we would have to implement a way to mark only those stack_records > that originated from page_owner, as those are the ones we are > interested in. > For that reason, page_owner maintains its own list of stack_records, > because traversing that list is faster than traversing all buckets > while at the same time keeping complexity low. > inc_stack_record_count() is responsible for adding new stack_records > into the list stack_list. > > Modifications to the list are protected via a spinlock with irqs > disabled, since this code can also be reached from IRQ context. > > Signed-off-by: Oscar Salvador <osalvador@xxxxxxx> For the code: Reviewed-by: Marco Elver <elver@xxxxxxxxxx> But see minor comments below. 
> --- > include/linux/stackdepot.h | 9 +++++ > lib/stackdepot.c | 8 +++++ > mm/page_owner.c | 73 ++++++++++++++++++++++++++++++++++++++ > 3 files changed, 90 insertions(+) > > diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h > index 90274860fd8e..f3c2162bf615 100644 > --- a/include/linux/stackdepot.h > +++ b/include/linux/stackdepot.h > @@ -175,6 +175,15 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, > depot_stack_handle_t stack_depot_save(unsigned long *entries, > unsigned int nr_entries, gfp_t gfp_flags); > > +/** > + * __stack_depot_get_stack_record - Get a pointer to a stack_record struct > + * This function is only for internal purposes. I think the body of the kernel doc needs to go after argument declarations. > + * @handle: Stack depot handle > + * > + * Return: Returns a pointer to a stack_record struct > + */ > +struct stack_record *__stack_depot_get_stack_record(depot_stack_handle_t handle); > + > /** > * stack_depot_fetch - Fetch a stack trace from stack depot > * > diff --git a/lib/stackdepot.c b/lib/stackdepot.c > index 6f9095374847..fdb09450a538 100644 > --- a/lib/stackdepot.c > +++ b/lib/stackdepot.c > @@ -685,6 +685,14 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, > } > EXPORT_SYMBOL_GPL(stack_depot_save); > > +struct stack_record *__stack_depot_get_stack_record(depot_stack_handle_t handle) > +{ > + if (!handle) > + return NULL; > + > + return depot_fetch_stack(handle); > +} > + > unsigned int stack_depot_fetch(depot_stack_handle_t handle, > unsigned long **entries) > { > diff --git a/mm/page_owner.c b/mm/page_owner.c > index 5634e5d890f8..7d1b3f75cef3 100644 > --- a/mm/page_owner.c > +++ b/mm/page_owner.c > @@ -36,6 +36,14 @@ struct page_owner { > pid_t free_tgid; > }; > > +struct stack { > + struct stack_record *stack_record; > + struct stack *next; > +}; > + > +static struct stack *stack_list; > +static DEFINE_SPINLOCK(stack_list_lock); > + > static bool page_owner_enabled 
__initdata; > DEFINE_STATIC_KEY_FALSE(page_owner_inited); > > @@ -61,6 +69,57 @@ static __init bool need_page_owner(void) > return page_owner_enabled; > } > > +static void add_stack_record_to_list(struct stack_record *stack_record) > +{ > + unsigned long flags; > + struct stack *stack; > + > + stack = kmalloc(sizeof(*stack), GFP_KERNEL); > + if (stack) { It's usually more elegant to write if (!stack) return; If the rest of the function is conditional. > + stack->stack_record = stack_record; > + stack->next = NULL; > + > + spin_lock_irqsave(&stack_list_lock, flags); > + if (!stack_list) { > + stack_list = stack; > + } else { > + stack->next = stack_list; > + stack_list = stack; > + } > + spin_unlock_irqrestore(&stack_list_lock, flags); > + } > +} > + > +static void inc_stack_record_count(depot_stack_handle_t handle) > +{ > + struct stack_record *stack_record = __stack_depot_get_stack_record(handle); > + > + if (stack_record) { > + /* > + * New stack_record's that do not use STACK_DEPOT_FLAG_GET start > + * with REFCOUNT_SATURATED to catch spurious increments of their > + * refcount. > + * Since we do not use STACK_DEPOT_FLAG_{GET,PUT} API, let us I think I mentioned this in the other email, there is no STACK_DEPOT_FLAG_PUT, only stack_depot_put(). > + * set a refcount of 1 ourselves. 
> + */ > + if (refcount_read(&stack_record->count) == REFCOUNT_SATURATED) { > + refcount_set(&stack_record->count, 1); > + > + /* Add the new stack_record to our list */ > + add_stack_record_to_list(stack_record); > + } > + refcount_inc(&stack_record->count); > + } > +} > + > +static void dec_stack_record_count(depot_stack_handle_t handle) > +{ > + struct stack_record *stack_record = __stack_depot_get_stack_record(handle); > + > + if (stack_record) > + refcount_dec(&stack_record->count); > +} > + > static __always_inline depot_stack_handle_t create_dummy_stack(void) > { > unsigned long entries[4]; > @@ -140,6 +199,7 @@ void __reset_page_owner(struct page *page, unsigned short order) > int i; > struct page_ext *page_ext; > depot_stack_handle_t handle; > + depot_stack_handle_t alloc_handle; > struct page_owner *page_owner; > u64 free_ts_nsec = local_clock(); > > @@ -147,6 +207,9 @@ void __reset_page_owner(struct page *page, unsigned short order) > if (unlikely(!page_ext)) > return; > > + page_owner = get_page_owner(page_ext); > + alloc_handle = page_owner->handle; > + > handle = save_stack(GFP_NOWAIT | __GFP_NOWARN); > for (i = 0; i < (1 << order); i++) { > __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags); > @@ -158,6 +221,15 @@ void __reset_page_owner(struct page *page, unsigned short order) > page_ext = page_ext_next(page_ext); > } > page_ext_put(page_ext); > + if (alloc_handle != early_handle) > + /* > + * early_handle is being set as a handle for all those > + * early allocated pages. See init_pages_in_zone(). > + * Since their refcount is not being incremented because > + * the machinery is not ready yet, we cannot decrement > + * their refcount either. 
> + */ > + dec_stack_record_count(alloc_handle); > } > > static inline void __set_page_owner_handle(struct page_ext *page_ext, > @@ -199,6 +271,7 @@ noinline void __set_page_owner(struct page *page, unsigned short order, > return; > __set_page_owner_handle(page_ext, handle, order, gfp_mask); > page_ext_put(page_ext); > + inc_stack_record_count(handle); > } > > void __set_page_owner_migrate_reason(struct page *page, int reason) > -- > 2.43.0 >