On 2024-12-09 18:39:32 [-0800], Alexei Starovoitov wrote:
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index d511e68903c6..a969a62ec0c3 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1251,9 +1254,33 @@ static void free_one_page(struct zone *zone, struct page *page,
>  			   unsigned long pfn, unsigned int order,
>  			   fpi_t fpi_flags)
>  {
> +	struct llist_head *llhead;
>  	unsigned long flags;
>  
> -	spin_lock_irqsave(&zone->lock, flags);
> +	if (!spin_trylock_irqsave(&zone->lock, flags)) {
> +		if (unlikely(fpi_flags & FPI_TRYLOCK)) {
> +			/* Remember the order */
> +			page->order = order;
> +			/* Add the page to the free list */
> +			llist_add(&page->pcp_llist, &zone->trylock_free_pages);
> +			return;
> +		}
> +		spin_lock_irqsave(&zone->lock, flags);
> +	}
> +
> +	/* The lock succeeded. Process deferred pages. */
> +	llhead = &zone->trylock_free_pages;
> +	if (unlikely(!llist_empty(llhead))) {
> +		struct llist_node *llnode;
> +		struct page *p, *tmp;
> +
> +		llnode = llist_del_all(llhead);

Do you really need to turn the list around?

> +		llist_for_each_entry_safe(p, tmp, llnode, pcp_llist) {
> +			unsigned int p_order = p->order;
> +			split_large_buddy(zone, p, page_to_pfn(p), p_order, fpi_flags);
> +			__count_vm_events(PGFREE, 1 << p_order);
> +		}

We had something like that (returning memory in IRQ/irq-off context) in
the RT tree and we got rid of it before posting the needed bits to mm.

If we really intend to do something like this, could we please process
this list in an explicitly locked section? I mean not in a try-lock
fashion, which might have originated in an IRQ-off region on
PREEMPT_RT, but in an explicitly locked section which remains
preemptible. This would also avoid the locking problem down the road
where shuffle_pick_tail() invokes get_random_u64(), which in turn
acquires a spinlock_t.

> +	}
>  	split_large_buddy(zone, page, pfn, order, fpi_flags);
>  	spin_unlock_irqrestore(&zone->lock, flags);
>  
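For illustration, something along these lines is what I have in mind.
This is only an untested sketch, not a proper patch:
free_deferred_pages() is a made-up name, and it reuses the
trylock_free_pages / pcp_llist / page->order bits plus
split_large_buddy() from your patch. The point is that the draining
happens under a plain spin_lock_irqsave() from a context which may
always acquire zone->lock, so on PREEMPT_RT the section stays
preemptible:

static void free_deferred_pages(struct zone *zone, fpi_t fpi_flags)
{
	struct llist_node *llnode;
	struct page *p, *tmp;
	unsigned long flags;

	if (likely(llist_empty(&zone->trylock_free_pages)))
		return;

	/* Detach the pages deferred by the FPI_TRYLOCK path. */
	llnode = llist_del_all(&zone->trylock_free_pages);

	/*
	 * Plain lock, never trylock: on PREEMPT_RT zone->lock is a
	 * sleeping lock, so this section remains preemptible and
	 * shuffle_pick_tail() -> get_random_u64() acquiring another
	 * spinlock_t is fine here.
	 */
	spin_lock_irqsave(&zone->lock, flags);
	llist_for_each_entry_safe(p, tmp, llnode, pcp_llist) {
		unsigned int p_order = p->order;

		split_large_buddy(zone, p, page_to_pfn(p), p_order, fpi_flags);
		__count_vm_events(PGFREE, 1 << p_order);
	}
	spin_unlock_irqrestore(&zone->lock, flags);
}

The try-lock path in free_one_page() would then only llist_add() and
return, and the draining could be hooked into one of the regular
(non-trylock) free paths that already take zone->lock.

Sebastian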