On 3/20/24 7:02 PM, Johannes Weiner wrote: > Currently, page block type conversion during fallbacks, atomic > reservations and isolation can strand various amounts of free pages on > incorrect freelists. > > For example, fallback stealing moves free pages in the block to the > new type's freelists, but then may not actually claim the block for > that type if there aren't enough compatible pages already allocated. > > In all cases, free page moving might fail if the block straddles more > than one zone, in which case no free pages are moved at all, but the > block type is changed anyway. > > This is detrimental to type hygiene on the freelists. It encourages > incompatible page mixing down the line (ask for one type, get another) > and thus contributes to long-term fragmentation. > > Split the process into a proper transaction: check first if conversion > will happen, then try to move the free pages, and only if that was > successful convert the block to the new type. > > Tested-by: "Huang, Ying" <ying.huang@xxxxxxxxx> > Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx> Reviewed-by: Vlastimil Babka <vbabka@xxxxxxx> Nit below: > @@ -1743,33 +1770,37 @@ static inline bool boost_watermark(struct zone *zone) > } > > /* > - * This function implements actual steal behaviour. If order is large enough, > - * we can steal whole pageblock. If not, we first move freepages in this > - * pageblock to our migratetype and determine how many already-allocated pages > - * are there in the pageblock with a compatible migratetype. If at least half > - * of pages are free or compatible, we can change migratetype of the pageblock > - * itself, so pages freed in the future will be put on the correct free list. > + * This function implements actual steal behaviour. If order is large enough, we > + * can claim the whole pageblock for the requested migratetype. 
If not, we check > + * the pageblock for constituent pages; if at least half of the pages are free > + * or compatible, we can still claim the whole block, so pages freed in the > + * future will be put on the correct free list. Otherwise, we isolate exactly > + * the order we need from the fallback block and leave its migratetype alone. > + */ > -static void steal_suitable_fallback(struct zone *zone, struct page *page, > - unsigned int alloc_flags, int start_type, bool whole_block) > +static struct page * > +steal_suitable_fallback(struct zone *zone, struct page *page, > + int current_order, int order, int start_type, > + unsigned int alloc_flags, bool whole_block) > { > - unsigned int current_order = buddy_order(page); > int free_pages, movable_pages, alike_pages; > - int old_block_type; > + unsigned long start_pfn, end_pfn; > + int block_type; > > - old_block_type = get_pageblock_migratetype(page); > + block_type = get_pageblock_migratetype(page); > > /* > * This can happen due to races and we want to prevent broken > * highatomic accounting. > */ > - if (is_migrate_highatomic(old_block_type)) > + if (is_migrate_highatomic(block_type)) > goto single_page; > > /* Take ownership for orders >= pageblock_order */ > if (current_order >= pageblock_order) { > + del_page_from_free_list(page, zone, current_order); > change_pageblock_range(page, current_order, start_type); > - goto single_page; > + expand(zone, page, order, current_order, start_type); > + return page; Is the exact order here important (AFAIK it shouldn't be?), or could we just do: change_pageblock_range(); block_type = start_type; goto single_page?
> } > > /* > @@ -1784,10 +1815,9 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, > if (!whole_block) > goto single_page; > > - free_pages = move_freepages_block(zone, page, start_type, > - &movable_pages); > /* moving whole block can fail due to zone boundary conditions */ > - if (!free_pages) > + if (!prep_move_freepages_block(zone, page, &start_pfn, &end_pfn, > + &free_pages, &movable_pages)) > goto single_page; > > /* > @@ -1805,7 +1835,7 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, > * vice versa, be conservative since we can't distinguish the > * exact migratetype of non-movable pages. > */ > - if (old_block_type == MIGRATE_MOVABLE) > + if (block_type == MIGRATE_MOVABLE) > alike_pages = pageblock_nr_pages > - (free_pages + movable_pages); > else > @@ -1816,13 +1846,16 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, > * compatible migratability as our allocation, claim the whole block. > */ > if (free_pages + alike_pages >= (1 << (pageblock_order-1)) || > - page_group_by_mobility_disabled) > + page_group_by_mobility_disabled) { > + move_freepages(zone, start_pfn, end_pfn, start_type); > set_pageblock_migratetype(page, start_type); > - > - return; > + return __rmqueue_smallest(zone, order, start_type); > + } > > single_page: > - move_to_free_list(page, zone, current_order, start_type); > + del_page_from_free_list(page, zone, current_order); > + expand(zone, page, order, current_order, block_type); > + return page; > }