On Thu, Jun 03, 2021 at 01:12:10PM +0200, Vlastimil Babka wrote:
> > From a functional point of view, a patch like this is necessary to
> > make bulk allocation of high-order pages work with similar performance
> > to order-0 bulk allocations. The bulk allocator is not updated in this
> > series as it would have to be determined by bulk allocation users how
> > they want to track the order of pages allocated with the bulk allocator.
> >
> > Signed-off-by: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
>
> Acked-by: Vlastimil Babka <vbabka@xxxxxxx>
>

Thanks!

> > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> > index 0ed61f32d898..1ceaa5f44db6 100644
> > --- a/include/linux/mmzone.h
> > +++ b/include/linux/mmzone.h
> > @@ -333,6 +333,24 @@ enum zone_watermarks {
> >  	NR_WMARK
> >  };
> >
> > +/*
> > + * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER plus one additional
> > + * for pageblock size for THP if configured.
> > + */
> > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> > +#define NR_PCP_THP 1
> > +#else
> > +#define NR_PCP_THP 0
> > +#endif
> > +#define NR_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1 + NR_PCP_THP))
> > +
> > +/*
> > + * Shift to encode migratetype in order in the least significant bits and
> > + * migratetype in the higher bits.
>
> Hard for me to understand that comment. I would describe what the code
> does as e.g. "Shift to encode migratetype and order in the same integer,
> with order in the least significant bits ..." etc.

That comment is a lot better :)

> > + */
> > +#define NR_PCP_ORDER_SHIFT 8
>
> Also ORDER_SHIFT is a bit of a misnomer, it's more precisely an
> ORDER_WIDTH, and we are shifting migratetype with it, not order. I'm
> just comparing with how we name nid/zid/etc bits in page flags.

Ok, I can change that.

> > @@ -687,10 +687,53 @@ static void bad_page(struct page *page, const char *reason)
> >  	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
> >  }
> >
> > +static inline unsigned int order_to_pindex(int migratetype, int order)
> > +{
> > +	int base = order;
> > +
> > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> > +	if (order > PAGE_ALLOC_COSTLY_ORDER) {
> > +		VM_BUG_ON(order != pageblock_order);
> > +		base = PAGE_ALLOC_COSTLY_ORDER + 1;
> > +	}
> > +#else
> > +	VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
> > +#endif
> > +
> > +	return (MIGRATE_PCPTYPES * base) + migratetype;
> > +}
> > +
> > +static inline int pindex_to_order(unsigned int pindex)
> > +{
> > +	int order = pindex / PAGE_ALLOC_COSTLY_ORDER;
>
> This seems wrong, shouldn't we divide by MIGRATE_PCPTYPES?
> It just happens to be the same number, so testing won't flag this.
>

Whoops, yes, basic algebra says you're right, so well spotted. If
PAGE_ALLOC_COSTLY_ORDER ever changed for some reason, this would blow
up.
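To make the coincidence concrete, here is a minimal standalone sketch
(userspace, not kernel code) of the encode/decode pair. It hard-codes
the current values MIGRATE_PCPTYPES == 3 and PAGE_ALLOC_COSTLY_ORDER == 3
rather than pulling them from the headers, and it ignores the THP special
case in order_to_pindex():

/*
 * Standalone sketch, not kernel code. Assumes the current values
 * MIGRATE_PCPTYPES == 3 and PAGE_ALLOC_COSTLY_ORDER == 3; the THP
 * special case in order_to_pindex() is ignored here.
 */
#include <assert.h>
#include <stdio.h>

#define MIGRATE_PCPTYPES	3
#define PAGE_ALLOC_COSTLY_ORDER	3

int main(void)
{
	int order, mt;

	for (order = 0; order <= PAGE_ALLOC_COSTLY_ORDER; order++) {
		for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
			unsigned int pindex = MIGRATE_PCPTYPES * order + mt;

			/* Correct inverse: divide by the encode stride */
			assert(pindex / MIGRATE_PCPTYPES == order);

			/*
			 * Buggy inverse. Passes today purely because the
			 * two constants happen to both be 3. If the costly
			 * order became 4, pindex 6 (order 2, migratetype 0)
			 * would decode as 6 / 4 == 1.
			 */
			assert(pindex / PAGE_ALLOC_COSTLY_ORDER == order);
		}
	}
	printf("Both inverses agree while the constants are equal\n");
	return 0;
}

Both asserts hold for every pcp index while the two constants are equal,
which is why no amount of testing on the current tree would have flagged
the wrong divisor.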
So this?

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index bbe05289b121..f1bed5b847ec 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -345,11 +345,11 @@ enum zone_watermarks {
 #define NR_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1 + NR_PCP_THP))
 
 /*
- * Shift to encode migratetype in order in the least significant bits and
- * migratetype in the higher bits.
+ * Shift to encode migratetype and order in the same integer, with order
+ * in the least significant bits.
  */
-#define NR_PCP_ORDER_SHIFT 8
-#define NR_PCP_ORDER_MASK ((1<<NR_PCP_ORDER_SHIFT) - 1)
+#define NR_PCP_ORDER_WIDTH 8
+#define NR_PCP_ORDER_MASK ((1<<NR_PCP_ORDER_WIDTH) - 1)
 
 #define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
 #define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 49f3c7dcdfca..7be71f9ad9f0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -694,7 +694,7 @@ static inline unsigned int order_to_pindex(int migratetype, int order)
 
 static inline int pindex_to_order(unsigned int pindex)
 {
-	int order = pindex / PAGE_ALLOC_COSTLY_ORDER;
+	int order = pindex / MIGRATE_PCPTYPES;
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	if (order > PAGE_ALLOC_COSTLY_ORDER) {
@@ -1487,7 +1487,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 			batch_free = count;
 
 		order = pindex_to_order(pindex);
-		BUILD_BUG_ON(MAX_ORDER >= (1<<NR_PCP_ORDER_SHIFT));
+		BUILD_BUG_ON(MAX_ORDER >= (1<<NR_PCP_ORDER_WIDTH));
 		do {
 			page = list_last_entry(list, struct page, lru);
 			/* must delete to avoid corrupting pcp list */
@@ -1499,7 +1499,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 				continue;
 
 			/* Encode order with the migratetype */
-			page->index <<= NR_PCP_ORDER_SHIFT;
+			page->index <<= NR_PCP_ORDER_WIDTH;
 			page->index |= order;
 
 			list_add_tail(&page->lru, &head);
@@ -1537,7 +1537,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 
 			/* mt has been encoded with the order (see above) */
 			order = mt & NR_PCP_ORDER_MASK;
-			mt >>= NR_PCP_ORDER_SHIFT;
+			mt >>= NR_PCP_ORDER_WIDTH;
 
 			/* MIGRATE_ISOLATE page should not go to pcplists */
 			VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);

-- 
Mel Gorman
SUSE Labs