The fair zone allocation policy interleaves allocation requests between
zones to avoid an age inversion problem whereby new pages are reclaimed
to balance a zone. Reclaim is now node-based, so this should no longer
be an issue, and the fair zone allocation policy is not free. This
patch removes it.

Signed-off-by: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
---
 include/linux/mmzone.h |  2 --
 mm/internal.h          |  1 -
 mm/page_alloc.c        | 76 +-------------------------------------------------
 mm/vmstat.c            |  1 -
 4 files changed, 1 insertion(+), 79 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7372d59c5e3b..ccba10bd3241 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -117,7 +117,6 @@ struct zone_padding {
 enum zone_stat_item {
 	/* First 128 byte cacheline (assuming 64 bit words) */
 	NR_FREE_PAGES,
-	NR_ALLOC_BATCH,
 	NR_MLOCK,		/* mlock()ed pages found and moved off LRU */
 	NR_SLAB_RECLAIMABLE,
 	NR_SLAB_UNRECLAIMABLE,
@@ -515,7 +514,6 @@ struct zone {
 
 enum zone_flags {
 	ZONE_OOM_LOCKED,	/* zone is in OOM killer zonelist */
-	ZONE_FAIR_DEPLETED,	/* fair zone policy batch depleted */
 };
 
 enum pgdat_flags {
diff --git a/mm/internal.h b/mm/internal.h
index 1498e5c850f1..ad623eb6caa3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -469,7 +469,6 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 #define ALLOC_HIGH		0x20 /* __GFP_HIGH set */
 #define ALLOC_CPUSET		0x40 /* check for correct cpuset */
 #define ALLOC_CMA		0x80 /* allow allocations from CMA areas */
-#define ALLOC_FAIR		0x100 /* fair zone allocation */
 
 enum ttu_flags;
 struct tlbflush_unmap_batch;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e0cb0a6fe6c4..4fcd6298b9a1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2372,11 +2372,6 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
 			get_pcppage_migratetype(page));
 	}
 
-	__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
-	if (atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]) <= 0 &&
-	    !test_bit(ZONE_FAIR_DEPLETED, &zone->flags))
-		set_bit(ZONE_FAIR_DEPLETED, &zone->flags);
-
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
 	zone_statistics(preferred_zone, zone, gfp_flags);
 	local_irq_restore(flags);
@@ -2561,40 +2556,18 @@ bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
 }
 
 #ifdef CONFIG_NUMA
-static bool zone_local(struct zone *local_zone, struct zone *zone)
-{
-	return local_zone->node == zone->node;
-}
-
 static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 {
 	return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <
 				RECLAIM_DISTANCE;
 }
 #else	/* CONFIG_NUMA */
-static bool zone_local(struct zone *local_zone, struct zone *zone)
-{
-	return true;
-}
-
 static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 {
 	return true;
 }
 #endif	/* CONFIG_NUMA */
 
-static void reset_alloc_batches(struct zone *preferred_zone)
-{
-	struct zone *zone = preferred_zone->zone_pgdat->node_zones;
-
-	do {
-		mod_zone_page_state(zone, NR_ALLOC_BATCH,
-			high_wmark_pages(zone) - low_wmark_pages(zone) -
-			atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
-		clear_bit(ZONE_FAIR_DEPLETED, &zone->flags);
-	} while (zone++ != preferred_zone);
-}
-
 /*
  * get_page_from_freelist goes through the zonelist trying to allocate
  * a page.
@@ -2607,11 +2580,6 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 	struct zoneref *z;
 	struct page *page = NULL;
 	struct zone *zone;
-	int nr_fair_skipped = 0;
-	bool zonelist_rescan;
-
-zonelist_scan:
-	zonelist_rescan = false;
 
 	/*
 	 * Scan zonelist, looking for a zone with enough free.
@@ -2626,20 +2594,6 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 				!cpuset_zone_allowed(zone, gfp_mask))
 			continue;
 		/*
-		 * Distribute pages in proportion to the individual
-		 * zone size to ensure fair page aging. The zone a
-		 * page was allocated in should have no effect on the
-		 * time the page has in memory before being reclaimed.
-		 */
-		if (alloc_flags & ALLOC_FAIR) {
-			if (!zone_local(ac->preferred_zone, zone))
-				break;
-			if (test_bit(ZONE_FAIR_DEPLETED, &zone->flags)) {
-				nr_fair_skipped++;
-				continue;
-			}
-		}
-		/*
 		 * When allocating a page cache page for writing, we
 		 * want to get it from a zone that is within its dirty
 		 * limit, such that no single zone holds more than its
@@ -2718,27 +2672,6 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 		}
 	}
 
-	/*
-	 * The first pass makes sure allocations are spread fairly within the
-	 * local node. However, the local node might have free pages left
-	 * after the fairness batches are exhausted, and remote zones haven't
-	 * even been considered yet. Try once more without fairness, and
-	 * include remote zones now, before entering the slowpath and waking
-	 * kswapd: prefer spilling to a remote zone over swapping locally.
-	 */
-	if (alloc_flags & ALLOC_FAIR) {
-		alloc_flags &= ~ALLOC_FAIR;
-		if (nr_fair_skipped) {
-			zonelist_rescan = true;
-			reset_alloc_batches(ac->preferred_zone);
-		}
-		if (nr_online_nodes > 1)
-			zonelist_rescan = true;
-	}
-
-	if (zonelist_rescan)
-		goto zonelist_scan;
-
 	return NULL;
 }
 
@@ -3404,7 +3337,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	struct zoneref *preferred_zoneref;
 	struct page *page = NULL;
 	unsigned int cpuset_mems_cookie;
-	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
+	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
 	gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
 	struct alloc_context ac = {
 		.high_zoneidx = gfp_zone(gfp_mask),
@@ -5583,9 +5516,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 		zone_seqlock_init(zone);
 		zone_pcp_init(zone);
 
-		/* For bootup, initialized properly in watermark setup */
-		mod_zone_page_state(zone, NR_ALLOC_BATCH, zone->managed_pages);
-
 		if (!size)
 			continue;
 
@@ -6425,10 +6355,6 @@ static void __setup_per_zone_wmarks(void)
 		zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) + (tmp >> 2);
 		zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1);
 
-		__mod_zone_page_state(zone, NR_ALLOC_BATCH,
-			high_wmark_pages(zone) - low_wmark_pages(zone) -
-			atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
-
 		spin_unlock_irqrestore(&zone->lock, flags);
 	}
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 4d8617b02032..34fe40824213 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -942,7 +942,6 @@ int fragmentation_index(struct zone *zone, unsigned int order)
 const char * const vmstat_text[] = {
 	/* enum zone_stat_item countes */
 	"nr_free_pages",
-	"nr_alloc_batch",
 	"nr_mlock",
 	"nr_slab_reclaimable",
 	"nr_slab_unreclaimable",
-- 
2.6.4

-- 
Mel Gorman
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
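
For readers unfamiliar with what is being removed: ALLOC_FAIR gave each
zone an allocation batch (NR_ALLOC_BATCH) that scales with zone size,
skipped zones whose batch was depleted (ZONE_FAIR_DEPLETED), and reset
the batches once every local zone had been skipped. The following is a
minimal userspace sketch of that round-robin batching idea only; the
struct, zone names, weights and reset rule are illustrative stand-ins,
not the kernel implementation (the real code is in the removed hunks
above).

	/* fair_sketch.c: illustrative model of per-zone batch fairness. */
	#include <stdio.h>

	#define NR_ZONES 3

	struct fake_zone {
		const char *name;
		long batch;		/* allocations left before this zone is skipped */
		long allocated;		/* total allocations served, for the summary */
	};

	/* Refill every batch in proportion to an arbitrary per-zone weight. */
	static void reset_batches(struct fake_zone *zones, const long *weights)
	{
		for (int i = 0; i < NR_ZONES; i++)
			zones[i].batch = weights[i];
	}

	/* Serve from the first zone whose batch is not yet depleted. */
	static struct fake_zone *fair_alloc(struct fake_zone *zones, const long *weights)
	{
		for (int i = 0; i < NR_ZONES; i++) {
			if (zones[i].batch > 0) {
				zones[i].batch--;
				zones[i].allocated++;
				return &zones[i];
			}
		}
		/* Every batch is depleted: reset them all and retry once. */
		reset_batches(zones, weights);
		return fair_alloc(zones, weights);
	}

	int main(void)
	{
		struct fake_zone zones[NR_ZONES] = {
			{ "Normal", 0, 0 }, { "DMA32", 0, 0 }, { "DMA", 0, 0 },
		};
		/* Pretend batch sizes proportional to zone size. */
		const long weights[NR_ZONES] = { 8, 4, 1 };

		reset_batches(zones, weights);
		for (int i = 0; i < 100; i++)
			fair_alloc(zones, weights);

		/* Allocations end up spread roughly in proportion to the weights. */
		for (int i = 0; i < NR_ZONES; i++)
			printf("%-7s %ld allocations\n", zones[i].name, zones[i].allocated);
		return 0;
	}

With node-based reclaim, the zone a page was allocated from no longer
influences how long it survives in memory, which is why the batch
bookkeeping can be dropped from the allocator fast path.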