Hi,

----- Original Message -----
> From: Vlastimil Babka <vbabka@xxxxxxx>
> To: linux-mm@xxxxxxxxx
> Cc: linux-kernel@xxxxxxxxxxxxxxx; Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>; Hugh Dickins <hughd@xxxxxxxxxx>; Andrea Arcangeli <aarcange@xxxxxxxxxx>; Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>; Rik van Riel <riel@xxxxxxxxxx>; Mel Gorman <mgorman@xxxxxxx>; David Rientjes <rientjes@xxxxxxxxxx>; Joonsoo Kim <iamjoonsoo.kim@xxxxxxx>; Vlastimil Babka <vbabka@xxxxxxx>
> Sent: Monday, 3 August 2015 9:55 PM
> Subject: [RFC v3 2/2] mm, compaction: make kcompactd rely on sysctl_extfrag_threshold
>
> The previous patch introduced kcompactd kthreads which are meant to keep
> memory fragmentation lower than what kswapd achieves through its
> reclaim/compaction activity. In order to do that, it needs a stricter criteria
> to determine when to start/stop compacting, than the standard criteria that
> try to satisfy a single next high-order allocation request. This patch
> provides such criteria with minimal changes and no new tunables.
>
> This patch uses the existing sysctl_extfrag_threshold tunable. This tunable
> currently determines when direct compaction should stop trying to satisfy an
> allocation - that happens when a page of desired order has not been made
> available, but the fragmentation already dropped below given threshold, so we
> expect further compaction to be too costly and possibly fail anyway.
>
> For kcompactd, we simply ignore whether the page has been available, and
> continue compacting, until fragmentation drops below the threshold (or the
> whole zone is scanned).
>
> Not-yet-signed-off-by: Vlastimil Babka <vbabka@xxxxxxx>
> ---
>  include/linux/compaction.h |  7 ++++---
>  mm/compaction.c            | 37 ++++++++++++++++++++++++++-----------
>  mm/internal.h              |  1 +
>  mm/vmscan.c                | 10 +++++-----
>  mm/vmstat.c                | 12 +++++++-----
>  5 files changed, 43 insertions(+), 24 deletions(-)
>
> diff --git a/include/linux/compaction.h b/include/linux/compaction.h
> index 8cd1fb5..c615465 100644
> --- a/include/linux/compaction.h
> +++ b/include/linux/compaction.h
> @@ -36,14 +36,15 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
>  			void __user *buffer, size_t *length, loff_t *ppos);
>  extern int sysctl_compact_unevictable_allowed;
>
> -extern int fragmentation_index(struct zone *zone, unsigned int order);
> +extern int fragmentation_index(struct zone *zone, unsigned int order,
> +					bool ignore_suitable);

We would like to retain the original fragmentation_index() as it is, because
in some cases people may be using it without kcompactd, and for them this
signature change would make future kernel upgrades painful. In my opinion,
fragmentation_index() should work based only on the zone and the order.
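To show what I mean, here is a rough, untested sketch on top of this patch's
extended __fragmentation_index() (the helper name
fragmentation_index_ignore_suitable() is just something I made up for
illustration): keep the current two-argument fragmentation_index() untouched
and give kcompactd its own wrapper, so existing callers never see a signature
change.

/* mm/vmstat.c -- sketch only, not a real patch */

/* Unchanged: existing callers keep the old two-argument signature. */
int fragmentation_index(struct zone *zone, unsigned int order)
{
        struct contig_page_info info;

        fill_contig_page_info(zone, order, &info);
        return __fragmentation_index(order, &info, false);
}

/*
 * kcompactd-only variant: same calculation, but keep going even when a
 * free page of the requested order is already available.
 */
int fragmentation_index_ignore_suitable(struct zone *zone, unsigned int order)
{
        struct contig_page_info info;

        fill_contig_page_info(zone, order, &info);
        return __fragmentation_index(order, &info, true);
}

The kcompactd path in __compact_finished() would then call the new helper,
while everything else (including the extfrag debugfs code) keeps calling
fragmentation_index() exactly as it does today.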
And I guess, for kcompactd, we should definitely have something like a
CONFIG_COMPACTION_KCOMPACTD config option? (A rough sketch of what I mean is
at the end of this mail.)

>  extern unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
>  		int alloc_flags, const struct alloc_context *ac,
>  		enum migrate_mode mode, int *contended);
>  extern void compact_pgdat(pg_data_t *pgdat, int order);
>  extern void reset_isolation_suitable(pg_data_t *pgdat);
>  extern unsigned long compaction_suitable(struct zone *zone, int order,
> -		int alloc_flags, int classzone_idx);
> +		int alloc_flags, int classzone_idx, bool kcompactd);
>
>  extern void defer_compaction(struct zone *zone, int order);
>  extern bool compaction_deferred(struct zone *zone, int order);
> @@ -73,7 +74,7 @@ static inline void reset_isolation_suitable(pg_data_t *pgdat)
>  }
>
>  static inline unsigned long compaction_suitable(struct zone *zone, int order,
> -					int alloc_flags, int classzone_idx)
> +					int alloc_flags, int classzone_idx, bool kcompactd)
>  {
>  	return COMPACT_SKIPPED;
>  }
> diff --git a/mm/compaction.c b/mm/compaction.c
> index b051412..62b9e51 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -1183,6 +1183,19 @@ static int __compact_finished(struct zone *zone, struct compact_control *cc,
>  							cc->alloc_flags))
>  		return COMPACT_CONTINUE;
>
> +	if (cc->kcompactd) {
> +		/*
> +		 * kcompactd continues even if watermarks are met, until the
> +		 * fragmentation index is so low that direct compaction
> +		 * wouldn't be attempted
> +		 */
> +		int fragindex = fragmentation_index(zone, cc->order, true);
> +		if (fragindex <= sysctl_extfrag_threshold)
> +			return COMPACT_NOT_SUITABLE_ZONE;
> +		else
> +			return COMPACT_CONTINUE;
> +	}
> +
>  	/* Direct compactor: Is a suitable page free? */
>  	for (order = cc->order; order < MAX_ORDER; order++) {
>  		struct free_area *area = &zone->free_area[order];
> @@ -1231,7 +1244,7 @@ static int compact_finished(struct zone *zone, struct compact_control *cc,
>   *   COMPACT_CONTINUE - If compaction should run now
>   */
>  static unsigned long __compaction_suitable(struct zone *zone, int order,
> -					int alloc_flags, int classzone_idx)
> +					int alloc_flags, int classzone_idx, bool kcompactd)
>  {
>  	int fragindex;
>  	unsigned long watermark;
> @@ -1246,10 +1259,10 @@ static unsigned long __compaction_suitable(struct zone *zone, int order,
>  	watermark = low_wmark_pages(zone);
>  	/*
>  	 * If watermarks for high-order allocation are already met, there
> -	 * should be no need for compaction at all.
> +	 * should be no need for compaction at all, unless it's kcompactd.
>  	 */
> -	if (zone_watermark_ok(zone, order, watermark, classzone_idx,
> -						alloc_flags))
> +	if (!kcompactd && zone_watermark_ok(zone, order, watermark,
> +						classzone_idx, alloc_flags))
>  		return COMPACT_PARTIAL;
>
>  	/*
> @@ -1272,7 +1285,7 @@ static unsigned long __compaction_suitable(struct zone *zone, int order,
>  	 *
>  	 * Only compact if a failure would be due to fragmentation.
>  	 */
> -	fragindex = fragmentation_index(zone, order);
> +	fragindex = fragmentation_index(zone, order, kcompactd);
>  	if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
>  		return COMPACT_NOT_SUITABLE_ZONE;
>
> @@ -1280,11 +1293,12 @@ static unsigned long __compaction_suitable(struct zone *zone, int order,
>  }
>
>  unsigned long compaction_suitable(struct zone *zone, int order,
> -					int alloc_flags, int classzone_idx)
> +					int alloc_flags, int classzone_idx, bool kcompactd)
>  {
>  	unsigned long ret;
>
> -	ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx);
> +	ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx,
> +							kcompactd);
>  	trace_mm_compaction_suitable(zone, order, ret);
>  	if (ret == COMPACT_NOT_SUITABLE_ZONE)
>  		ret = COMPACT_SKIPPED;
> @@ -1302,7 +1316,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
>  	unsigned long last_migrated_pfn = 0;
>
>  	ret = compaction_suitable(zone, cc->order, cc->alloc_flags,
> -						cc->classzone_idx);
> +						cc->classzone_idx, cc->kcompactd);
>  	switch (ret) {
>  	case COMPACT_PARTIAL:
>  	case COMPACT_SKIPPED:
> @@ -1731,8 +1745,8 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat, int order)
>  	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
>  		zone = &pgdat->node_zones[zoneid];
>
> -		if (compaction_suitable(zone, order, 0, zoneid) ==
> -						COMPACT_CONTINUE)
> +		if (compaction_suitable(zone, order, 0, zoneid, true) ==
> +						COMPACT_CONTINUE)
>  			return true;
>  	}
>
> @@ -1750,6 +1764,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
>  	struct compact_control cc = {
>  		.order = pgdat->kcompactd_max_order,
>  		.mode = MIGRATE_SYNC_LIGHT,
> +		.kcompactd = true,
>  		//TODO: do this or not?
>  		.ignore_skip_hint = true,
>  	};
> @@ -1760,7 +1775,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
>  		if (!populated_zone(zone))
>  			continue;
>
> -		if (compaction_suitable(zone, cc.order, 0, zoneid) !=
> +		if (compaction_suitable(zone, cc.order, 0, zoneid, true) !=
>  						COMPACT_CONTINUE)
>  			continue;
>
> diff --git a/mm/internal.h b/mm/internal.h
> index 36b23f1..2cea51a 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -184,6 +184,7 @@ struct compact_control {
>  	unsigned long migrate_pfn;	/* isolate_migratepages search base */
>  	enum migrate_mode mode;		/* Async or sync migration mode */
>  	bool ignore_skip_hint;		/* Scan blocks even if marked skip */
> +	bool kcompactd;			/* We are in kcompactd kthread */
>  	int order;			/* order a direct compactor needs */
>  	const gfp_t gfp_mask;		/* gfp mask of a direct compactor */
>  	const int alloc_flags;		/* alloc flags of a direct compactor */
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 075f53c..f6582b6 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -2339,7 +2339,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
>  		return true;
>
>  	/* If compaction would go ahead or the allocation would succeed, stop */
> -	switch (compaction_suitable(zone, sc->order, 0, 0)) {
> +	switch (compaction_suitable(zone, sc->order, 0, 0, false)) {
>  	case COMPACT_PARTIAL:
>  	case COMPACT_CONTINUE:
>  		return false;
> @@ -2467,7 +2467,7 @@ static inline bool compaction_ready(struct zone *zone, int order)
>  	 * If compaction is not ready to start and allocation is not likely
>  	 * to succeed without it, then keep reclaiming.
>  	 */
> -	if (compaction_suitable(zone, order, 0, 0) == COMPACT_SKIPPED)
> +	if (compaction_suitable(zone, order, 0, 0, false) == COMPACT_SKIPPED)
>  		return false;
>
>  	return watermark_ok;
> @@ -2941,7 +2941,7 @@ static bool zone_balanced(struct zone *zone, int order,
>  		return false;
>
>  	if (IS_ENABLED(CONFIG_COMPACTION) && order && compaction_suitable(zone,
> -			order, 0, classzone_idx) == COMPACT_SKIPPED)
> +			order, 0, classzone_idx, false) == COMPACT_SKIPPED)
>  		return false;
>
>  	return true;
> @@ -3065,8 +3065,8 @@ static bool kswapd_shrink_zone(struct zone *zone,
>  	 * from memory. Do not reclaim more than needed for compaction.
>  	 */
>  	if (IS_ENABLED(CONFIG_COMPACTION) && sc->order &&
> -		compaction_suitable(zone, sc->order, 0, classzone_idx)
> -						!= COMPACT_SKIPPED)
> +		compaction_suitable(zone, sc->order, 0, classzone_idx,
> +						false) != COMPACT_SKIPPED)
>  		testorder = 0;
>
>  	/*
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index 4f5cd97..9916110 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -643,7 +643,8 @@ static void fill_contig_page_info(struct zone *zone,
>   * The value can be used to determine if page reclaim or compaction
>   * should be used
>   */
> -static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
> +static int __fragmentation_index(unsigned int order,
> +			struct contig_page_info *info, bool ignore_suitable)
>  {
>  	unsigned long requested = 1UL << order;
>
> @@ -651,7 +652,7 @@ static int __fragmentation_index(unsigned int order, struct contig_page_info *in
>  		return 0;
>
>  	/* Fragmentation index only makes sense when a request would fail */
> -	if (info->free_blocks_suitable)
> +	if (!ignore_suitable && info->free_blocks_suitable)
>  		return -1000;
>
>  	/*
> @@ -664,12 +665,13 @@ static int __fragmentation_index(unsigned int order, struct contig_page_info *in
>  }
>
>  /* Same as __fragmentation index but allocs contig_page_info on stack */
> -int fragmentation_index(struct zone *zone, unsigned int order)
> +int fragmentation_index(struct zone *zone, unsigned int order,
> +						bool ignore_suitable)
>  {
>  	struct contig_page_info info;
>
>  	fill_contig_page_info(zone, order, &info);
> -	return __fragmentation_index(order, &info);
> +	return __fragmentation_index(order, &info, ignore_suitable);
>  }
>  #endif
>
> @@ -1635,7 +1637,7 @@ static void extfrag_show_print(struct seq_file *m,
>  				zone->name);
>  	for (order = 0; order < MAX_ORDER; ++order) {
>  		fill_contig_page_info(zone, order, &info);
> -		index = __fragmentation_index(order, &info);
> +		index = __fragmentation_index(order, &info, false);
>  		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
>
>  	}
>
> --
> 2.4.6
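To make the config option question above more concrete, here is a rough,
untested sketch of what I was thinking. CONFIG_COMPACTION_KCOMPACTD is a name
I made up (it does not exist in this series), and
fragmentation_index_ignore_suitable() is the hypothetical helper from the
sketch earlier in this mail. The point is that the kcompactd-only branch in
__compact_finished() would compile away on kernels that do not want the
kthreads:

/* mm/compaction.c, inside __compact_finished() -- sketch only */
#ifdef CONFIG_COMPACTION_KCOMPACTD
	if (cc->kcompactd) {
		/*
		 * kcompactd keeps compacting even when watermarks are met,
		 * until the fragmentation index drops to the point where
		 * direct compaction would give up as well.
		 */
		int fragindex = fragmentation_index_ignore_suitable(zone,
							cc->order);

		if (fragindex <= sysctl_extfrag_threshold)
			return COMPACT_NOT_SUITABLE_ZONE;
		return COMPACT_CONTINUE;
	}
#endif

The new compact_control.kcompactd field and the kcompactd_* functions could be
guarded the same way, so kernels built without the option behave exactly as
they do today.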