Signed-off-by: zhou xianrong <xianrong.zhou@xxxxxxxxxxxxx>
Signed-off-by: feng ruxian <ruxian.feng@xxxxxxxxxxxxx>
---
include/linux/mmzone.h | 6 ++--
include/trace/events/vmscan.h | 20 +++++++----
mm/page_alloc.c | 5 +--
mm/vmscan.c | 63 +++++++++++++++++++++++++++++------
4 files changed, 73 insertions(+), 21 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index b593316bff3d..7dd38d7372b9 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -301,6 +301,8 @@ struct lruvec {
#define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4)
/* Isolate unevictable pages */
#define ISOLATE_UNEVICTABLE ((__force isolate_mode_t)0x8)
+/* Isolate only non-CMA pages */
+#define ISOLATE_NONCMA ((__force isolate_mode_t)0x10)
/* LRU Isolation modes. */
typedef unsigned __bitwise isolate_mode_t;
@@ -756,7 +758,7 @@ typedef struct pglist_data {
wait_queue_head_t pfmemalloc_wait;
struct task_struct *kswapd; /* Protected by
mem_hotplug_begin/end() */
- int kswapd_order;
+ int kswapd_order, kswapd_migratetype;
enum zone_type kswapd_highest_zoneidx;
int kswapd_failures; /* Number of 'reclaimed == 0' runs */
@@ -840,7 +842,7 @@ static inline bool pgdat_is_empty(pg_data_t *pgdat)
void build_all_zonelists(pg_data_t *pgdat);
void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order,
- enum zone_type highest_zoneidx);
+ int migratetype, enum zone_type highest_zoneidx);
bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
int highest_zoneidx, unsigned int alloc_flags,
long free_pages);
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 2070df64958e..41bbafdfde84 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -51,37 +51,41 @@ TRACE_EVENT(mm_vmscan_kswapd_sleep,
TRACE_EVENT(mm_vmscan_kswapd_wake,
- TP_PROTO(int nid, int zid, int order),
+ TP_PROTO(int nid, int zid, int order, int mt),
- TP_ARGS(nid, zid, order),
+ TP_ARGS(nid, zid, order, mt),
TP_STRUCT__entry(
__field( int, nid )
__field( int, zid )
__field( int, order )
+ __field( int, mt )
),
TP_fast_assign(
__entry->nid = nid;
__entry->zid = zid;
__entry->order = order;
+ __entry->mt = mt;
),
- TP_printk("nid=%d order=%d",
+ TP_printk("nid=%d order=%d migratetype=%d",
__entry->nid,
- __entry->order)
+ __entry->order,
+ __entry->mt)
);
TRACE_EVENT(mm_vmscan_wakeup_kswapd,
- TP_PROTO(int nid, int zid, int order, gfp_t gfp_flags),
+ TP_PROTO(int nid, int zid, int order, int mt, gfp_t gfp_flags),
- TP_ARGS(nid, zid, order, gfp_flags),
+ TP_ARGS(nid, zid, order, mt, gfp_flags),
TP_STRUCT__entry(
__field( int, nid )
__field( int, zid )
__field( int, order )
+ __field( int, mt )
__field( gfp_t, gfp_flags )
),
@@ -89,12 +93,14 @@ TRACE_EVENT(mm_vmscan_wakeup_kswapd,
__entry->nid = nid;
__entry->zid = zid;
__entry->order = order;
+ __entry->mt = mt;
__entry->gfp_flags = gfp_flags;
),
- TP_printk("nid=%d order=%d gfp_flags=%s",
+ TP_printk("nid=%d order=%d migratetype=%d gfp_flags=%s",
__entry->nid,
__entry->order,
+ __entry->mt,
show_gfp_flags(__entry->gfp_flags))
);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 519a60d5b6f7..45ceb15721b8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3517,7 +3517,7 @@ struct page *rmqueue(struct zone *preferred_zone,
/* Separate test+clear to avoid unnecessary atomics */
if (test_bit(ZONE_BOOSTED_WATERMARK, &zone->flags)) {
clear_bit(ZONE_BOOSTED_WATERMARK, &zone->flags);
- wakeup_kswapd(zone, 0, 0, zone_idx(zone));
+ wakeup_kswapd(zone, 0, 0, migratetype, zone_idx(zone));
}
VM_BUG_ON_PAGE(page && bad_range(zone, page), page);
@@ -4426,11 +4426,12 @@ static void wake_all_kswapds(unsigned int order, gfp_t gfp_mask,
struct zone *zone;
pg_data_t *last_pgdat = NULL;
enum zone_type highest_zoneidx = ac->highest_zoneidx;
+ int migratetype = ac->migratetype;
for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, highest_zoneidx,
ac->nodemask) {
if (last_pgdat != zone->zone_pgdat)
- wakeup_kswapd(zone, gfp_mask, order, highest_zoneidx);
+ wakeup_kswapd(zone, gfp_mask, order, migratetype, highest_zoneidx);
last_pgdat = zone->zone_pgdat;
}
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b1b574ad199d..184f0c4c7151 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -99,6 +99,9 @@ struct scan_control {
/* Can pages be swapped as part of reclaim? */
unsigned int may_swap:1;
+ /* Can CMA pages be reclaimed? */
+ unsigned int may_cma:1;
+
/*
* Cgroups are not reclaimed below their configured memory.low,
* unless we threaten to OOM. If any cgroups are skipped due to
@@ -286,6 +289,11 @@ static bool writeback_throttling_sane(struct scan_control *sc)
}
#endif
+static bool movable_reclaim(gfp_t gfp_mask)
+{
+ return is_migrate_movable(gfp_migratetype(gfp_mask));
+}
+
/*
* This misses isolated pages which are not accounted for to save counters.
* As the data only determines if reclaim or compaction continues, it is
@@ -1499,6 +1507,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
.gfp_mask = GFP_KERNEL,
.priority = DEF_PRIORITY,
.may_unmap = 1,
+ .may_cma = 1,
};
struct reclaim_stat stat;
unsigned int nr_reclaimed;
@@ -1593,6 +1602,9 @@ int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
return ret;
+ if ((mode & ISOLATE_NONCMA) && is_migrate_cma(get_pageblock_migratetype(page)))
+ return ret;
+
return 0;
}
@@ -1647,7 +1659,10 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
unsigned long skipped = 0;
unsigned long scan, total_scan, nr_pages;
LIST_HEAD(pages_skipped);
- isolate_mode_t mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED);
+ isolate_mode_t mode;
+
+ mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED);
+ mode |= (sc->may_cma ? 0 : ISOLATE_NONCMA);
total_scan = 0;
scan = 0;
@@ -2125,6 +2140,7 @@ unsigned long reclaim_pages(struct list_head *page_list)
.may_writepage = 1,
.may_unmap = 1,
.may_swap = 1,
+ .may_cma = 1,
};
while (!list_empty(page_list)) {
@@ -3253,6 +3269,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
.may_writepage = !laptop_mode,
.may_unmap = 1,
.may_swap = 1,
+ .may_cma = movable_reclaim(gfp_mask),
};
/*
@@ -3298,6 +3315,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
.may_unmap = 1,
.reclaim_idx = MAX_NR_ZONES - 1,
.may_swap = !noswap,
+ .may_cma = 1,
};
WARN_ON_ONCE(!current->reclaim_state);
@@ -3341,6 +3359,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
.may_writepage = !laptop_mode,
.may_unmap = 1,
.may_swap = may_swap,
+ .may_cma = 1,
};
/*
* Traverse the ZONELIST_FALLBACK zonelist of the current node to put
@@ -3548,7 +3567,7 @@ static bool kswapd_shrink_node(pg_data_t *pgdat,
* or lower is eligible for reclaim until at least one usable zone is
* balanced.
*/
-static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
+static int balance_pgdat(pg_data_t *pgdat, int order, int migratetype, int highest_zoneidx)
{
int i;
unsigned long nr_soft_reclaimed;
@@ -3650,6 +3669,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
*/
sc.may_writepage = !laptop_mode && !nr_boost_reclaim;
sc.may_swap = !nr_boost_reclaim;
+ sc.may_cma = is_migrate_movable(migratetype);
/*
* Do some background aging of the anon list, to give
@@ -3771,8 +3791,15 @@ static enum zone_type kswapd_highest_zoneidx(pg_data_t *pgdat,
return curr_idx == MAX_NR_ZONES ? prev_highest_zoneidx : curr_idx;
}
+static int kswapd_migratetype(pg_data_t *pgdat, int prev_migratetype)
+{
+ int curr_migratetype = READ_ONCE(pgdat->kswapd_migratetype);
+
+ return curr_migratetype == MIGRATE_TYPES ? prev_migratetype : curr_migratetype;
+}
+
static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order,
- unsigned int highest_zoneidx)
+ int migratetype, unsigned int highest_zoneidx)
{
long remaining = 0;
DEFINE_WAIT(wait);
@@ -3807,8 +3834,8 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_o
remaining = schedule_timeout(HZ/10);
/*
- * If woken prematurely then reset kswapd_highest_zoneidx and
- * order. The values will either be from a wakeup request or
+ * If woken prematurely then reset kswapd_highest_zoneidx, order
+ * and migratetype. The values will either be from a wakeup request or
* the previous request that slept prematurely.
*/
if (remaining) {
@@ -3818,6 +3845,10 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_o
if (READ_ONCE(pgdat->kswapd_order) < reclaim_order)
WRITE_ONCE(pgdat->kswapd_order, reclaim_order);
+
+ if (!is_migrate_movable(READ_ONCE(pgdat->kswapd_migratetype)))
+ WRITE_ONCE(pgdat->kswapd_migratetype,
+ kswapd_migratetype(pgdat, migratetype));
}
finish_wait(&pgdat->kswapd_wait, &wait);
@@ -3870,6 +3901,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_o
*/
static int kswapd(void *p)
{
+ int migratetype = 0;
unsigned int alloc_order, reclaim_order;
unsigned int highest_zoneidx = MAX_NR_ZONES - 1;
pg_data_t *pgdat = (pg_data_t*)p;
@@ -3895,23 +3927,27 @@ static int kswapd(void *p)
set_freezable();
WRITE_ONCE(pgdat->kswapd_order, 0);
+ WRITE_ONCE(pgdat->kswapd_migratetype, MIGRATE_TYPES);
WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES);
for ( ; ; ) {
bool ret;
alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order);
+ migratetype = kswapd_migratetype(pgdat, migratetype);
highest_zoneidx = kswapd_highest_zoneidx(pgdat,
highest_zoneidx);
kswapd_try_sleep:
kswapd_try_to_sleep(pgdat, alloc_order, reclaim_order,
- highest_zoneidx);
+ migratetype, highest_zoneidx);
/* Read the new order and highest_zoneidx */
alloc_order = READ_ONCE(pgdat->kswapd_order);
+ migratetype = kswapd_migratetype(pgdat, migratetype);
highest_zoneidx = kswapd_highest_zoneidx(pgdat,
highest_zoneidx);
WRITE_ONCE(pgdat->kswapd_order, 0);
+ WRITE_ONCE(pgdat->kswapd_migratetype, MIGRATE_TYPES);
WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES);
ret = try_to_freeze();
@@ -3934,8 +3970,8 @@ static int kswapd(void *p)
* request (alloc_order).
*/
trace_mm_vmscan_kswapd_wake(pgdat->node_id, highest_zoneidx,
- alloc_order);
- reclaim_order = balance_pgdat(pgdat, alloc_order,
+ alloc_order, migratetype);
+ reclaim_order = balance_pgdat(pgdat, alloc_order, migratetype,
highest_zoneidx);
if (reclaim_order < alloc_order)
goto kswapd_try_sleep;
@@ -3953,11 +3989,12 @@ static int kswapd(void *p)
* has failed or is not needed, still wake up kcompactd if only compaction is
* needed.
*/
-void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
+void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order, int migratetype,
enum zone_type highest_zoneidx)
{
pg_data_t *pgdat;
enum zone_type curr_idx;
+ int curr_migratetype;
if (!managed_zone(zone))
return;
@@ -3967,6 +4004,7 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
pgdat = zone->zone_pgdat;
curr_idx = READ_ONCE(pgdat->kswapd_highest_zoneidx);
+ curr_migratetype = READ_ONCE(pgdat->kswapd_migratetype);
if (curr_idx == MAX_NR_ZONES || curr_idx < highest_zoneidx)
WRITE_ONCE(pgdat->kswapd_highest_zoneidx, highest_zoneidx);
@@ -3974,6 +4012,9 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
if (READ_ONCE(pgdat->kswapd_order) < order)
WRITE_ONCE(pgdat->kswapd_order, order);
+ if (curr_migratetype == MIGRATE_TYPES || is_migrate_movable(migratetype))
+ WRITE_ONCE(pgdat->kswapd_migratetype, migratetype);
+
if (!waitqueue_active(&pgdat->kswapd_wait))
return;
@@ -3994,7 +4035,7 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
}
trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, highest_zoneidx, order,
- gfp_flags);
+ migratetype, gfp_flags);
wake_up_interruptible(&pgdat->kswapd_wait);
}
@@ -4017,6 +4058,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
.may_writepage = 1,
.may_unmap = 1,
.may_swap = 1,
+ .may_cma = 1,
.hibernation_mode = 1,
};
struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
@@ -4176,6 +4218,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
.may_writepage = !!(node_reclaim_mode & RECLAIM_WRITE),
.may_unmap = !!(node_reclaim_mode & RECLAIM_UNMAP),
.may_swap = 1,
+ .may_cma = movable_reclaim(gfp_mask),
.reclaim_idx = gfp_zone(gfp_mask),
};