On Sun, Mar 17, 2013 at 01:04:12PM +0000, Mel Gorman wrote:
>Currently kswapd queues dirty pages for writeback if scanning at an elevated
>priority but the priority kswapd scans at is not related to the number
>of unqueued dirty pages encountered. Since commit "mm: vmscan: Flatten kswapd
>priority loop", the priority is related to the size of the LRU and the
>zone watermark, which is no indication as to whether kswapd should write
>pages or not.
>
>This patch tracks if an excessive number of unqueued dirty pages are being
>encountered at the end of the LRU. If so, it indicates that dirty pages
>are being recycled before flusher threads can clean them and flags the
>zone so that kswapd will start writing pages until the zone is balanced.
>
>Signed-off-by: Mel Gorman <mgorman@xxxxxxx>
>---
> include/linux/mmzone.h |  8 ++++++++
> mm/vmscan.c            | 29 +++++++++++++++++++++++------
> 2 files changed, 31 insertions(+), 6 deletions(-)
>
>diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
>index ede2749..edd6b98 100644
>--- a/include/linux/mmzone.h
>+++ b/include/linux/mmzone.h
>@@ -495,6 +495,9 @@ typedef enum {
> 	ZONE_CONGESTED,		/* zone has many dirty pages backed by
> 				 * a congested BDI
> 				 */
>+	ZONE_DIRTY,		/* reclaim scanning has recently found
>+				 * many dirty file pages
>+				 */
> } zone_flags_t;
> 
> static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
>@@ -517,6 +520,11 @@ static inline int zone_is_reclaim_congested(const struct zone *zone)
> 	return test_bit(ZONE_CONGESTED, &zone->flags);
> }
> 
>+static inline int zone_is_reclaim_dirty(const struct zone *zone)
>+{
>+	return test_bit(ZONE_DIRTY, &zone->flags);
>+}
>+
> static inline int zone_is_reclaim_locked(const struct zone *zone)
> {
> 	return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
>diff --git a/mm/vmscan.c b/mm/vmscan.c
>index af3bb6f..493728b 100644
>--- a/mm/vmscan.c
>+++ b/mm/vmscan.c
>@@ -675,13 +675,14 @@ static unsigned long shrink_page_list(struct list_head *page_list,
> 				      struct zone *zone,
> 				      struct scan_control *sc,
> 				      enum ttu_flags ttu_flags,
>-				      unsigned long *ret_nr_dirty,
>+				      unsigned long *ret_nr_unqueued_dirty,
> 				      unsigned long *ret_nr_writeback,
> 				      bool force_reclaim)
> {
> 	LIST_HEAD(ret_pages);
> 	LIST_HEAD(free_pages);
> 	int pgactivate = 0;
>+	unsigned long nr_unqueued_dirty = 0;
> 	unsigned long nr_dirty = 0;
> 	unsigned long nr_congested = 0;
> 	unsigned long nr_reclaimed = 0;
>@@ -807,14 +808,17 @@ static unsigned long shrink_page_list(struct list_head *page_list,
> 		if (PageDirty(page)) {
> 			nr_dirty++;
> 
>+			if (!PageWriteback(page))
>+				nr_unqueued_dirty++;
>+
> 			/*
> 			 * Only kswapd can writeback filesystem pages to
>-			 * avoid risk of stack overflow but do not writeback
>-			 * unless under significant pressure.
>+			 * avoid risk of stack overflow but only writeback
>+			 * if many dirty pages have been encountered.
> 			 */
> 			if (page_is_file_cache(page) &&
> 					(!current_is_kswapd() ||
>-					 sc->priority >= DEF_PRIORITY - 2)) {
>+					 !zone_is_reclaim_dirty(zone))) {
> 				/*
> 				 * Immediately reclaim when written back.
> 				 * Similar in principal to deactivate_page()
>@@ -959,7 +963,7 @@ keep:
> 	list_splice(&ret_pages, page_list);
> 	count_vm_events(PGACTIVATE, pgactivate);
> 	mem_cgroup_uncharge_end();
>-	*ret_nr_dirty += nr_dirty;
>+	*ret_nr_unqueued_dirty += nr_unqueued_dirty;
> 	*ret_nr_writeback += nr_writeback;
> 	return nr_reclaimed;
> }
>@@ -1372,6 +1376,15 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
> 	    (nr_taken >> (DEF_PRIORITY - sc->priority)))
> 		wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10);
> 
>+	/*
>+	 * Similarly, if many dirty pages are encountered that are not
>+	 * currently being written then flag that kswapd should start
>+	 * writing back pages.
>+	 */
>+	if (global_reclaim(sc) && nr_dirty &&
>+			nr_dirty >= (nr_taken >> (DEF_PRIORITY - sc->priority)))
>+		zone_set_flag(zone, ZONE_DIRTY);
>+
> 	trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id,
> 		zone_idx(zone),
> 		nr_scanned, nr_reclaimed,
>@@ -2735,8 +2748,12 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
> 				end_zone = i;
> 				break;
> 			} else {
>-				/* If balanced, clear the congested flag */
>+				/*
>+				 * If balanced, clear the dirty and congested
>+				 * flags
>+				 */
> 				zone_clear_flag(zone, ZONE_CONGESTED);
>+				zone_clear_flag(zone, ZONE_DIRTY);

Hi Mel,

There are two places in balance_pgdat that clear the ZONE_CONGESTED
flag: one is during the scan of zones whose free_pages <=
high_wmark_pages(zone) (the hunk quoted above), the other is where a
zone becomes balanced after reclaim. It seems you missed the latter
one, so ZONE_DIRTY could stay set even after the zone is balanced.
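Something like the following at that second site, perhaps (completely
untested, and the context is from my reading of the current
balance_pgdat, so it may well look different after your "Flatten kswapd
priority loop" changes; note the existing if needs braces once a second
statement goes in):

	if (zone_balanced(zone, testorder, 0, end_zone)) {
		/*
		 * If a zone reaches its high watermark, consider it
		 * to be no longer congested. It's possible there are
		 * dirty pages backed by congested BDIs but as
		 * pressure is relieved, speculatively avoid
		 * congestion waits.
		 */
		zone_clear_flag(zone, ZONE_CONGESTED);
		/* mirror the scan-side clearing of ZONE_DIRTY */
		zone_clear_flag(zone, ZONE_DIRTY);
	}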
Regards,
Wanpeng Li

> 			}
> 		}
> 
>-- 
>1.8.1.4
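P.S. One thing worth spelling out for other readers: the new trigger in
shrink_inactive_list scales with scan priority. If I read it right, at
sc->priority == DEF_PRIORITY the threshold is nr_taken >> 0, i.e. every
page taken off the LRU tail must be unqueued dirty before ZONE_DIRTY is
set, while two priority levels lower it is nr_taken >> 2, i.e. a
quarter of them, so kswapd resorts to writing pages itself only once
dirty pages clearly dominate the tail of the LRU.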