---
include/linux/mmzone.h | 5 ++
mm/huge_memory.c | 129 ++++++++++++++++++++++++++++-------------
mm/memcontrol.c | 3 +
mm/page_alloc.c | 2 +
4 files changed, 100 insertions(+), 39 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index bda20282746b..f748542745ec 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -684,7 +684,12 @@ struct deferred_split {
spinlock_t split_queue_lock;
struct list_head split_queue;
unsigned long split_queue_len;
+ unsigned int deferred_split_calls;
+ struct work_struct deferred_split_work;
};
+
+void flush_deferred_split_queue(struct work_struct *work);
+void flush_deferred_split_queue_memcg(struct work_struct *work);
#endif

/*
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c5cb6dcd6c69..bb7bef856e38 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2842,43 +2842,6 @@ void free_transhuge_page(struct page *page)
free_compound_page(page);
}

-void deferred_split_huge_page(struct page *page)
-{
- struct deferred_split *ds_queue = get_deferred_split_queue(page);
-#ifdef CONFIG_MEMCG
- struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
-#endif
- unsigned long flags;
-
- VM_BUG_ON_PAGE(!PageTransHuge(page), page);
-
- /*
- * The try_to_unmap() in page reclaim path might reach here too,
- * this may cause a race condition to corrupt deferred split queue.
- * And, if page reclaim is already handling the same page, it is
- * unnecessary to handle it again in shrinker.
- *
- * Check PageSwapCache to determine if the page is being
- * handled by page reclaim since THP swap would add the page into
- * swap cache before calling try_to_unmap().
- */
- if (PageSwapCache(page))
- return;
-
- spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
- if (list_empty(page_deferred_list(page))) {
- count_vm_event(THP_DEFERRED_SPLIT_PAGE);
- list_add_tail(page_deferred_list(page), &ds_queue->split_queue);
- ds_queue->split_queue_len++;
-#ifdef CONFIG_MEMCG
- if (memcg)
- memcg_set_shrinker_bit(memcg, page_to_nid(page),
- deferred_split_shrinker.id);
-#endif
- }
- spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
-}
-
static unsigned long deferred_split_count(struct shrinker *shrink,
struct shrink_control *sc)
{
@@ -2895,8 +2858,7 @@ static unsigned long deferred_split_count(struct shrinker *shrink,
static unsigned long deferred_split_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
- struct pglist_data *pgdata = NODE_DATA(sc->nid);
- struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
+ struct deferred_split *ds_queue = NULL;
unsigned long flags;
LIST_HEAD(list), *pos, *next;
struct page *page;
@@ -2906,6 +2868,10 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
if (sc->memcg)
ds_queue = &sc->memcg->deferred_split_queue;
#endif
+ if (!ds_queue) {
+ struct pglist_data *pgdata = NODE_DATA(sc->nid);
+ ds_queue = &pgdata->deferred_split_queue;
+ }
 
spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
/* Take pin on all head pages to avoid freeing them under us */
@@ -2957,6 +2923,91 @@ static struct shrinker deferred_split_shrinker = {
SHRINKER_NONSLAB,
};

+static void __flush_deferred_split_queue(struct pglist_data *pgdata,
+ struct mem_cgroup *memcg)
+{
+ struct shrink_control sc;
+
+ sc.nid = pgdata ? pgdata->node_id : 0;
+ sc.memcg = memcg;
+ sc.nr_to_scan = 0; /* Unlimited */
+
+ deferred_split_scan(NULL, &sc);
+}
+
+void flush_deferred_split_queue(struct work_struct *work)
+{
+ struct deferred_split *ds_queue;
+ struct pglist_data *pgdata;
+
+ ds_queue = container_of(work, struct deferred_split,
+ deferred_split_work);
+ pgdata = container_of(ds_queue, struct pglist_data,
+ deferred_split_queue);
+ __flush_deferred_split_queue(pgdata, NULL);
+}
+
+#ifdef CONFIG_MEMCG
+void flush_deferred_split_queue_memcg(struct work_struct *work)
+{
+ struct deferred_split *ds_queue;
+ struct mem_cgroup *memcg;
+
+ ds_queue = container_of(work, struct deferred_split,
+ deferred_split_work);
+ memcg = container_of(ds_queue, struct mem_cgroup,
+ deferred_split_queue);
+ __flush_deferred_split_queue(NULL, memcg);
+}
+#endif
+
+#define NR_CALLS_TO_SPLIT 32
+#define NR_PAGES_ON_QUEUE_TO_SPLIT 16
+
+void deferred_split_huge_page(struct page *page)
+{
+ struct deferred_split *ds_queue = get_deferred_split_queue(page);
+#ifdef CONFIG_MEMCG
+ struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
+#endif
+ unsigned long flags;
+
+ VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+
+ /*
+ * The try_to_unmap() in page reclaim path might reach here too,
+ * this may cause a race condition to corrupt deferred split queue.
+ * And, if page reclaim is already handling the same page, it is
+ * unnecessary to handle it again in shrinker.
+ *
+ * Check PageSwapCache to determine if the page is being
+ * handled by page reclaim since THP swap would add the page into
+ * swap cache before calling try_to_unmap().
+ */
+ if (PageSwapCache(page))
+ return;
+
+ spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+ if (list_empty(page_deferred_list(page))) {
+ count_vm_event(THP_DEFERRED_SPLIT_PAGE);
+ list_add_tail(page_deferred_list(page), &ds_queue->split_queue);
+ ds_queue->split_queue_len++;
+ ds_queue->deferred_split_calls++;
+#ifdef CONFIG_MEMCG
+ if (memcg)
+ memcg_set_shrinker_bit(memcg, page_to_nid(page),
+ deferred_split_shrinker.id);
+#endif
+ }
+
+ if (ds_queue->split_queue_len > NR_PAGES_ON_QUEUE_TO_SPLIT &&
+ ds_queue->deferred_split_calls > NR_CALLS_TO_SPLIT) {
+ ds_queue->deferred_split_calls = 0;
+ schedule_work(&ds_queue->deferred_split_work);
+ }
+ spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+}
+
#ifdef CONFIG_DEBUG_FS
static int split_huge_pages_set(void *data, u64 val)
{
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c313c49074ca..67305ec75fdc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5085,6 +5085,9 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
spin_lock_init(&memcg->deferred_split_queue.split_queue_lock);
INIT_LIST_HEAD(&memcg->deferred_split_queue.split_queue);
memcg->deferred_split_queue.split_queue_len = 0;
+ memcg->deferred_split_queue.deferred_split_calls = 0;
+ INIT_WORK(&memcg->deferred_split_queue.deferred_split_work,
+ flush_deferred_split_queue_memcg);
#endif
idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
return memcg;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 15c2050c629b..2f52e538a26f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6674,6 +6674,8 @@ static void pgdat_init_split_queue(struct pglist_data *pgdat)
spin_lock_init(&ds_queue->split_queue_lock);
INIT_LIST_HEAD(&ds_queue->split_queue);
ds_queue->split_queue_len = 0;
+ ds_queue->deferred_split_calls = 0;
+ INIT_WORK(&ds_queue->deferred_split_work, flush_deferred_split_queue);
}
#else
static void pgdat_init_split_queue(struct pglist_data *pgdat) {}
--
2.21.0
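
For readers skimming the diff, the behavioural change is concentrated in the tail of deferred_split_huge_page(): once a queue holds more than NR_PAGES_ON_QUEUE_TO_SPLIT pages and more than NR_CALLS_TO_SPLIT pages have been queued since the last kick, the per-queue work item is scheduled so the queue is drained without waiting for shrinker-driven memory pressure. The standalone userspace sketch below is not part of the patch; the struct and the loop are illustrative assumptions, and only the two thresholds and the trigger condition are taken from the diff above. It simply models when that work would be scheduled.

#include <stdio.h>
#include <stdbool.h>

#define NR_CALLS_TO_SPLIT 32
#define NR_PAGES_ON_QUEUE_TO_SPLIT 16

/* Hypothetical stand-in for struct deferred_split: just the two counters
 * the trigger condition in the patch looks at. */
struct queue_model {
	unsigned long split_queue_len;		/* THPs currently on the queue */
	unsigned int deferred_split_calls;	/* pages queued since last kick */
};

/* Mirrors the check appended to deferred_split_huge_page(): returns true
 * when schedule_work() would be called, and resets the call counter the
 * same way the patch does. */
static bool should_schedule_flush(struct queue_model *q)
{
	if (q->split_queue_len > NR_PAGES_ON_QUEUE_TO_SPLIT &&
	    q->deferred_split_calls > NR_CALLS_TO_SPLIT) {
		q->deferred_split_calls = 0;
		return true;
	}
	return false;
}

int main(void)
{
	struct queue_model q = { 0, 0 };
	unsigned int call;

	/* Pretend 40 partially-unmapped THPs are queued back to back.
	 * (The real work item would drain the queue; this model does not.) */
	for (call = 1; call <= 40; call++) {
		q.split_queue_len++;
		q.deferred_split_calls++;
		if (should_schedule_flush(&q))
			printf("call %u: work scheduled, %lu pages on queue\n",
			       call, q.split_queue_len);
	}
	return 0;
}

With both counters starting at zero, the first flush would be scheduled on the 33rd queued page (33 > 32 calls and 33 > 16 pages), which is the earliest point the patch's condition can fire.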