---
include/linux/huge_mm.h | 9 ++++++
include/linux/memcontrol.h | 4 +++
include/linux/mm_types.h | 1 +
mm/huge_memory.c | 68 +++++++++++++++++++++++++++++++++++------
mm/memcontrol.c | 24 +++++++++++++++
5 files changed, 96 insertions(+), 10 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 45ede62..61c9ffd 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -267,6 +267,15 @@ static inline bool thp_migration_supported(void)
return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
}
+static inline struct list_head *page_deferred_list(struct page *page)
+{
+ /*
+ * The deferred list, global or per-memcg, lives in the second
+ * tail page: ->lru of tail pages is occupied by compound_head.
+ */
+ return &page[2].deferred_list;
+}
+
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
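
page_deferred_list() moves from mm/huge_memory.c into the header so that mm/memcontrol.c can use it as well. A minimal sketch of the intended use (a hypothetical helper, not part of the patch; the unlocked list_empty() is racy and only good for opportunistic checks):

    #include <linux/huge_mm.h>
    #include <linux/mm.h>

    /* Hypothetical: report whether a THP currently sits on a deferred
     * split queue. Callers needing a stable answer must instead hold
     * the queue's split_queue_lock. */
    static inline bool thp_queued_for_split(struct page *page)
    {
    	return !list_empty(page_deferred_list(compound_head(page)));
    }
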
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5771816..cace365 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -312,6 +312,10 @@ struct mem_cgroup {
struct list_head event_list;
spinlock_t event_list_lock;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ struct deferred_split deferred_split_queue;
+#endif
+
struct mem_cgroup_per_node *nodeinfo[0];
/* WARNING: nodeinfo must be the last member here */
};
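
Each memory cgroup now carries its own deferred split queue, of the same type as the per-node one. For reference, the struct was introduced earlier in this series, in include/linux/mmzone.h:

    struct deferred_split {
    	spinlock_t split_queue_lock;
    	struct list_head split_queue;
    	unsigned long split_queue_len;
    };
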
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 3a37a89..156640c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -139,6 +139,7 @@ struct page {
struct { /* Second tail page of compound page */
unsigned long _compound_pad_1; /* compound_head */
unsigned long _compound_pad_2;
+ /* For both global and memcg */
struct list_head deferred_list;
};
struct { /* Page table pages */
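
The new comment records that this slot now backs both queue flavors; the placement itself is unchanged. The first word of this union arm carries the compound_head() back-pointer (bit 0 tags a tail page), so the list head must start after it. A hypothetical compile-time check of that layout (not part of the patch):

    #include <linux/bug.h>
    #include <linux/mm_types.h>
    #include <linux/stddef.h>

    /* Hypothetical: deferred_list must sit right after the two pad
     * words, clear of the compound_head encoding in the first word. */
    static inline void thp_layout_check(void)
    {
    	BUILD_BUG_ON(offsetof(struct page, deferred_list) !=
    		     offsetof(struct page, _compound_pad_2) +
    		     sizeof(unsigned long));
    }
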
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e0d8e08..c9a596e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -495,11 +495,25 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
return pmd;
}
-static inline struct list_head *page_deferred_list(struct page *page)
+#ifdef CONFIG_MEMCG
+static inline struct deferred_split *get_deferred_split_queue(struct page *page)
{
- /* ->lru in the tail pages is occupied by compound_head. */
- return &page[2].deferred_list;
+ struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
+ struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+
+ if (memcg)
+ return &memcg->deferred_split_queue;
+ else
+ return &pgdat->deferred_split_queue;
+}
+#else
+static inline struct deferred_split *get_deferred_split_queue(struct page *page)
+{
+ struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+
+ return &pgdat->deferred_split_queue;
}
+#endif
void prep_transhuge_page(struct page *page)
{
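
The queue a THP belongs to is decided by whether its head page is charged to a memcg: charged pages go on their memcg's queue, while uncharged pages (and all pages with CONFIG_MEMCG=n) stay on the node queue. A hypothetical predicate making that rule explicit (illustration only, not part of the patch):

    /* Hypothetical: true when a THP would be queued per-memcg. */
    static bool thp_uses_memcg_queue(struct page *page)
    {
    #ifdef CONFIG_MEMCG
    	return compound_head(page)->mem_cgroup != NULL;
    #else
    	return false;
    #endif
    }
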
@@ -2658,7 +2672,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
{
struct page *head = compound_head(page);
struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
- struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
+ struct deferred_split *ds_queue = get_deferred_split_queue(page);
struct anon_vma *anon_vma = NULL;
struct address_space *mapping = NULL;
int count, mapcount, extra_pins, ret;
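
Only the queue lookup changes here; the rest of split_huge_page_to_list() keeps operating on ds_queue, so the later unlink (elided from this hunk) pulls the page off whichever queue, global or per-memcg, it was put on. Roughly, from the unchanged body, simplified:

    /* Later in split_huge_page_to_list(): the unlink runs under the
     * same ds_queue resolved above. */
    spin_lock(&ds_queue->split_queue_lock);
    if (!list_empty(page_deferred_list(head))) {
    	ds_queue->split_queue_len--;
    	list_del(page_deferred_list(head));
    }
    spin_unlock(&ds_queue->split_queue_lock);
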
@@ -2794,8 +2808,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
void free_transhuge_page(struct page *page)
{
- struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
- struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
+ struct deferred_split *ds_queue = get_deferred_split_queue(page);
unsigned long flags;
spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
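
With the hunk applied, the whole function reads as below (the unchanged tail is reconstructed here for reference): the page is unhooked from its queue, if any, before the compound page is freed.

    void free_transhuge_page(struct page *page)
    {
    	struct deferred_split *ds_queue = get_deferred_split_queue(page);
    	unsigned long flags;

    	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
    	if (!list_empty(page_deferred_list(page))) {
    		ds_queue->split_queue_len--;
    		list_del(page_deferred_list(page));
    	}
    	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
    	free_compound_page(page);
    }
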
@@ -2809,17 +2822,37 @@ void free_transhuge_page(struct page *page)
void deferred_split_huge_page(struct page *page)
{
- struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
- struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
+ struct deferred_split *ds_queue = get_deferred_split_queue(page);
+#ifdef CONFIG_MEMCG
+ struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
+#endif
unsigned long flags;
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+ /*
+ * The try_to_unmap() in the page reclaim path might reach here
+ * too; racing with it could corrupt the deferred split queue.
+ * And if page reclaim is already handling the same page, it is
+ * unnecessary to handle it again in the shrinker.
+ *
+ * Check PageSwapCache to determine whether the page is being
+ * handled by page reclaim, since THP swap adds the page to the
+ * swap cache before calling try_to_unmap().
+ */
+ if (PageSwapCache(page))
+ return;
+
spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
if (list_empty(page_deferred_list(page))) {
count_vm_event(THP_DEFERRED_SPLIT_PAGE);
list_add_tail(page_deferred_list(page), &ds_queue->split_queue);
ds_queue->split_queue_len++;
+#ifdef CONFIG_MEMCG
+ if (memcg)
+ memcg_set_shrinker_bit(memcg, page_to_nid(page),
+ deferred_split_shrinker.id);
+#endif
}
spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
}
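
memcg_set_shrinker_bit() comes from the memcg-aware shrinker bitmap infrastructure: it marks this shrinker as having work for the given memcg and node, so shrink_slab_memcg() only visits queues known to be non-empty. Its mm/memcontrol.c implementation of this era is roughly:

    void memcg_set_shrinker_bit(struct mem_cgroup *memcg, int nid,
    			    int shrinker_id)
    {
    	if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) {
    		struct memcg_shrinker_map *map;

    		rcu_read_lock();
    		map = rcu_dereference(memcg->nodeinfo[nid]->shrinker_map);
    		/* Pairs with smp mb in shrink_slab() */
    		smp_mb__before_atomic();
    		set_bit(shrinker_id, map->map);
    		rcu_read_unlock();
    	}
    }
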
@@ -2827,8 +2860,13 @@ void deferred_split_huge_page(struct page *page)
static unsigned long deferred_split_count(struct shrinker *shrink,
struct shrink_control *sc)
{
struct pglist_data *pgdata = NODE_DATA(sc->nid);
struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
+
+#ifdef CONFIG_MEMCG
+ if (sc->memcg)
+ ds_queue = &sc->memcg->deferred_split_queue;
+#endif
return READ_ONCE(ds_queue->split_queue_len);
}
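
sc->memcg is only ever set because the shrinker is registered memcg-aware; the remainder of the patch (beyond this excerpt) updates the registration accordingly, which ends up reading:

    static struct shrinker deferred_split_shrinker = {
    	.count_objects = deferred_split_count,
    	.scan_objects = deferred_split_scan,
    	.seeks = DEFAULT_SEEKS,
    	.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE,
    };
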