We should make the THP deferred split queue lock safe when LRU pages are
reparented. Similar to lock_page_lruvec{_irqsave, _irq}(), introduce
lock/unlock_split_queue{_irqsave}() so that the deferred split queue lock
is easier to reparent. The next patch can then use a similar approach to
the one taken for the lruvec lock to make the THP deferred split queue
lock safe when LRU pages are reparented.

Signed-off-by: Muchun Song <songmuchun@xxxxxxxxxxxxx>
---
 mm/huge_memory.c | 96 +++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 74 insertions(+), 22 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 233474770424..d8590408abbb 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -496,25 +496,76 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 }
 
 #ifdef CONFIG_MEMCG
-static inline struct deferred_split *get_deferred_split_queue(struct page *page)
+static inline struct mem_cgroup *split_queue_to_memcg(struct deferred_split *queue)
 {
-	struct mem_cgroup *memcg = page_memcg(compound_head(page));
-	struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+	return container_of(queue, struct mem_cgroup, deferred_split_queue);
+}
+
+static struct deferred_split *lock_split_queue(struct page *page)
+{
+	struct deferred_split *queue;
+	struct mem_cgroup *memcg;
+
+	memcg = page_memcg(compound_head(page));
+	if (memcg)
+		queue = &memcg->deferred_split_queue;
+	else
+		queue = &NODE_DATA(page_to_nid(page))->deferred_split_queue;
+	spin_lock(&queue->split_queue_lock);
+
+	return queue;
+}
 
+static struct deferred_split *lock_split_queue_irqsave(struct page *page,
+						       unsigned long *flags)
+{
+	struct deferred_split *queue;
+	struct mem_cgroup *memcg;
+
+	memcg = page_memcg(compound_head(page));
 	if (memcg)
-		return &memcg->deferred_split_queue;
+		queue = &memcg->deferred_split_queue;
 	else
-		return &pgdat->deferred_split_queue;
+		queue = &NODE_DATA(page_to_nid(page))->deferred_split_queue;
+	spin_lock_irqsave(&queue->split_queue_lock, *flags);
+
+	return queue;
 }
 #else
-static inline struct deferred_split *get_deferred_split_queue(struct page *page)
+static struct deferred_split *lock_split_queue(struct page *page)
+{
+	struct deferred_split *queue;
+
+	queue = &NODE_DATA(page_to_nid(page))->deferred_split_queue;
+	spin_lock(&queue->split_queue_lock);
+
+	return queue;
+}
+
+static struct deferred_split *lock_split_queue_irqsave(struct page *page,
+						       unsigned long *flags)
+
 {
-	struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+	struct deferred_split *queue;
+
+	queue = &NODE_DATA(page_to_nid(page))->deferred_split_queue;
+	spin_lock_irqsave(&queue->split_queue_lock, *flags);
 
-	return &pgdat->deferred_split_queue;
+	return queue;
 }
 #endif
 
+static inline void unlock_split_queue(struct deferred_split *queue)
+{
+	spin_unlock(&queue->split_queue_lock);
+}
+
+static inline void unlock_split_queue_irqrestore(struct deferred_split *queue,
+						 unsigned long flags)
+{
+	spin_unlock_irqrestore(&queue->split_queue_lock, flags);
+}
+
 void prep_transhuge_page(struct page *page)
 {
 	/*
@@ -2610,7 +2661,7 @@ bool can_split_huge_page(struct page *page, int *pextra_pins)
 int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
 	struct page *head = compound_head(page);
-	struct deferred_split *ds_queue = get_deferred_split_queue(head);
+	struct deferred_split *ds_queue;
 	struct anon_vma *anon_vma = NULL;
 	struct address_space *mapping = NULL;
 	int mapcount, extra_pins, ret;
@@ -2689,14 +2740,14 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	}
 
 	/* Prevent deferred_split_scan() touching ->_refcount */
-	spin_lock(&ds_queue->split_queue_lock);
+	ds_queue = lock_split_queue(head);
 	mapcount = total_mapcount(head);
 	if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
 		if (!list_empty(page_deferred_list(head))) {
 			ds_queue->split_queue_len--;
 			list_del(page_deferred_list(head));
 		}
-		spin_unlock(&ds_queue->split_queue_lock);
+		unlock_split_queue(ds_queue);
 		if (mapping) {
 			int nr = thp_nr_pages(head);
 
@@ -2711,7 +2762,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		__split_huge_page(page, list, end);
 		ret = 0;
 	} else {
-		spin_unlock(&ds_queue->split_queue_lock);
+		unlock_split_queue(ds_queue);
 fail:		if (mapping)
 			xa_unlock(&mapping->i_pages);
 		local_irq_enable();
@@ -2733,24 +2784,21 @@ fail:	if (mapping)
 
 void free_transhuge_page(struct page *page)
 {
-	struct deferred_split *ds_queue = get_deferred_split_queue(page);
+	struct deferred_split *ds_queue;
 	unsigned long flags;
 
-	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+	ds_queue = lock_split_queue_irqsave(page, &flags);
 	if (!list_empty(page_deferred_list(page))) {
 		ds_queue->split_queue_len--;
 		list_del(page_deferred_list(page));
 	}
-	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+	unlock_split_queue_irqrestore(ds_queue, flags);
 	free_compound_page(page);
 }
 
 void deferred_split_huge_page(struct page *page)
 {
-	struct deferred_split *ds_queue = get_deferred_split_queue(page);
-#ifdef CONFIG_MEMCG
-	struct mem_cgroup *memcg = page_memcg(compound_head(page));
-#endif
+	struct deferred_split *ds_queue;
 	unsigned long flags;
 
 	VM_BUG_ON_PAGE(!PageTransHuge(page), page);
@@ -2768,18 +2816,22 @@ void deferred_split_huge_page(struct page *page)
 	if (PageSwapCache(page))
 		return;
 
-	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+	ds_queue = lock_split_queue_irqsave(page, &flags);
 	if (list_empty(page_deferred_list(page))) {
 		count_vm_event(THP_DEFERRED_SPLIT_PAGE);
 		list_add_tail(page_deferred_list(page), &ds_queue->split_queue);
 		ds_queue->split_queue_len++;
#ifdef CONFIG_MEMCG
-		if (memcg)
+		if (page_memcg(page)) {
+			struct mem_cgroup *memcg;
+
+			memcg = split_queue_to_memcg(ds_queue);
 			set_shrinker_bit(memcg, page_to_nid(page),
 					 deferred_split_shrinker.id);
+		}
 #endif
 	}
-	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+	unlock_split_queue_irqrestore(ds_queue, flags);
 }
 
 static unsigned long deferred_split_count(struct shrinker *shrink,
-- 
2.11.0
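
For reference, a minimal usage sketch of the new helpers, written as if it were
another caller inside mm/huge_memory.c (illustrative only, not part of the
patch; the function name example_del_from_split_queue() is hypothetical, and
the body just mirrors the free_transhuge_page() hunk above):

static void example_del_from_split_queue(struct page *page)
{
	struct deferred_split *queue;
	unsigned long flags;

	/*
	 * lock_split_queue_irqsave() looks up the per-memcg (or per-node)
	 * deferred split queue for the page and takes its lock in one call,
	 * so the caller never caches a queue pointer before the lock is held.
	 */
	queue = lock_split_queue_irqsave(page, &flags);
	if (!list_empty(page_deferred_list(page))) {
		queue->split_queue_len--;
		list_del(page_deferred_list(page));
	}
	unlock_split_queue_irqrestore(queue, flags);
}

Combining the lookup with the locking is presumably what lets the follow-up
patch revalidate the queue under the lock when LRU pages are reparented,
similar to how lock_page_lruvec{_irqsave, _irq}() handles the lruvec lock.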