Oof, fatfingered send vs postpone. Here is the rest ;)

On Thu, Oct 13, 2022 at 12:39:53PM -0400, Johannes Weiner wrote:
> On Wed, Oct 12, 2022 at 03:51:47PM -0700, alexlzhu@xxxxxx wrote:
> > +
> > +	if (get_page_unless_zero(head)) {
> > +		if (!trylock_page(head)) {
> > +			put_page(head);
> > +			return LRU_SKIP;
> > +		}

The trylock could use a comment:

		/* Inverse lock order from add_underutilized_thp() */

> > +		list_lru_isolate(lru, item);
> > +		spin_unlock_irq(lock);
> > +		num_utilized_pages = thp_number_utilized_pages(head);
> > +		bucket = thp_utilization_bucket(num_utilized_pages);
> > +		if (bucket < THP_UTIL_BUCKET_NR - 1) {
> > +			split_huge_page(head);
> > +			spin_lock_irq(lock);

The re-lock also needs to be unconditional, or you'll get a double
unlock in the not-split case.
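I.e. something along these lines (untested, just shuffling the quoted
lines around), so that both the split and the no-split path return
with the lock held:

		list_lru_isolate(lru, item);
		spin_unlock_irq(lock);

		num_utilized_pages = thp_number_utilized_pages(head);
		bucket = thp_utilization_bucket(num_utilized_pages);
		if (bucket < THP_UTIL_BUCKET_NR - 1)
			split_huge_page(head);

		/* Re-lock unconditionally; the walk continues with the lock held */
		spin_lock_irq(lock);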
> > @@ -2737,6 +2800,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
> >  	struct folio *folio = page_folio(page);
> >  	struct deferred_split *ds_queue = get_deferred_split_queue(&folio->page);
> >  	XA_STATE(xas, &folio->mapping->i_pages, folio->index);
> > +	struct list_head *underutilized_thp_list = page_underutilized_thp_list(&folio->page);
> >  	struct anon_vma *anon_vma = NULL;
> >  	struct address_space *mapping = NULL;
> >  	int extra_pins, ret;
> > @@ -2844,6 +2908,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
> >  			list_del(page_deferred_list(&folio->page));
> >  		}
> >  		spin_unlock(&ds_queue->split_queue_lock);

A brief comment would be useful:

		/* Frozen refs lock out additions, test can be lockless */

> > +		if (!list_empty(underutilized_thp_list))
> > +			list_lru_del_page(&huge_low_util_page_lru, &folio->page,
> > +					  underutilized_thp_list);
> >  		if (mapping) {
> >  			int nr = folio_nr_pages(folio);
> > 
> > @@ -2886,6 +2953,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
> >  void free_transhuge_page(struct page *page)
> >  {
> >  	struct deferred_split *ds_queue = get_deferred_split_queue(page);
> > +	struct list_head *underutilized_thp_list = page_underutilized_thp_list(page);
> >  	unsigned long flags;
> > 
> >  	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
> > @@ -2894,6 +2962,12 @@ void free_transhuge_page(struct page *page)
> >  		list_del(page_deferred_list(page));
> >  	}
> >  	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);

Here as well:

	/* A dead page cannot be re-added, test can be lockless */

> > +	if (!list_empty(underutilized_thp_list))
> > +		list_lru_del_page(&huge_low_util_page_lru, page, underutilized_thp_list);
> > +
> > +	if (PageLRU(page))
> > +		__folio_clear_lru_flags(page_folio(page));
> > +
> >  	free_compound_page(page);
> >  }
> > 
> > @@ -2934,6 +3008,39 @@ void deferred_split_huge_page(struct page *page)
> >  	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
> >  }
> > 
> > +void add_underutilized_thp(struct page *page)
> > +{
> > +	VM_BUG_ON_PAGE(!PageTransHuge(page), page);
> > +
> > +	if (PageSwapCache(page))
> > +		return;

Why is that?

> > +
> > +	/*
> > +	 * Need to take a reference on the page to prevent the page from getting free'd from
> > +	 * under us while we are adding the THP to the shrinker.
> > +	 */
> > +	if (!get_page_unless_zero(page))
> > +		return;
> > +
> > +	if (!is_anon_transparent_hugepage(page))
> > +		goto out_put;
> > +
> > +	if (is_huge_zero_page(page))
> > +		goto out_put;
> > +

And here:

	/* Stabilize page->memcg to allocate and add to the same list */

> > +	lock_page(page);
> > +
> > +	if (memcg_list_lru_alloc(page_memcg(page), &huge_low_util_page_lru, GFP_KERNEL))
> > +		goto out_unlock;

The testbot pointed this out already, but this allocation call needs
an #ifdef CONFIG_MEMCG_KMEM guard.
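FWIW, the guard I have in mind is just this (untested, and assuming
the rest of add_underutilized_thp() stays as quoted above):

#ifdef CONFIG_MEMCG_KMEM
	/* Per-memcg list_lru slots only exist with CONFIG_MEMCG_KMEM */
	if (memcg_list_lru_alloc(page_memcg(page), &huge_low_util_page_lru, GFP_KERNEL))
		goto out_unlock;
#endif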