On Wed 24-03-21 17:28:33, Mike Kravetz wrote:
[...]
> @@ -2074,17 +2067,16 @@ static int gather_surplus_pages(struct hstate *h, long delta)
>   *    to the associated reservation map.
>   * 2) Free any unused surplus pages that may have been allocated to satisfy
>   *    the reservation.  As many as unused_resv_pages may be freed.
> - *
> - * Called with hugetlb_lock held.  However, the lock could be dropped (and
> - * reacquired) during calls to cond_resched_lock.  Whenever dropping the lock,
> - * we must make sure nobody else can claim pages we are in the process of
> - * freeing.  Do this by ensuring resv_huge_page always is greater than the
> - * number of huge pages we plan to free when dropping the lock.
>   */
>  static void return_unused_surplus_pages(struct hstate *h,
>  					unsigned long unused_resv_pages)
>  {
>  	unsigned long nr_pages;
> +	struct page *page, *t_page;
> +	struct list_head page_list;
> +
> +	/* Uncommit the reservation */
> +	h->resv_huge_pages -= unused_resv_pages;

Is this ok for cases where remove_pool_huge_page fails early? I have to
say I am kinda lost in the resv_huge_pages accounting here. The original
code was already quite suspicious to me, TBH.

> 
>  	/* Cannot return gigantic pages currently */
>  	if (hstate_is_gigantic(h))
> @@ -2101,24 +2093,27 @@ static void return_unused_surplus_pages(struct hstate *h,
>  	 * evenly across all nodes with memory. Iterate across these nodes
>  	 * until we can no longer free unreserved surplus pages. This occurs
>  	 * when the nodes with surplus pages have no free pages.
> -	 * free_pool_huge_page() will balance the freed pages across the
> +	 * remove_pool_huge_page() will balance the freed pages across the
>  	 * on-line nodes with memory and will handle the hstate accounting.
> -	 *
> -	 * Note that we decrement resv_huge_pages as we free the pages.  If
> -	 * we drop the lock, resv_huge_pages will still be sufficiently large
> -	 * to cover subsequent pages we may free.
>  	 */
> +	INIT_LIST_HEAD(&page_list);
>  	while (nr_pages--) {
> -		h->resv_huge_pages--;
> -		unused_resv_pages--;
> -		if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1))
> +		page = remove_pool_huge_page(h, &node_states[N_MEMORY], 1);
> +		if (!page)
>  			goto out;
> -		cond_resched_lock(&hugetlb_lock);
> +
> +		INIT_LIST_HEAD(&page->lru);

Again an unnecessary INIT_LIST_HEAD; list_add() below will initialize
page->lru anyway.

> +		list_add(&page->lru, &page_list);
>  	}
> 
>  out:
> -	/* Fully uncommit the reservation */
> -	h->resv_huge_pages -= unused_resv_pages;
> +	spin_unlock(&hugetlb_lock);
> +	list_for_each_entry_safe(page, t_page, &page_list, lru) {
> +		list_del(&page->lru);
> +		update_and_free_page(h, page);
> +		cond_resched();
> +	}

You have the same construct at 3 different places; maybe it deserves a
little helper, update_and_free_page_batch.
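
Something along these lines maybe? A completely untested sketch, just
reusing update_and_free_page() from this series (the helper name is
only a suggestion):

static void update_and_free_page_batch(struct hstate *h,
				       struct list_head *page_list)
{
	struct page *page, *t_page;

	/* hugetlb_lock must have been dropped by the caller */
	list_for_each_entry_safe(page, t_page, page_list, lru) {
		list_del(&page->lru);
		update_and_free_page(h, page);
		cond_resched();
	}
}

Then all three call sites would boil down to

	spin_unlock(&hugetlb_lock);
	update_and_free_page_batch(h, &page_list);
	spin_lock(&hugetlb_lock);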
> +	spin_lock(&hugetlb_lock);
>  }
> 
> 
> @@ -2648,6 +2643,8 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
>  			      nodemask_t *nodes_allowed)
>  {
>  	unsigned long min_count, ret;
> +	struct page *page, *t_page;
> +	struct list_head page_list;
>  	NODEMASK_ALLOC(nodemask_t, node_alloc_noretry, GFP_KERNEL);
> 
>  	/*
> @@ -2757,11 +2754,28 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
>  	min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages;
>  	min_count = max(count, min_count);
>  	try_to_free_low(h, min_count, nodes_allowed);
> +
> +	/*
> +	 * Collect pages to be removed on list without dropping lock
> +	 */
> +	INIT_LIST_HEAD(&page_list);
>  	while (min_count < persistent_huge_pages(h)) {
> -		if (!free_pool_huge_page(h, nodes_allowed, 0))
> +		page = remove_pool_huge_page(h, nodes_allowed, 0);
> +		if (!page)
>  			break;
> -		cond_resched_lock(&hugetlb_lock);
> +
> +		INIT_LIST_HEAD(&page->lru);

INIT_LIST_HEAD again.

> +		list_add(&page->lru, &page_list);
>  	}
> +	/* free the pages after dropping lock */
> +	spin_unlock(&hugetlb_lock);
> +	list_for_each_entry_safe(page, t_page, &page_list, lru) {
> +		list_del(&page->lru);
> +		update_and_free_page(h, page);
> +		cond_resched();
> +	}
> +	spin_lock(&hugetlb_lock);
> +
>  	while (count < persistent_huge_pages(h)) {
>  		if (!adjust_pool_surplus(h, nodes_allowed, 1))
>  			break;
> -- 
> 2.30.2

-- 
Michal Hocko
SUSE Labs