Hi Kairui,

On 02/15/25 at 01:57am, Kairui Song wrote:

......snip....

> -int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order)
> +swp_entry_t folio_alloc_swap(struct folio *folio)
>  {
> -	int order = swap_entry_order(entry_order);
> -	unsigned long size = 1 << order;
> +	unsigned int order = folio_order(folio);
> +	unsigned int size = 1 << order;
>  	struct swap_info_struct *si, *next;
> -	int n_ret = 0;
> +	swp_entry_t entry = {};
> +	unsigned long offset;
>  	int node;
>
> +	if (order) {
> +		/*
> +		 * Should not even be attempting large allocations when huge
> +		 * page swap is disabled. Warn and fail the allocation.
> +		 */
> +		if (!IS_ENABLED(CONFIG_THP_SWAP) || size > SWAPFILE_CLUSTER) {
> +			VM_WARN_ON_ONCE(1);
> +			return entry;
> +		}
> +	}
> +
>  	/* Fast path using percpu cluster */
>  	local_lock(&percpu_swap_cluster.lock);
> -	n_ret = swap_alloc_fast(swp_entries,
> -				SWAP_HAS_CACHE,
> -				order, n_goal);
> -	if (n_ret == n_goal)
> -		goto out;
> +	if (swap_alloc_fast(&entry, SWAP_HAS_CACHE, order))
> +		goto out_alloced;
>
> -	n_goal = min_t(int, n_goal - n_ret, SWAP_BATCH);
>  	/* Rotate the device and switch to a new cluster */
>  	spin_lock(&swap_avail_lock);
>  start_over:
> @@ -1268,11 +1236,14 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order)
>  		plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]);
>  		spin_unlock(&swap_avail_lock);
>  		if (get_swap_device_info(si)) {
> -			n_ret += scan_swap_map_slots(si, SWAP_HAS_CACHE, n_goal,
> -					swp_entries + n_ret, order);
> +			offset = cluster_alloc_swap_entry(si, order, SWAP_HAS_CACHE);
>  			put_swap_device(si);
> -			if (n_ret || size > 1)
> -				goto out;
> +			if (offset) {
> +				entry = swp_entry(si->type, offset);
> +				goto out_alloced;
> +			}
> +			if (order)
> +				goto out_failed;

This is not related to this patch, but do you know why the non order-0 case
can't start over on a different device? (See the sketch at the end of this
mail for what I naively expected.)

>  		}
>
>  		spin_lock(&swap_avail_lock);
> @@ -1291,10 +1262,20 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_order)
>  			goto start_over;
>  	}
>  	spin_unlock(&swap_avail_lock);
> -out:
> +out_failed:
> +	local_unlock(&percpu_swap_cluster.lock);
> +	return entry;
> +
> +out_alloced:
>  	local_unlock(&percpu_swap_cluster.lock);
> -	atomic_long_sub(n_ret * size, &nr_swap_pages);
> -	return n_ret;
> +	if (mem_cgroup_try_charge_swap(folio, entry)) {
> +		put_swap_folio(folio, entry);
> +		entry.val = 0;
> +	} else {
> +		atomic_long_sub(size, &nr_swap_pages);
> +	}
> +
> +	return entry;
>  }
>
>  static struct swap_info_struct *_swap_info_get(swp_entry_t entry)

......snip....

> @@ -2623,16 +2591,6 @@ static bool __has_usable_swap(void)
>  	return !plist_head_empty(&swap_active_head);
>  }

It seems the __has_usable_swap() function needs to be moved into the
ifdeffery scope where __folio_throttle_swaprate() is located, to fix the
lkp warning. (A rough sketch of what I mean is at the end of this mail.)

>
> -bool has_usable_swap(void)
> -{
> -	bool ret;
> -
> -	spin_lock(&swap_lock);
> -	ret = __has_usable_swap();
> -	spin_unlock(&swap_lock);
> -	return ret;
> -}
> -
>  /*
>   * Called after clearing SWP_WRITEOK, ensures cluster_alloc_range
>   * see the updated flags, so there will be no more allocations.

Other than the warning reported by the test robot, this patch looks good
to me.

Thanks.
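
To make the question about the order > 0 case concrete: based only on the
hunk quoted above, I was naively expecting a failed high-order attempt to
fall through to the next device instead of bailing out, roughly like the
untested sketch below (just an illustration of the question, not a
suggested change):

		if (get_swap_device_info(si)) {
			offset = cluster_alloc_swap_entry(si, order, SWAP_HAS_CACHE);
			put_swap_device(si);
			if (offset) {
				entry = swp_entry(si->type, offset);
				goto out_alloced;
			}
			/*
			 * No early "goto out_failed" for order > 0 here, so
			 * the plist loop keeps iterating and can try the
			 * remaining devices, same as the order-0 case.
			 */
		}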
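
And a rough sketch of the __has_usable_swap() move I mentioned. I'm assuming
the guard around __folio_throttle_swaprate() in mm/swapfile.c is the usual
CONFIG_MEMCG && CONFIG_BLK_CGROUP one; please adjust if your tree differs:

#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
/*
 * With has_usable_swap() gone, __folio_throttle_swaprate() is the only
 * remaining caller, so keep the helper under the same guard to avoid a
 * "defined but not used" warning when these options are disabled.
 */
static bool __has_usable_swap(void)
{
	return !plist_head_empty(&swap_active_head);
}

void __folio_throttle_swaprate(struct folio *folio, gfp_t gfp)
{
	/* existing body unchanged */
}
#endif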