On 12/31/24 at 01:46am, Kairui Song wrote:
> From: Kairui Song <kasong@xxxxxxxxxxx>
>
> Instead of using a returning argument, we can simply store the next
> cluster offset to the fixed percpu location, which reduce the stack
> usage and simplify the function:
>
> Object size:
> ./scripts/bloat-o-meter mm/swapfile.o mm/swapfile.o.new
> add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-271 (-271)
> Function                                     old     new   delta
> get_swap_pages                              2847    2733    -114
> alloc_swap_scan_cluster                      894     737    -157
> Total: Before=30833, After=30562, chg -0.88%
>
> Stack usage:
> Before:
> swapfile.c:1190:5:get_swap_pages       240     static
>
> After:
> swapfile.c:1185:5:get_swap_pages       216     static
>
> Signed-off-by: Kairui Song <kasong@xxxxxxxxxxx>
> ---
>  include/linux/swap.h |  4 +--
>  mm/swapfile.c        | 66 +++++++++++++++++++-------------------
>  2 files changed, 31 insertions(+), 39 deletions(-)
>
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index c4ff31cb6bde..4c1d2e69689f 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -275,9 +275,9 @@ enum swap_cluster_flags {
>   * The first page in the swap file is the swap header, which is always marked
>   * bad to prevent it from being allocated as an entry. This also prevents the
>   * cluster to which it belongs being marked free. Therefore 0 is safe to use as
> - * a sentinel to indicate next is not valid in percpu_cluster.
> + * a sentinel to indicate an entry is not valid.
>   */
> -#define SWAP_NEXT_INVALID	0
> +#define SWAP_ENTRY_INVALID	0
>
>  #ifdef CONFIG_THP_SWAP
>  #define SWAP_NR_ORDERS		(PMD_ORDER + 1)
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index dadd4fead689..60a650ba88fd 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -759,23 +759,23 @@ static bool cluster_alloc_range(struct swap_info_struct *si, struct swap_cluster
>  	return true;
>  }
>
> -static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si, unsigned long offset,
> -					    unsigned int *foundp, unsigned int order,
> +/* Try use a new cluster for current CPU and allocate from it. */
> +static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si,
> +					    struct swap_cluster_info *ci,
> +					    unsigned long offset,
> +					    unsigned int order,
>  					    unsigned char usage)
>  {
> -	unsigned long start = offset & ~(SWAPFILE_CLUSTER - 1);
> +	unsigned int next = SWAP_ENTRY_INVALID, found = SWAP_ENTRY_INVALID;
> +	unsigned long start = ALIGN_DOWN(offset, SWAPFILE_CLUSTER);
>  	unsigned long end = min(start + SWAPFILE_CLUSTER, si->max);
>  	unsigned int nr_pages = 1 << order;
>  	bool need_reclaim, ret;
> -	struct swap_cluster_info *ci;
>
> -	ci = &si->cluster_info[offset / SWAPFILE_CLUSTER];
>  	lockdep_assert_held(&ci->lock);
>
> -	if (end < nr_pages || ci->count + nr_pages > SWAPFILE_CLUSTER) {
> -		offset = SWAP_NEXT_INVALID;
> +	if (end < nr_pages || ci->count + nr_pages > SWAPFILE_CLUSTER)
>  		goto out;
> -	}
>
>  	for (end -= nr_pages; offset <= end; offset += nr_pages) {
>  		need_reclaim = false;
> @@ -789,34 +789,27 @@ static unsigned int alloc_swap_scan_cluster(struct swap_info_struct *si, unsigne
>  			 * cluster has no flag set, and change of list
>  			 * won't cause fragmentation.
>  			 */
> -			if (!cluster_is_usable(ci, order)) {
> -				offset = SWAP_NEXT_INVALID;
> +			if (!cluster_is_usable(ci, order))
>  				goto out;
> -			}
>  			if (cluster_is_free(ci))
>  				offset = start;
>  			/* Reclaim failed but cluster is usable, try next */
>  			if (!ret)
>  				continue;
>  		}
> -		if (!cluster_alloc_range(si, ci, offset, usage, order)) {
> -			offset = SWAP_NEXT_INVALID;
> -			goto out;
> -		}
> -		*foundp = offset;
> -		if (ci->count == SWAPFILE_CLUSTER) {
> -			offset = SWAP_NEXT_INVALID;
> -			goto out;
> -		}
> +		if (!cluster_alloc_range(si, ci, offset, usage, order))
> +			break;
> +		found = offset;
>  		offset += nr_pages;
> +		if (ci->count < SWAPFILE_CLUSTER && offset <= end)
> +			next = offset;
>  		break;
>  	}
> -	if (offset > end)
> -		offset = SWAP_NEXT_INVALID;
>  out:
>  	relocate_cluster(si, ci);
>  	unlock_cluster(ci);
> -	return offset;
> +	__this_cpu_write(si->percpu_cluster->next[order], next);
> +	return found;
>  }
>
>  /* Return true if reclaimed a whole cluster */
> @@ -885,8 +878,8 @@ static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int o
>  		if (cluster_is_usable(ci, order)) {
>  			if (cluster_is_free(ci))
>  				offset = cluster_offset(si, ci);
> -			offset = alloc_swap_scan_cluster(si, offset, &found,
> -							 order, usage);
> +			found = alloc_swap_scan_cluster(si, ci, offset,
> +							order, usage);
>  		} else {
>  			unlock_cluster(ci);
>  		}
> @@ -897,8 +890,8 @@ static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int o
>  new_cluster:
>  	ci = cluster_isolate_lock(si, &si->free_clusters);
>  	if (ci) {
> -		offset = alloc_swap_scan_cluster(si, cluster_offset(si, ci),
> -						 &found, order, usage);
> +		found = alloc_swap_scan_cluster(si, ci, cluster_offset(si, ci),
> +						order, usage);
>  		if (found)
>  			goto done;
>  	}
> @@ -911,8 +904,8 @@ static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int o
>  		unsigned int frags = 0, frags_existing;
>
>  		while ((ci = cluster_isolate_lock(si, &si->nonfull_clusters[order]))) {
> -			offset = alloc_swap_scan_cluster(si, cluster_offset(si, ci),
> -							 &found, order, usage);
> +			found = alloc_swap_scan_cluster(si, ci, cluster_offset(si, ci),
> +							order, usage);
>  			/*
>  			 * With `fragmenting` set to true, it will surely take
>  			 * the cluster off nonfull list
> @@ -932,8 +925,8 @@ static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int o
>  			 * per-CPU usage, but they could contain newly released
>  			 * reclaimable (eg. lazy-freed swap cache) slots.
>  			 */
> -			offset = alloc_swap_scan_cluster(si, cluster_offset(si, ci),
> -							 &found, order, usage);
> +			found = alloc_swap_scan_cluster(si, ci, cluster_offset(si, ci),
> +							order, usage);
>  			if (found)
>  				goto done;
>  			frags++;
> @@ -959,21 +952,20 @@ static unsigned long cluster_alloc_swap_entry(struct swap_info_struct *si, int o
>  		 */
>  		while ((ci = cluster_isolate_lock(si, &si->frag_clusters[o]))) {
>  			atomic_long_dec(&si->frag_cluster_nr[o]);
> -			offset = alloc_swap_scan_cluster(si, cluster_offset(si, ci),
> -							 &found, order, usage);
> +			found = alloc_swap_scan_cluster(si, ci, cluster_offset(si, ci),
> +							0, usage);
>  			if (found)
>  				goto done;
>  		}
>
>  		while ((ci = cluster_isolate_lock(si, &si->nonfull_clusters[o]))) {
> -			offset = alloc_swap_scan_cluster(si, cluster_offset(si, ci),
> -							 &found, order, usage);
> +			found = alloc_swap_scan_cluster(si, ci, cluster_offset(si, ci),
> +							0, usage);
>  			if (found)
>  				goto done;
>  		}
>  	}
>  done:
> -	__this_cpu_write(si->percpu_cluster->next[order], offset);
>  	local_unlock(&si->percpu_cluster->lock);

Do you think we still need to hold si->percpu_cluster->lock until the end of the
cluster_alloc_swap_entry() invocation? If so, we would need to hold the lock for
the whole period of walking through percpu_cluster->next, free_clusters,
nonfull_clusters and frag_clusters until we get one available slot, even though
we keep updating si->percpu_cluster->next[order] along the way. I can't see the
point of changing it like this. (A condensed sketch of what I mean is at the end
of this mail, after the quoted patch.)

>
>  	return found;
> @@ -3194,7 +3186,7 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si,
>
>  		cluster = per_cpu_ptr(si->percpu_cluster, cpu);
>  		for (i = 0; i < SWAP_NR_ORDERS; i++)
> -			cluster->next[i] = SWAP_NEXT_INVALID;
> +			cluster->next[i] = SWAP_ENTRY_INVALID;
>  		local_lock_init(&cluster->lock);
>  	}
>
> --
> 2.47.1
>
>
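To make the question concrete, here is a heavily condensed sketch of the
post-patch flow. It is not the actual mm/swapfile.c code: the reclaim path,
most of the fallback lists and the order-0 stealing are reduced to comments,
and cluster_alloc_swap_entry_sketch() is just an illustrative name. It only
shows where the percpu local lock sits relative to where next[order] is now
written:

/*
 * Condensed sketch only -- not the real function body.  The point is the
 * lock scope: local_lock() is still held across the whole walk, while
 * next[order] is now written inside alloc_swap_scan_cluster().
 */
static unsigned long cluster_alloc_swap_entry_sketch(struct swap_info_struct *si,
						     int order,
						     unsigned char usage)
{
	unsigned int found = SWAP_ENTRY_INVALID;
	struct swap_cluster_info *ci;

	local_lock(&si->percpu_cluster->lock);	/* held for the whole walk */

	/*
	 * 1) Try the remembered per-CPU offset first:
	 *    __this_cpu_read(si->percpu_cluster->next[order]) ...
	 */

	/* 2) Fall back to the cluster lists, e.g. the free list: */
	while (!found && (ci = cluster_isolate_lock(si, &si->free_clusters))) {
		/* stores next[order] via __this_cpu_write() before returning */
		found = alloc_swap_scan_cluster(si, ci, cluster_offset(si, ci),
						order, usage);
	}

	/* 3) ... then nonfull_clusters[], frag_clusters[], order-0 stealing. */

	local_unlock(&si->percpu_cluster->lock);
	return found;
}

As far as I can see, the critical section is the same before and after this
patch; what moves is only the place where next[order] is stored -- once at
done: before, versus on every alloc_swap_scan_cluster() attempt now.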