On Mon, May 25, 2020 at 08:26:48AM +0800, Huang Ying wrote: > diff --git a/mm/swapfile.c b/mm/swapfile.c > index 423c234aca15..0abd93d2a4fc 100644 > --- a/mm/swapfile.c > +++ b/mm/swapfile.c > @@ -615,7 +615,8 @@ static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si, > * discarding, do discard now and reclaim them > */ > swap_do_scheduled_discard(si); > - *scan_base = *offset = si->cluster_next; > + *scan_base = this_cpu_read(*si->cluster_next_cpu); > + *offset = *scan_base; > goto new_cluster; Why is this done? As far as I can tell, the values always get overwritten at the end of the function with tmp and tmp isn't derived from them. Seems ebc2a1a69111 moved some logic that used to make sense but doesn't have any effect now. > } else > return false; > @@ -721,6 +722,34 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset, > } > } > > +static void set_cluster_next(struct swap_info_struct *si, unsigned long next) > +{ > + unsigned long prev; > + > + if (!(si->flags & SWP_SOLIDSTATE)) { > + si->cluster_next = next; > + return; > + } > + > + prev = this_cpu_read(*si->cluster_next_cpu); > + /* > + * Cross the swap address space size aligned trunk, choose > + * another trunk randomly to avoid lock contention on swap > + * address space if possible. > + */ > + if ((prev >> SWAP_ADDRESS_SPACE_SHIFT) != > + (next >> SWAP_ADDRESS_SPACE_SHIFT)) { > + /* No free swap slots available */ > + if (si->highest_bit <= si->lowest_bit) > + return; > + next = si->lowest_bit + > + prandom_u32_max(si->highest_bit - si->lowest_bit + 1); > + next = ALIGN(next, SWAP_ADDRESS_SPACE_PAGES); > + next = max_t(unsigned int, next, si->lowest_bit); next is always greater than lowest_bit because it's aligned up. I think the intent of the max_t line is to handle when next is aligned outside the valid range, so it'd have to be ALIGN_DOWN instead? These aside, patch looks good to me.