mm/swapfile.c | 78 +++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 67 insertions(+), 11 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 35cb58373493..25c3f98fa8d5 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -156,6 +156,25 @@ static bool swap_is_has_cache(struct swap_info_struct *si,
 	return true;
 }
+static bool swap_is_last_map(struct swap_info_struct *si,
+			     unsigned long offset, int nr_pages,
+			     bool *has_cache)
Please use double tabs for indenting parameters on the 2nd line in
new/changed code:

	unsigned long offset, int nr_pages, bool *has_cache)

That results in less churn when renaming functions, and we can
frequently avoid some lines.
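So the full prototype would then look something like:

	static bool swap_is_last_map(struct swap_info_struct *si,
			unsigned long offset, int nr_pages, bool *has_cache)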
+{
+	unsigned char *map = si->swap_map + offset;
+	unsigned char *map_end = map + nr_pages;
+	bool cached = false;
+
+	do {
+		if ((*map & ~SWAP_HAS_CACHE) != 1)
+			return false;
+		if (*map & SWAP_HAS_CACHE)
+			cached = true;
+	} while (++map < map_end);
+
+	*has_cache = cached;
+	return true;
+}
+
 /*
  * returns number of pages in the folio that backs the swap entry. If positive,
  * the folio was reclaimed. If negative, the folio was not reclaimed. If 0, no
@@ -1469,6 +1488,53 @@ static unsigned char __swap_entry_free(struct swap_info_struct *si,
 	return usage;
 }
+static bool __swap_entries_free(struct swap_info_struct *si,
+				swp_entry_t entry, int nr)
Ditto.
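I.e.:

	static bool __swap_entries_free(struct swap_info_struct *si,
			swp_entry_t entry, int nr)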
+{
+	unsigned long offset = swp_offset(entry);
+	unsigned int type = swp_type(entry);
+	struct swap_cluster_info *ci;
+	bool has_cache = false;
+	unsigned char count;
+	bool can_batch;
+	int i;
+
+	if (nr <= 1 || swap_count(data_race(si->swap_map[offset])) != 1)
+		goto fallback;
+	/* cross into another cluster */
+	if (nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER)
+		goto fallback;
+
+	ci = lock_cluster_or_swap_info(si, offset);
+	can_batch = swap_is_last_map(si, offset, nr, &has_cache);
+	if (can_batch) {
+		for (i = 0; i < nr; i++)
+			WRITE_ONCE(si->swap_map[offset + i], SWAP_HAS_CACHE);
+	}
+	unlock_cluster_or_swap_info(si, ci);
+
+	if (!can_batch)
+		goto fallback;
I'd avoid "can_batch" and just do:
ci = lock_cluster_or_swap_info(si, offset);
if (!swap_is_last_map(si, offset, nr, &has_cache)) {
unlock_cluster_or_swap_info(si, ci);
goto fallback;
}
for (i = 0; i < nr; i++)
WRITE_ONCE(si->swap_map[offset + i], SWAP_HAS_CACHE);
unlock_cluster_or_swap_info(si, ci);
+	if (!has_cache) {
+		spin_lock(&si->lock);
I'm no expert on that code, but we might drop the cluster lock, then
take the swap_info lock, and then retake the cluster lock. I assume
there are no races we are worrying about here, right?
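To spell out the sequence I have in mind (assuming
swap_entry_range_free() retakes the cluster lock internally):

	ci = lock_cluster_or_swap_info(si, offset);
	/* entries are now marked SWAP_HAS_CACHE */
	unlock_cluster_or_swap_info(si, ci);
	/* <-- cluster lock dropped here */
	spin_lock(&si->lock);
	swap_entry_range_free(si, entry, nr);
	spin_unlock(&si->lock);

If nothing can mess with these SWAP_HAS_CACHE entries in that window,
spelling that out in a comment would be nice.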
+		swap_entry_range_free(si, entry, nr);
+		spin_unlock(&si->lock);
+	}
+	return has_cache;
+
+fallback:
+	for (i = 0; i  < nr; i++) {
One space too many before the "<".
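I.e.:

	for (i = 0; i < nr; i++) {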
+		if (data_race(si->swap_map[offset + i])) {
+			count = __swap_entry_free(si, swp_entry(type, offset + i));
+			if (count == SWAP_HAS_CACHE)
+				has_cache = true;
+		} else {
+			WARN_ON_ONCE(1);
+		}
+	}
+	return has_cache;
+}
+
--
Cheers,
David / dhildenb