Re: [PATCH mm-unstable] mm: migrate: folio_ref_freeze() under xas_lock_irq()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 2024/6/25 13:04, Hugh Dickins wrote:
Commit "mm: migrate: split folio_migrate_mapping()" drew attention to
"Note, the folio_ref_freeze() is moved out of xas_lock_irq(), Since the
folio is already isolated and locked during migration, so suppose that
there is no functional change."

That was a mistake. Freezing a folio's refcount to 0 is much like taking
a spinlock: see how filemap_get_entry() takes rcu_read_lock() then spins
around until the folio is unfrozen. If the task freezing is preempted (or
calls cond_resched(), as folio_mc_copy() may do), then it risks deadlock:
in my case, one CPU in zap_pte_range() with free_swap_and_cache_nr()
trying to reclaim swap while PTL is held, all the other CPUs in reclaim
spinning for that PTL.

Oh, thanks for pointing this out, I didn't take that into account.

I'm uncertain whether it's necessary for interrupts to be disabled as
well as preemption, but since they have to be disabled for the page
cache migration, it's much the best to do it all together as before.
So revert to folio_ref_freeze() under xas_lock_irq(): but keep the
preliminary folio_ref_count() check, which does make sense before
trying to copy the folio's data.

This is what my RFC version[1] does, which adds same reference check to
avoid the unnecessary folio_mc_copy().

Should I resend all patches, or Andrew directly pick this one?


Thanks.

[1] https://lore.kernel.org/linux-mm/20240129070934.3717659-7-wangkefeng.wang@xxxxxxxxxx/



Use "expected_count" for the expected count throughout.

Signed-off-by: Hugh Dickins <hughd@xxxxxxxxxx>
---
  mm/migrate.c | 59 +++++++++++++++++++++++++---------------------------
  1 file changed, 28 insertions(+), 31 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 27f070f64f27..8beedbb42a93 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -400,8 +400,8 @@ static int folio_expected_refs(struct address_space *mapping,
   * 2 for folios with a mapping
   * 3 for folios with a mapping and PagePrivate/PagePrivate2 set.
   */
-static void __folio_migrate_mapping(struct address_space *mapping,
-		struct folio *newfolio, struct folio *folio, int expected_cnt)
+static int __folio_migrate_mapping(struct address_space *mapping,
+		struct folio *newfolio, struct folio *folio, int expected_count)

We can rename it back to folio_migrate_mapping().

  {
  	XA_STATE(xas, &mapping->i_pages, folio_index(folio));
  	struct zone *oldzone, *newzone;
@@ -415,13 +415,18 @@ static void __folio_migrate_mapping(struct address_space *mapping,
  		newfolio->mapping = folio->mapping;
  		if (folio_test_swapbacked(folio))
  			__folio_set_swapbacked(newfolio);
-		return;
+		return MIGRATEPAGE_SUCCESS;
  	}
oldzone = folio_zone(folio);
  	newzone = folio_zone(newfolio);
xas_lock_irq(&xas);
+	if (!folio_ref_freeze(folio, expected_count)) {
+		xas_unlock_irq(&xas);
+		return -EAGAIN;
+	}
+
  	/* Now we know that no one else is looking at the folio */
  	newfolio->index = folio->index;
  	newfolio->mapping = folio->mapping;
@@ -456,7 +461,7 @@ static void __folio_migrate_mapping(struct address_space *mapping,
  	 * old folio by unfreezing to one less reference.
  	 * We know this isn't the last reference.
  	 */
-	folio_ref_unfreeze(folio, expected_cnt - nr);
+	folio_ref_unfreeze(folio, expected_count - nr);
xas_unlock(&xas);
  	/* Leave irq disabled to prevent preemption while updating stats */
@@ -504,23 +509,19 @@ static void __folio_migrate_mapping(struct address_space *mapping,
  		}
  	}
  	local_irq_enable();
+
+	return MIGRATEPAGE_SUCCESS;
  }
int folio_migrate_mapping(struct address_space *mapping, struct folio *newfolio,
  			  struct folio *folio, int extra_count)
  {
-	int expected_cnt = folio_expected_refs(mapping, folio) + extra_count;
+	int expected_count = folio_expected_refs(mapping, folio) + extra_count;
- if (!mapping) {
-		if (folio_ref_count(folio) != expected_cnt)
-			return -EAGAIN;
-	} else {
-		if (!folio_ref_freeze(folio, expected_cnt))
-			return -EAGAIN;
-	}
+	if (folio_ref_count(folio) != expected_count)
+		return -EAGAIN;
- __folio_migrate_mapping(mapping, newfolio, folio, expected_cnt);
-	return MIGRATEPAGE_SUCCESS;
+	return __folio_migrate_mapping(mapping, newfolio, folio, expected_count);
  }
  EXPORT_SYMBOL(folio_migrate_mapping);
@@ -534,16 +535,18 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
  	XA_STATE(xas, &mapping->i_pages, folio_index(src));
  	int ret, expected_count = folio_expected_refs(mapping, src);
- if (!folio_ref_freeze(src, expected_count))
+	if (folio_ref_count(src) != expected_count)
  		return -EAGAIN;
ret = folio_mc_copy(dst, src);
-	if (unlikely(ret)) {
-		folio_ref_unfreeze(src, expected_count);
+	if (unlikely(ret))
  		return ret;
-	}
xas_lock_irq(&xas);
+	if (!folio_ref_freeze(src, expected_count)) {
+		xas_unlock_irq(&xas);
+		return -EAGAIN;
+	}
dst->index = src->index;
  	dst->mapping = src->mapping;
@@ -660,24 +663,18 @@ static int __migrate_folio(struct address_space *mapping, struct folio *dst,
  			   struct folio *src, void *src_private,
  			   enum migrate_mode mode)
  {
-	int ret, expected_cnt = folio_expected_refs(mapping, src);
+	int ret, expected_count = folio_expected_refs(mapping, src);
- if (!mapping) {
-		if (folio_ref_count(src) != expected_cnt)
-			return -EAGAIN;
-	} else {
-		if (!folio_ref_freeze(src, expected_cnt))
-			return -EAGAIN;
-	}
+	if (folio_ref_count(src) != expected_count)
+		return -EAGAIN;
ret = folio_mc_copy(dst, src);
-	if (unlikely(ret)) {
-		if (mapping)
-			folio_ref_unfreeze(src, expected_cnt);
+	if (unlikely(ret))
  		return ret;
-	}
- __folio_migrate_mapping(mapping, dst, src, expected_cnt);
+	ret = __folio_migrate_mapping(mapping, dst, src, expected_count);
+	if (ret != MIGRATEPAGE_SUCCESS)
+		return ret;
if (src_private)
  		folio_attach_private(dst, folio_detach_private(src));




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux