[PATCH v3 4/6] Revert "fsnotify: generate pre-content permission event on page fault"

Amir Goldstein <amir73il@xxxxxxxxx> · Wed, 12 Mar 2025 08:38:50 +0100

This reverts commit 8392bc2ff8c8bf7c4c5e6dfa71ccd893a3c046f6.

In the use case of buffered write whose input buffer is mmapped file on a
filesystem with a pre-content mark, the prefaulting of the buffer can
happen under the filesystem freeze protection (obtained in vfs_write())
which breaks assumptions of pre-content hook and introduces potential
deadlock of HSM handler in userspace with filesystem freezing.

Now that we have pre-content hooks at file mmap() time, disable the
pre-content event hooks on page fault to avoid the potential deadlock.

Reported-by: syzbot+7229071b47908b19d5b7@xxxxxxxxxxxxxxxxxxxxxxxxx
Closes: https://lore.kernel.org/linux-fsdevel/7ehxrhbvehlrjwvrduoxsao5k3x4aw275patsb3krkwuq573yv@o2hskrfawbnc/
Fixes: 8392bc2ff8c8b ("fsnotify: generate pre-content permission event on page fault")
Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx>
---
 include/linux/mm.h |  1 -
 mm/filemap.c       | 74 ----------------------------------------------
 mm/nommu.c         |  7 -----
 3 files changed, 82 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7b1068ddcbb70..8483e09aeb2cd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3420,7 +3420,6 @@ extern vm_fault_t filemap_fault(struct vm_fault *vmf);
 extern vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 		pgoff_t start_pgoff, pgoff_t end_pgoff);
 extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
-extern vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf);
 
 extern unsigned long stack_guard_gap;
 /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
diff --git a/mm/filemap.c b/mm/filemap.c
index 2974691fdfad2..ff5fcdd961364 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -47,7 +47,6 @@
 #include <linux/splice.h>
 #include <linux/rcupdate_wait.h>
 #include <linux/sched/mm.h>
-#include <linux/fsnotify.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include "internal.h"
@@ -3336,48 +3335,6 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf)
 	return ret;
 }
 
-/**
- * filemap_fsnotify_fault - maybe emit a pre-content event.
- * @vmf:	struct vm_fault containing details of the fault.
- *
- * If we have a pre-content watch on this file we will emit an event for this
- * range.  If we return anything the fault caller should return immediately, we
- * will return VM_FAULT_RETRY if we had to emit an event, which will trigger the
- * fault again and then the fault handler will run the second time through.
- *
- * Return: a bitwise-OR of %VM_FAULT_ codes, 0 if nothing happened.
- */
-vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf)
-{
-	struct file *fpin = NULL;
-	int mask = (vmf->flags & FAULT_FLAG_WRITE) ? MAY_WRITE : MAY_ACCESS;
-	loff_t pos = vmf->pgoff >> PAGE_SHIFT;
-	size_t count = PAGE_SIZE;
-	int err;
-
-	/*
-	 * We already did this and now we're retrying with everything locked,
-	 * don't emit the event and continue.
-	 */
-	if (vmf->flags & FAULT_FLAG_TRIED)
-		return 0;
-
-	/* No watches, we're done. */
-	if (likely(!FMODE_FSNOTIFY_HSM(vmf->vma->vm_file->f_mode)))
-		return 0;
-
-	fpin = maybe_unlock_mmap_for_io(vmf, fpin);
-	if (!fpin)
-		return VM_FAULT_SIGBUS;
-
-	err = fsnotify_file_area_perm(fpin, mask, &pos, count);
-	fput(fpin);
-	if (err)
-		return VM_FAULT_SIGBUS;
-	return VM_FAULT_RETRY;
-}
-EXPORT_SYMBOL_GPL(filemap_fsnotify_fault);
-
 /**
  * filemap_fault - read in file data for page fault handling
  * @vmf:	struct vm_fault containing details of the fault
@@ -3481,37 +3438,6 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 	 * or because readahead was otherwise unable to retrieve it.
 	 */
 	if (unlikely(!folio_test_uptodate(folio))) {
-		/*
-		 * If this is a precontent file we have can now emit an event to
-		 * try and populate the folio.
-		 */
-		if (!(vmf->flags & FAULT_FLAG_TRIED) &&
-		    unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) {
-			loff_t pos = folio_pos(folio);
-			size_t count = folio_size(folio);
-
-			/* We're NOWAIT, we have to retry. */
-			if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) {
-				folio_unlock(folio);
-				goto out_retry;
-			}
-
-			if (mapping_locked)
-				filemap_invalidate_unlock_shared(mapping);
-			mapping_locked = false;
-
-			folio_unlock(folio);
-			fpin = maybe_unlock_mmap_for_io(vmf, fpin);
-			if (!fpin)
-				goto out_retry;
-
-			error = fsnotify_file_area_perm(fpin, MAY_ACCESS, &pos,
-							count);
-			if (error)
-				ret = VM_FAULT_SIGBUS;
-			goto out_retry;
-		}
-
 		/*
 		 * If the invalidate lock is not held, the folio was in cache
 		 * and uptodate and now it is not. Strange but possible since we
diff --git a/mm/nommu.c b/mm/nommu.c
index baa79abdaf037..9cb6e99215e2b 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1613,13 +1613,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 }
 EXPORT_SYMBOL(remap_vmalloc_range);
 
-vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf)
-{
-	BUG();
-	return 0;
-}
-EXPORT_SYMBOL_GPL(filemap_fsnotify_fault);
-
 vm_fault_t filemap_fault(struct vm_fault *vmf)
 {
 	BUG();
-- 
2.34.1