Re: [PATCH v8 00/17] gfs2: Fix mmap + page fault deadlocks

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



One of the arguments against Dave Hansen's patch that eliminates the
pre-faulting was that it doubles the number of page faults in the slow
case.  This can be avoided by using get_user_pages() to do the
"post-faulting", though.  For what it's worth, here's a patch for that
(on top of this series).

Andreas

--

fs: Avoid predictable page faults for sys_write() user buffer pages

Introduce a new fault_in_iov_iter_slow_readable() helper for faulting in
an iterator via get_user_pages() instead of triggering page faults.
This is slower than a simple memory read when the underlying pages are
resident, but avoids the page fault overhead when the underlying pages
need to be faulted in.

Use fault_in_iov_iter_slow_readable() in generic_perform_write and
iomap_write_iter when reading from the user buffer fails.

Signed-off-by: Andreas Gruenbacher <agruenba@xxxxxxxxxx>
---
 fs/iomap/buffered-io.c  |  2 +-
 include/linux/pagemap.h |  3 ++-
 include/linux/uio.h     | 17 ++++++++++++++++-
 lib/iov_iter.c          | 10 ++++++----
 mm/filemap.c            |  2 +-
 mm/gup.c                | 10 ++++++----
 6 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index d8809cd9ab31..15a0b4bb9528 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -770,7 +770,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 				bytes = copied;
 				goto again;
 			}
-			if (fault_in_iov_iter_readable(i, bytes) != bytes)
+			if (fault_in_iov_iter_slow_readable(i, bytes) != bytes)
 				goto again;
 			status = -EFAULT;
 			break;
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 2f7dd14083d9..43844ed5675f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -736,8 +736,9 @@ extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);
  * Fault in userspace address range.
  */
 size_t fault_in_writeable(char __user *uaddr, size_t size);
-size_t fault_in_safe_writeable(const char __user *uaddr, size_t size);
 size_t fault_in_readable(const char __user *uaddr, size_t size);
+size_t __fault_in_slow(const char __user *uaddr, size_t size,
+		       unsigned int flags);
 
 int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 				pgoff_t index, gfp_t gfp_mask);
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 6350354f97e9..b071f4445059 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -8,6 +8,7 @@
 #include <linux/kernel.h>
 #include <linux/thread_info.h>
 #include <uapi/linux/uio.h>
+#include <linux/mm.h>
 
 struct page;
 struct pipe_inode_info;
@@ -135,7 +136,21 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 void iov_iter_revert(struct iov_iter *i, size_t bytes);
 size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes);
-size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t bytes);
+size_t __fault_in_iov_iter_slow(const struct iov_iter *i, size_t bytes,
+				unsigned int flags);
+
+static inline size_t fault_in_iov_iter_slow_readable(const struct iov_iter *i,
+						     size_t bytes)
+{
+	return __fault_in_iov_iter_slow(i, bytes, 0);
+}
+
+static inline size_t fault_in_iov_iter_writeable(const struct iov_iter *i,
+						 size_t bytes)
+{
+	return __fault_in_iov_iter_slow(i, bytes, FOLL_WRITE);
+}
+
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 66a740e6e153..73789a5409f6 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -468,9 +468,10 @@ size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size)
 EXPORT_SYMBOL(fault_in_iov_iter_readable);
 
 /*
- * fault_in_iov_iter_writeable - fault in iov iterator for writing
+ * __fault_in_iov_iter_slow - fault in iov iterator for reading/writing
  * @i: iterator
  * @size: maximum length
+ * @flags: FOLL_* flags (FOLL_WRITE for writing)
  *
  * Faults in the iterator using get_user_pages(), i.e., without triggering
  * hardware page faults.  This is primarily useful when we already know that
@@ -481,7 +482,8 @@ EXPORT_SYMBOL(fault_in_iov_iter_readable);
  *
  * Always returns 0 for non-user-space iterators.
  */
-size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
+size_t __fault_in_iov_iter_slow(const struct iov_iter *i, size_t size,
+				unsigned int flags)
 {
 	if (iter_is_iovec(i)) {
 		size_t count = min(size, iov_iter_count(i));
@@ -495,7 +497,7 @@ size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
 
 			if (unlikely(!len))
 				continue;
-			ret = fault_in_safe_writeable(p->iov_base + skip, len);
+			ret = __fault_in_slow(p->iov_base + skip, len, flags);
 			count -= len - ret;
 			if (ret)
 				break;
@@ -504,7 +506,7 @@ size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
 	}
 	return 0;
 }
-EXPORT_SYMBOL(fault_in_iov_iter_writeable);
+EXPORT_SYMBOL(__fault_in_iov_iter_slow);
 
 void iov_iter_init(struct iov_iter *i, unsigned int direction,
 			const struct iovec *iov, unsigned long nr_segs,
diff --git a/mm/filemap.c b/mm/filemap.c
index 467cdb7d086d..7ca76f4aa974 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3787,7 +3787,7 @@ ssize_t generic_perform_write(struct file *file,
 				bytes = copied;
 				goto again;
 			}
-			if (fault_in_iov_iter_readable(i, bytes) != bytes)
+			if (fault_in_iov_iter_slow_readable(i, bytes) != bytes)
 				goto again;
 			status = -EFAULT;
 			break;
diff --git a/mm/gup.c b/mm/gup.c
index e1c7e4bde11f..def9f478a621 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1694,9 +1694,10 @@ size_t fault_in_writeable(char __user *uaddr, size_t size)
 EXPORT_SYMBOL(fault_in_writeable);
 
 /*
- * fault_in_safe_writeable - fault in an address range for writing
+ * __fault_in_slow - fault in an address range for reading/writing
  * @uaddr: start of address range
  * @size: length of address range
+ * @flags: FOLL_* flags (FOLL_WRITE for writing)
  *
  * Faults in an address range using get_user_pages, i.e., without triggering
  * hardware page faults.  This is primarily useful when we already know that
@@ -1711,7 +1712,8 @@ EXPORT_SYMBOL(fault_in_writeable);
  * Returns the number of bytes not faulted in, like copy_to_user() and
  * copy_from_user().
  */
-size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
+size_t __fault_in_slow(const char __user *uaddr, size_t size,
+		       unsigned int flags)
 {
 	unsigned long start = (unsigned long)untagged_addr(uaddr);
 	unsigned long end, nstart, nend;
@@ -1743,7 +1745,7 @@ size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
 		nr_pages = (nend - nstart) / PAGE_SIZE;
 		ret = __get_user_pages_locked(mm, nstart, nr_pages,
 					      NULL, NULL, &locked,
-					      FOLL_TOUCH | FOLL_WRITE);
+					      FOLL_TOUCH | flags);
 		if (ret <= 0)
 			break;
 		nend = nstart + ret * PAGE_SIZE;
@@ -1754,7 +1756,7 @@ size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
 		return 0;
 	return size - min_t(size_t, nstart - start, size);
 }
-EXPORT_SYMBOL(fault_in_safe_writeable);
+EXPORT_SYMBOL(__fault_in_slow);
 
 /**
  * fault_in_readable - fault in userspace address range for reading
-- 
2.26.3




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux