[PATCH v2 3/5] userfaultfd: introduce write-likely mode for uffd operations

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Nadav Amit <namit@xxxxxxxxxx>

Introduce write-likely hints for uffd. These hints will be used in a
future patch to decide whether to attempt to map pages in the page table
or to only mark them logically as writable. This lets userspace choose
between having a page accessed faster and making it possible to remove
the page later, potentially without writeback and a TLB flush.

Cc: Mike Kravetz <mike.kravetz@xxxxxxxxxx>
Cc: Hugh Dickins <hughd@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Axel Rasmussen <axelrasmussen@xxxxxxxxxx>
Cc: Peter Xu <peterx@xxxxxxxxxx>
Cc: David Hildenbrand <david@xxxxxxxxxx>
Cc: Mike Rapoport <rppt@xxxxxxxxxxxxx>
Signed-off-by: Nadav Amit <namit@xxxxxxxxxx>
---
 fs/userfaultfd.c                 | 32 ++++++++++++++++++++++++--------
 include/linux/userfaultfd_k.h    |  1 +
 include/uapi/linux/userfaultfd.h | 13 ++++++++++++-
 3 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 8d8792b27c53..3027d228550a 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1709,7 +1709,8 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
 	if (uffdio_copy.src + uffdio_copy.len <= uffdio_copy.src)
 		goto out;
 	if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE|UFFDIO_COPY_MODE_WP|
-				 UFFDIO_COPY_MODE_ACCESS_LIKELY))
+				 UFFDIO_COPY_MODE_ACCESS_LIKELY|
+				 UFFDIO_COPY_MODE_WRITE_LIKELY))
 		goto out;
 
 	mode_wp = uffdio_copy.mode & UFFDIO_COPY_MODE_WP;
@@ -1719,8 +1720,11 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
 	if (ctx->features & UFFD_FEATURE_ACCESS_HINTS) {
 		if (uffdio_copy.mode & UFFDIO_COPY_MODE_ACCESS_LIKELY)
 			uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+		if (uffdio_copy.mode & UFFDIO_COPY_MODE_WRITE_LIKELY)
+			uffd_flags |= UFFD_FLAGS_WRITE_LIKELY;
 	} else {
-		uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+		uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY |
+			      UFFD_FLAGS_WRITE_LIKELY;
 	}
 
 	if (mmget_not_zero(ctx->mm)) {
@@ -1774,14 +1778,18 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
 		goto out;
 	ret = -EINVAL;
 	if (uffdio_zeropage.mode & ~(UFFDIO_ZEROPAGE_MODE_DONTWAKE|
-				     UFFDIO_ZEROPAGE_MODE_ACCESS_LIKELY))
+				     UFFDIO_ZEROPAGE_MODE_ACCESS_LIKELY|
+				     UFFDIO_ZEROPAGE_MODE_WRITE_LIKELY))
 		goto out;
 
 	if (ctx->features & UFFD_FEATURE_ACCESS_HINTS) {
 		if (uffdio_zeropage.mode & UFFDIO_ZEROPAGE_MODE_ACCESS_LIKELY)
 			uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+		if (uffdio_zeropage.mode & UFFDIO_ZEROPAGE_MODE_WRITE_LIKELY)
+			uffd_flags |= UFFD_FLAGS_WRITE_LIKELY;
 	} else {
-		uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+		uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY |
+			      UFFD_FLAGS_WRITE_LIKELY;
 	}
 
 	if (mmget_not_zero(ctx->mm)) {
@@ -1834,7 +1842,8 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
 
 	if (uffdio_wp.mode & ~(UFFDIO_WRITEPROTECT_MODE_DONTWAKE |
 			       UFFDIO_WRITEPROTECT_MODE_WP |
-			       UFFDIO_WRITEPROTECT_MODE_ACCESS_LIKELY))
+			       UFFDIO_WRITEPROTECT_MODE_ACCESS_LIKELY |
+			       UFFDIO_WRITEPROTECT_MODE_WRITE_LIKELY))
 		return -EINVAL;
 
 	mode_wp = uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_WP;
@@ -1847,8 +1856,11 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
 	if (ctx->features & UFFD_FEATURE_ACCESS_HINTS) {
 		if (uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_ACCESS_LIKELY)
 			uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+		if (uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_WRITE_LIKELY)
+			uffd_flags |= UFFD_FLAGS_WRITE_LIKELY;
 	} else {
-		uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+		uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY |
+			      UFFD_FLAGS_WRITE_LIKELY;
 	}
 
 	if (mmget_not_zero(ctx->mm)) {
@@ -1903,14 +1915,18 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
 		goto out;
 	}
 	if (uffdio_continue.mode & ~(UFFDIO_CONTINUE_MODE_DONTWAKE|
-				     UFFDIO_CONTINUE_MODE_ACCESS_LIKELY))
+				     UFFDIO_CONTINUE_MODE_ACCESS_LIKELY|
+				     UFFDIO_CONTINUE_MODE_WRITE_LIKELY))
 		goto out;
 
 	if (ctx->features & UFFD_FEATURE_ACCESS_HINTS) {
 		if (uffdio_continue.mode & UFFDIO_CONTINUE_MODE_ACCESS_LIKELY)
 			uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+		if (uffdio_continue.mode & UFFDIO_CONTINUE_MODE_WRITE_LIKELY)
+			uffd_flags |= UFFD_FLAGS_WRITE_LIKELY;
 	} else {
-		uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+		uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY |
+			      UFFD_FLAGS_WRITE_LIKELY;
 	}
 
 	if (mmget_not_zero(ctx->mm)) {
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index b326798b5677..4968c86938b2 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -60,6 +60,7 @@ typedef unsigned int __bitwise uffd_flags_t;
 #define UFFD_FLAGS_NONE			((__force uffd_flags_t)0)
 #define UFFD_FLAGS_WP			((__force uffd_flags_t)BIT(0))
 #define UFFD_FLAGS_ACCESS_LIKELY	((__force uffd_flags_t)BIT(1))
+#define UFFD_FLAGS_WRITE_LIKELY		((__force uffd_flags_t)BIT(2))
 
 extern int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
 				    struct vm_area_struct *dst_vma,
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 02e0c1f56939..f52cbe4c9c44 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -202,7 +202,7 @@ struct uffdio_api {
 	 * write-protection mode is supported on both shmem and hugetlbfs.
 	 *
 	 * UFFD_FEATURE_ACCESS_HINTS indicates that the ioctl operations
-	 * support the UFFDIO_*_MODE_ACCESS_LIKELY hints.
+	 * support the UFFDIO_*_MODE_[ACCESS|WRITE]_LIKELY hints.
 	 */
 #define UFFD_FEATURE_PAGEFAULT_FLAG_WP		(1<<0)
 #define UFFD_FEATURE_EVENT_FORK			(1<<1)
@@ -257,9 +257,13 @@ struct uffdio_copy {
 	 * page is likely to be access in the near future. Providing the hint
 	 * properly can improve performance.
 	 *
+	 * UFFDIO_COPY_MODE_WRITE_LIKELY provides a hint to the kernel that the
+	 * page is likely to be written in the near future. Providing the hint
+	 * properly can improve performance.
 	 */
 #define UFFDIO_COPY_MODE_WP			((__u64)1<<1)
 #define UFFDIO_COPY_MODE_ACCESS_LIKELY		((__u64)1<<2)
+#define UFFDIO_COPY_MODE_WRITE_LIKELY		((__u64)1<<3)
 	__u64 mode;
 
 	/*
@@ -273,6 +277,7 @@ struct uffdio_zeropage {
 	struct uffdio_range range;
 #define UFFDIO_ZEROPAGE_MODE_DONTWAKE		((__u64)1<<0)
 #define UFFDIO_ZEROPAGE_MODE_ACCESS_LIKELY	((__u64)1<<1)
+#define UFFDIO_ZEROPAGE_MODE_WRITE_LIKELY	((__u64)1<<2)
 	__u64 mode;
 
 	/*
@@ -296,6 +301,10 @@ struct uffdio_writeprotect {
  * that the page is likely to be access in the near future. Providing
  * the hint properly can improve performance.
  *
+ * UFFDIO_WRITEPROTECT_MODE_WRITE_LIKELY: provides a hint to the kernel
+ * that the page is likely to be written in the near future. Providing
+ * the hint properly can improve performance.
+ *
  * NOTE: Write protecting a region (WP=1) is unrelated to page faults,
  * therefore DONTWAKE flag is meaningless with WP=1.  Removing write
  * protection (WP=0) in response to a page fault wakes the faulting
@@ -304,6 +313,7 @@ struct uffdio_writeprotect {
 #define UFFDIO_WRITEPROTECT_MODE_WP		((__u64)1<<0)
 #define UFFDIO_WRITEPROTECT_MODE_DONTWAKE	((__u64)1<<1)
 #define UFFDIO_WRITEPROTECT_MODE_ACCESS_LIKELY	((__u64)1<<2)
+#define UFFDIO_WRITEPROTECT_MODE_WRITE_LIKELY	((__u64)1<<3)
 	__u64 mode;
 };
 
@@ -311,6 +321,7 @@ struct uffdio_continue {
 	struct uffdio_range range;
 #define UFFDIO_CONTINUE_MODE_DONTWAKE		((__u64)1<<0)
 #define UFFDIO_CONTINUE_MODE_ACCESS_LIKELY	((__u64)1<<1)
+#define UFFDIO_CONTINUE_MODE_WRITE_LIKELY	((__u64)1<<2)
 	__u64 mode;
 
 	/*
-- 
2.25.1





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux