Re: [PATCH] mm: hugetlb: fix UAF in hugetlb_handle_userfault

Mike Kravetz <mike.kravetz@xxxxxxxxxx> · Wed, 21 Sep 2022 19:35:33 -0700

On 09/21/22 17:57, John Hubbard wrote:
> On 9/21/22 16:57, Mike Kravetz wrote:
> > On 09/21/22 10:48, Mike Kravetz wrote:
> >> On 09/21/22 16:34, Liu Shixin wrote:
> >>> The vma_lock and hugetlb_fault_mutex are dropped before handling
> >>> userfault and reacquire them again after handle_userfault(), but
> >>> reacquire the vma_lock could lead to UAF[1] due to the following
> >>> race,
> >>>
> >>> hugetlb_fault
> >>>   hugetlb_no_page
> >>>     /*unlock vma_lock */
> >>>     hugetlb_handle_userfault
> >>>       handle_userfault
> >>>         /* unlock mm->mmap_lock*/
> >>>                                            vm_mmap_pgoff
> >>>                                              do_mmap
> >>>                                                mmap_region
> >>>                                                  munmap_vma_range
> >>>                                                    /* clean old vma */
> >>>         /* lock vma_lock again  <--- UAF */
> >>>     /* unlock vma_lock */
> >>>
> >>> Since the vma_lock will unlock immediately after hugetlb_handle_userfault(),
> >>> let's drop the unneeded lock and unlock in hugetlb_handle_userfault() to fix
> >>> the issue.
> >>
> >> Thank you very much!
> >>
> >> When I saw this report, the obvious fix was to do something like what you have
> >> done below.  That looks fine with a few minor comments.
> >>
> >> One question I have not yet answered is, "Does this same issue apply to
> >> follow_hugetlb_page()?".  I believe it does.  follow_hugetlb_page calls
> >> hugetlb_fault which could result in the fault being processed by userfaultfd.
> >> If we experience the race above, then the associated vma could no longer be
> >> valid when returning from hugetlb_fault.  follow_hugetlb_page and callers
> >> have a flag (locked) to deal with dropping mmap lock.  However, I am not sure
> >> if it is handled correctly WRT userfaultfd.  I think this needs to be answered
> >> before fixing.  And, if the follow_hugetlb_page code needs to be fixed it
> >> should be done at the same time.
> >>
> > 
> > To at least verify this code path, I added userfaultfd handling to the gup_test
> > program in kernel selftests.  When doing basic gup test on a hugetlb page in
> 
> Just for those of us who are easily confused by userfaultfd cases, can you show
> what that patch is? It would help me understand this a little faster.

The ugly (just throw code at it to make it work) diff is below.  All I did was
add the sample code from the userfaultfd man page.

With that change in place, I just run 'gup_test -U -z -m 2 -H'

> Actually I'm expecting that Peter can easily answer this whole thing. :)

diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index e43879291dac..491424d0a039 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -8,8 +8,11 @@
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/types.h>
+#include <sys/syscall.h>
 #include <pthread.h>
 #include <assert.h>
+#include <poll.h>
+#include <linux/userfaultfd.h>
 #include <mm/gup_test.h>
 #include "../kselftest.h"
 
@@ -48,6 +51,75 @@ static char *cmd_to_str(unsigned long cmd)
 	return "Unknown command";
 }
 
+static void *fault_handler_thread(void *arg)
+{
+	static struct uffd_msg msg;   /* Data read from userfaultfd */
+	static int fault_cnt = 0;     /* Number of faults so far handled */
+	long uffd;
+	static char *page = NULL;
+	struct uffdio_copy uffdio_copy;
+	ssize_t nread;
+	size_t page_size = 2 * 1024 * 1024;
+
+	uffd = (long) arg;
+
+	if (page == NULL) {
+		page = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+		if (page == MAP_FAILED)
+		exit(1);
+	}
+
+	for (;;) {
+		struct pollfd pollfd;
+		int nready;
+		pollfd.fd = uffd;
+		pollfd.events = POLLIN;
+		nready = poll(&pollfd, 1, -1);
+		if (nready == -1)
+			exit(1);
+
+		printf("\nfault_handler_thread():\n");
+		printf("    poll() returns: nready = %d; "
+			"POLLIN = %d; POLLERR = %d\n", nready,
+			(pollfd.revents & POLLIN) != 0,
+			(pollfd.revents & POLLERR) != 0);
+			
+		nread = read(uffd, &msg, sizeof(msg));
+		if (nread == 0) {
+			printf("EOF on userfaultfd!\n");
+			exit(1);
+		}
+
+		if (nread == -1) {
+			printf("nread == -1\n");
+			exit(1);
+		}
+
+		if (msg.event != UFFD_EVENT_PAGEFAULT) {
+			printf("Unexpected event on userfaultfd\n");
+			exit(1);
+		}
+
+		printf("    UFFD_EVENT_PAGEFAULT event: ");
+		printf("flags = %llx; ", msg.arg.pagefault.flags);
+		printf("address = %llx\n", msg.arg.pagefault.address);
+
+		memset(page, 'A' + fault_cnt % 20, page_size);
+		fault_cnt++;
+
+		uffdio_copy.src = (unsigned long) page;
+		uffdio_copy.dst = (unsigned long) msg.arg.pagefault.address &
+						~(page_size - 1);
+		uffdio_copy.len = page_size;
+		uffdio_copy.mode = 0;
+		uffdio_copy.copy = 0;
+		if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1)
+			exit(1);
+
+	}
+}
+
 void *gup_thread(void *data)
 {
 	struct gup_test gup = *(struct gup_test *)data;
@@ -94,7 +166,11 @@ int main(int argc, char **argv)
 	int flags = MAP_PRIVATE, touch = 0;
 	char *file = "/dev/zero";
 	pthread_t *tid;
+	pthread_t thr;
 	char *p;
+	long uffd;
+	struct uffdio_api uffdio_api;
+	struct uffdio_register uffdio_register;
 
 	while ((opt = getopt(argc, argv, "m:r:n:F:f:abcj:tTLUuwWSHpz")) != -1) {
 		switch (opt) {
@@ -230,6 +306,18 @@ int main(int argc, char **argv)
 		exit(KSFT_SKIP);
 	}
 
+	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+	if (uffd == -1) {
+		perror("uffd open");
+		exit(1);
+	}
+	uffdio_api.api = UFFD_API;
+	uffdio_api.features = 0;
+	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
+		perror("uffd ioctl API");
+		exit(1);
+	}
+
 	p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
 	if (p == MAP_FAILED) {
 		perror("mmap");
@@ -237,6 +325,20 @@ int main(int argc, char **argv)
 	}
 	gup.addr = (unsigned long)p;
 
+	uffdio_register.range.start = (unsigned long)p;
+	uffdio_register.range.len = size;
+	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
+		perror("uffd ioctl API");
+		exit(1);
+	}
+	ret = pthread_create(&thr, NULL, fault_handler_thread, (void *)uffd);
+	if (ret) {
+		exit(1);
+	}
+	
+	sleep(5);
+
 	if (thp == 1)
 		madvise(p, size, MADV_HUGEPAGE);
 	else if (thp == 0)
-- 
Mike Kravetz