From: Nadav Amit <namit@xxxxxxxxxx> Add a feature UFFD_FEATURE_POLL that makes the faulting thread spin while waiting for the page-fault to be handled. Users of this feature should take care to run the page-fault handling thread on a different physical CPU and, ideally, to ensure that there are available cores to run the handler, as otherwise they will see performance degradation. We can later enhance it by setting one or two timeouts: one timeout until the page-fault is handled and another until the handler is woken. Cc: Jens Axboe <axboe@xxxxxxxxx> Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx> Cc: Peter Xu <peterx@xxxxxxxxxx> Cc: Alexander Viro <viro@xxxxxxxxxxxxxxxxxx> Cc: io-uring@xxxxxxxxxxxxxxx Cc: linux-fsdevel@xxxxxxxxxxxxxxx Cc: linux-kernel@xxxxxxxxxxxxxxx Cc: linux-mm@xxxxxxxxx Signed-off-by: Nadav Amit <namit@xxxxxxxxxx> --- fs/userfaultfd.c | 24 ++++++++++++++++++++---- include/uapi/linux/userfaultfd.h | 9 ++++++++- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index fedf7c1615d5..b6a04e526025 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -122,7 +122,9 @@ static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode, if (len && (start > uwq->msg.arg.pagefault.address || start + len <= uwq->msg.arg.pagefault.address)) goto out; - WRITE_ONCE(uwq->waken, true); + + smp_store_mb(uwq->waken, true); + /* * The Program-Order guarantees provided by the scheduler * ensure uwq->waken is visible before the task is woken. @@ -377,6 +379,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) vm_fault_t ret = VM_FAULT_SIGBUS; bool must_wait; long blocking_state; + bool poll; /* * We don't do userfault handling for the final child pid update. @@ -410,6 +413,8 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) if (ctx->features & UFFD_FEATURE_SIGBUS) goto out; + poll = ctx->features & UFFD_FEATURE_POLL; + /* * If it's already released don't get it. 
This avoids to loop * in __get_user_pages if userfaultfd_release waits on the @@ -495,7 +500,10 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) * following the spin_unlock to happen before the list_add in * __add_wait_queue. */ - set_current_state(blocking_state); + + if (!poll) + set_current_state(blocking_state); + spin_unlock_irq(&ctx->fault_pending_wqh.lock); if (!is_vm_hugetlb_page(vmf->vma)) @@ -509,10 +517,18 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) if (likely(must_wait && !READ_ONCE(ctx->released))) { wake_up_poll(&ctx->fd_wqh, EPOLLIN); - schedule(); + if (poll) { + while (!READ_ONCE(uwq.waken) && !READ_ONCE(ctx->released) && + !signal_pending(current)) { + cpu_relax(); + cond_resched(); + } + } else + schedule(); } - __set_current_state(TASK_RUNNING); + if (!poll) + __set_current_state(TASK_RUNNING); /* * Here we race with the list_del; list_add in diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index e7e98bde221f..4eeba4235afe 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -27,7 +27,9 @@ UFFD_FEATURE_MISSING_HUGETLBFS | \ UFFD_FEATURE_MISSING_SHMEM | \ UFFD_FEATURE_SIGBUS | \ - UFFD_FEATURE_THREAD_ID) + UFFD_FEATURE_THREAD_ID | \ + UFFD_FEATURE_POLL) + #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -171,6 +173,10 @@ struct uffdio_api { * * UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will * be returned, if feature is not requested 0 will be returned. + * + * UFFD_FEATURE_POLL polls upon page-fault if the feature is requested + * instead of descheduling. This feature should only be enabled for + * low-latency handlers and when CPUs are not overcommitted. 
*/ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -181,6 +187,7 @@ struct uffdio_api { #define UFFD_FEATURE_EVENT_UNMAP (1<<6) #define UFFD_FEATURE_SIGBUS (1<<7) #define UFFD_FEATURE_THREAD_ID (1<<8) +#define UFFD_FEATURE_POLL (1<<9) __u64 features; __u64 ioctls; -- 2.25.1