From: Willem de Bruijn <willemb@xxxxxxxxxx> Subject: epoll: convert internal api to timespec64 Patch series "add epoll_pwait2 syscall", v4. Enable nanosecond timeouts for epoll. Analogous to pselect and ppoll, introduce an epoll_wait syscall variant that takes a struct timespec instead of int timeout. This patch (of 4): Make epoll more consistent with select/poll: pass along the timeout as timespec64 pointer. In anticipation of additional changes affecting all three polling mechanisms: - add epoll_pwait2 syscall with timespec semantics, and share poll_select_set_timeout implementation. - compute slack before conversion to absolute time, to save one ktime_get_ts64 call. Link: https://lkml.kernel.org/r/20201121144401.3727659-1-willemdebruijn.kernel@xxxxxxxxx Link: https://lkml.kernel.org/r/20201121144401.3727659-2-willemdebruijn.kernel@xxxxxxxxx Signed-off-by: Willem de Bruijn <willemb@xxxxxxxxxx> Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx> Cc: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx> Cc: Arnd Bergmann <arnd@xxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- fs/eventpoll.c | 57 ++++++++++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 20 deletions(-) --- a/fs/eventpoll.c~epoll-convert-internal-api-to-timespec64 +++ a/fs/eventpoll.c @@ -1712,15 +1712,25 @@ static int ep_send_events(struct eventpo return res; } -static inline struct timespec64 ep_set_mstimeout(long ms) +static struct timespec64 *ep_timeout_to_timespec(struct timespec64 *to, long ms) { - struct timespec64 now, ts = { - .tv_sec = ms / MSEC_PER_SEC, - .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC), - }; + struct timespec64 now; + + if (ms < 0) + return NULL; + + if (!ms) { + to->tv_sec = 0; + to->tv_nsec = 0; + return to; + } + + to->tv_sec = ms / MSEC_PER_SEC; + to->tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC); ktime_get_ts64(&now); - return timespec64_add_safe(now, ts); + *to = timespec64_add_safe(now, *to); + return to; } /** @@ -1732,8 +1742,8 @@ static inline struct timespec64 ep_set_m * stored. * @maxevents: Size (in terms of number of events) of the caller event buffer. * @timeout: Maximum timeout for the ready events fetch operation, in - * milliseconds. If the @timeout is zero, the function will not block, - * while if the @timeout is less than zero, the function will block + * timespec. If the timeout is zero, the function will not block, + * while if the @timeout ptr is NULL, the function will block * until at least one event has been retrieved (or an error * occurred). * @@ -1741,7 +1751,7 @@ static inline struct timespec64 ep_set_m * error code, in case of error. */ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, - int maxevents, long timeout) + int maxevents, struct timespec64 *timeout) { int res, eavail, timed_out = 0; u64 slack = 0; @@ -1750,13 +1760,11 @@ static int ep_poll(struct eventpoll *ep, lockdep_assert_irqs_enabled(); - if (timeout > 0) { - struct timespec64 end_time = ep_set_mstimeout(timeout); - - slack = select_estimate_accuracy(&end_time); + if (timeout && (timeout->tv_sec | timeout->tv_nsec)) { + slack = select_estimate_accuracy(timeout); to = &expires; - *to = timespec64_to_ktime(end_time); - } else if (timeout == 0) { + *to = timespec64_to_ktime(*timeout); + } else if (timeout) { /* * Avoid the unnecessary trip to the wait queue loop, if the * caller specified a non blocking operation. @@ -2175,7 +2183,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, in * part of the user space epoll_wait(2). */ static int do_epoll_wait(int epfd, struct epoll_event __user *events, - int maxevents, int timeout) + int maxevents, struct timespec64 *to) { int error; struct fd f; @@ -2209,7 +2217,7 @@ static int do_epoll_wait(int epfd, struc ep = f.file->private_data; /* Time to fish for events ... */ - error = ep_poll(ep, events, maxevents, timeout); + error = ep_poll(ep, events, maxevents, to); error_fput: fdput(f); @@ -2219,7 +2227,10 @@ error_fput: SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout) { - return do_epoll_wait(epfd, events, maxevents, timeout); + struct timespec64 to; + + return do_epoll_wait(epfd, events, maxevents, + ep_timeout_to_timespec(&to, timeout)); } /* @@ -2230,6 +2241,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, int, maxevents, int, timeout, const sigset_t __user *, sigmask, size_t, sigsetsize) { + struct timespec64 to; int error; /* @@ -2240,7 +2252,9 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, if (error) return error; - error = do_epoll_wait(epfd, events, maxevents, timeout); + error = do_epoll_wait(epfd, events, maxevents, + ep_timeout_to_timespec(&to, timeout)); + restore_saved_sigmask_unless(error == -EINTR); return error; @@ -2253,6 +2267,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { + struct timespec64 to; long err; /* @@ -2263,7 +2278,9 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, if (err) return err; - err = do_epoll_wait(epfd, events, maxevents, timeout); + err = do_epoll_wait(epfd, events, maxevents, + ep_timeout_to_timespec(&to, timeout)); + restore_saved_sigmask_unless(err == -EINTR); return err; _