From: Willem de Bruijn <willemb@xxxxxxxxxx> Add syscall epoll_pwait2, an epoll_wait variant with nsec resolution that replaces int timeout with struct timespec. It is equivalent otherwise. int epoll_pwait2(int fd, struct epoll_event *events, int maxevents, const struct timespec *timeout, const sigset_t *sigset); The underlying hrtimer is already programmed with nsec resolution. pselect and ppoll also set nsec resolution timeout with timespec. The sigset_t in epoll_pwait has a compat variant. epoll_pwait2 needs the same. For timespec, only support this new interface on 2038 aware platforms that define __kernel_timespec_t. So no CONFIG_COMPAT_32BIT_TIME. Changes v3: - rewrite: add epoll_pwait2 syscall instead of epoll_create1 flag v2: - cast to s64: avoid overflow on 32-bit platforms (Shuo Chen) - minor commit message rewording Signed-off-by: Willem de Bruijn <willemb@xxxxxxxxxx> --- This version applies cleanly to linux-next-20201117. --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 + arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + fs/eventpoll.c | 106 ++++++++++++++++---- include/linux/compat.h | 6 ++ include/linux/syscalls.h | 5 + include/uapi/asm-generic/unistd.h | 4 +- kernel/sys_ni.c | 2 + 23 files changed, 123 insertions(+), 20 deletions(-) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index c5cc5bfa2062..e0a599bfb0e1 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -481,3 +481,4 @@ 549 common faccessat2 sys_faccessat2 550 common process_madvise sys_process_madvise 551 common watch_mount sys_watch_mount +552 common epoll_pwait2 sys_epoll_pwait2 diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 47325b3b661a..dbde88a855b6 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -455,3 +455,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common watch_mount sys_watch_mount +443 common epoll_pwait2 sys_epoll_pwait2 diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 949788f5ba40..d1f7d35f986e 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,7 +38,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 443 +#define __NR_compat_syscalls 444 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index c71c3fe0b6cd..b84e24a7e2c0 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -893,6 +893,8 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise) __SYSCALL(__NR_watch_mount, sys_watch_mount) #define __NR_memfd_secret 442 __SYSCALL(__NR_memfd_secret, sys_memfd_secret) +#define __NR_epoll_pwait2 443 +__SYSCALL(__NR_epoll_pwait2, sys_epoll_pwait2) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 033244462350..c8809959636f 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -362,3 +362,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common watch_mount sys_watch_mount +443 common epoll_pwait2 sys_epoll_pwait2 diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index efd3ecb3cdfc..dde585616bf8 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -441,3 +441,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common watch_mount sys_watch_mount +443 common epoll_pwait2 sys_epoll_pwait2 diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 67ae5a5e4d21..4c09f27fedd0 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -447,3 +447,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common watch_mount sys_watch_mount +443 common epoll_pwait2 sys_epoll_pwait2 diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index c59bc6acc47a..00921244242d 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -380,3 +380,4 @@ 439 n32 faccessat2 sys_faccessat2 440 n32 process_madvise sys_process_madvise 441 n32 watch_mount sys_watch_mount +443 n32 epoll_pwait2 sys_epoll_pwait2 diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 50bbb9517052..b7920f76358d 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -356,3 +356,4 @@ 439 n64 faccessat2 sys_faccessat2 440 n64 process_madvise sys_process_madvise 441 n64 watch_mount sys_watch_mount +443 n64 epoll_pwait2 sys_epoll_pwait2 diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 1d5644e221b4..2ab5ed500113 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -429,3 +429,4 @@ 439 o32 faccessat2 sys_faccessat2 440 o32 process_madvise sys_process_madvise 441 o32 watch_mount sys_watch_mount +443 o32 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 4492cc2fce23..9caf7d969846 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -439,3 +439,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common watch_mount sys_watch_mount +443 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index ee122bfc2ddc..dce635420226 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -531,3 +531,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common watch_mount sys_watch_mount +443 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index b467b57d66d1..dedfa6db9f8d 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -444,3 +444,4 @@ 439 common faccessat2 sys_faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise sys_process_madvise 441 common watch_mount sys_watch_mount sys_watch_mount +443 common epoll_pwait2 sys_epoll_pwait2 sys_epoll_pwait2 diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index f9c7ca5cb969..f7a383ae2c48 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -444,3 +444,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common watch_mount sys_watch_mount +443 common epoll_pwait2 sys_epoll_pwait2 diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 341fd4ba053b..a6cdf450d0f4 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -487,3 +487,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common watch_mount sys_watch_mount +443 common epoll_pwait2 sys_epoll_pwait2 diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 109e6681b8fa..9a4e8ec207fc 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -447,3 +447,4 @@ 440 i386 process_madvise sys_process_madvise 441 i386 watch_mount sys_watch_mount 442 i386 memfd_secret sys_memfd_secret +443 i386 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 742cf17d7725..e2cb5f87e760 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -364,6 +364,7 @@ 440 common process_madvise sys_process_madvise 441 common watch_mount sys_watch_mount 442 common memfd_secret sys_memfd_secret +443 common epoll_pwait2 sys_epoll_pwait2 # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 6225d87be81f..1c8ae5fd026e 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -412,3 +412,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common watch_mount sys_watch_mount +443 common epoll_pwait2 sys_epoll_pwait2 diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 297aeb0ee9d1..0f0543e1cc6f 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1714,13 +1714,11 @@ static int ep_send_events(struct eventpoll *ep, return res; } -static inline struct timespec64 ep_set_mstimeout(long ms) +static inline struct timespec64 ep_set_nstimeout(s64 timeout) { - struct timespec64 now, ts = { - .tv_sec = ms / MSEC_PER_SEC, - .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC), - }; + struct timespec64 now, ts; + ts = ns_to_timespec64(timeout); ktime_get_ts64(&now); return timespec64_add_safe(now, ts); } @@ -1734,7 +1732,7 @@ static inline struct timespec64 ep_set_mstimeout(long ms) * stored. * @maxevents: Size (in terms of number of events) of the caller event buffer. * @timeout: Maximum timeout for the ready events fetch operation, in - * milliseconds. If the @timeout is zero, the function will not block, + * nanoseconds. If the @timeout is zero, the function will not block, * while if the @timeout is less than zero, the function will block * until at least one event has been retrieved (or an error * occurred). @@ -1743,7 +1741,7 @@ static inline struct timespec64 ep_set_mstimeout(long ms) * error code, in case of error. */ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, - int maxevents, long timeout) + int maxevents, s64 timeout) { int res, eavail, timed_out = 0; u64 slack = 0; @@ -1753,7 +1751,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, lockdep_assert_irqs_enabled(); if (timeout > 0) { - struct timespec64 end_time = ep_set_mstimeout(timeout); + struct timespec64 end_time = ep_set_nstimeout(timeout); slack = select_estimate_accuracy(&end_time); to = &expires; @@ -2177,7 +2175,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, * part of the user space epoll_wait(2). */ static int do_epoll_wait(int epfd, struct epoll_event __user *events, - int maxevents, int timeout) + int maxevents, s64 timeout) { int error; struct fd f; @@ -2218,19 +2216,27 @@ static int do_epoll_wait(int epfd, struct epoll_event __user *events, return error; } +static s64 timeout_to_ns(int timeout) +{ + if (timeout <= 0) + return timeout; + else + return timeout * (s64)NSEC_PER_MSEC; +} + SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout) { - return do_epoll_wait(epfd, events, maxevents, timeout); + return do_epoll_wait(epfd, events, maxevents, timeout_to_ns(timeout)); } /* * Implement the event wait interface for the eventpoll file. It is the kernel * part of the user space epoll_pwait(2). */ -SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, - int, maxevents, int, timeout, const sigset_t __user *, sigmask, - size_t, sigsetsize) +static int do_epoll_pwait(int epfd, struct epoll_event __user *events, + int maxevents, s64 timeout, + const sigset_t __user *sigmask, size_t sigsetsize) { int error; @@ -2248,12 +2254,40 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, return error; } +SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, + int, maxevents, int, timeout, const sigset_t __user *, sigmask, + size_t, sigsetsize) +{ + return do_epoll_pwait(epfd, events, maxevents, timeout_to_ns(timeout), + sigmask, sigsetsize); +} + +SYSCALL_DEFINE6(epoll_pwait2, int, epfd, struct epoll_event __user *, events, + int, maxevents, const struct __kernel_timespec __user *, timeout, + const sigset_t __user *, sigmask, size_t, sigsetsize) +{ + struct timespec64 ts; + s64 timeout_ns; + + if (timeout) { + if (get_timespec64(&ts, timeout)) + return -EFAULT; + if (!timespec64_valid(&ts)) + return -EINVAL; + timeout_ns = timespec64_to_ns(&ts); + } else { + timeout_ns = -1; + } + + return do_epoll_pwait(epfd, events, maxevents, timeout_ns, + sigmask, sigsetsize); +} + #ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, - struct epoll_event __user *, events, - int, maxevents, int, timeout, - const compat_sigset_t __user *, sigmask, - compat_size_t, sigsetsize) +static int do_compat_epoll_pwait(int epfd, struct epoll_event __user *events, + int maxevents, s64 timeout, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize) { long err; @@ -2270,6 +2304,42 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, return err; } + +COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, + struct epoll_event __user *, events, + int, maxevents, int, timeout, + const compat_sigset_t __user *, sigmask, + compat_size_t, sigsetsize) +{ + return do_compat_epoll_pwait(epfd, events, maxevents, + timeout_to_ns(timeout), + sigmask, sigsetsize); +} + +COMPAT_SYSCALL_DEFINE6(epoll_pwait2, int, epfd, + struct epoll_event __user *, events, + int, maxevents, + const struct __kernel_timespec __user *, timeout, + const compat_sigset_t __user *, sigmask, + compat_size_t, sigsetsize) +{ + struct timespec64 ts; + s64 timeout_ns; + + if (timeout) { + if (get_timespec64(&ts, timeout)) + return -EFAULT; + if (!timespec64_valid(&ts)) + return -EINVAL; + timeout_ns = timespec64_to_ns(&ts); + } else { + timeout_ns = -1; + } + + return do_compat_epoll_pwait(epfd, events, maxevents, timeout_ns, + sigmask, sigsetsize); +} + #endif static int __init eventpoll_init(void) diff --git a/include/linux/compat.h b/include/linux/compat.h index 14d514233e1d..76dc28a2e7f1 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -505,6 +505,12 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, int maxevents, int timeout, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); +asmlinkage long compat_sys_epoll_pwait2(int epfd, + struct epoll_event __user *events, + int maxevents, + const struct __kernel_timespec __user *timeout, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize); /* fs/fcntl.c */ asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e87a96ace85b..2ba45fb7874c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -362,6 +362,11 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, int maxevents, int timeout, const sigset_t __user *sigmask, size_t sigsetsize); +asmlinkage long sys_epoll_pwait2(int epfd, struct epoll_event __user *events, + int maxevents, + const struct __kernel_timespec __user *timeout, + const sigset_t __user *sigmask, + size_t sigsetsize); /* fs/fcntl.c */ asmlinkage long sys_dup(unsigned int fildes); diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 3bc087d14535..aa883388700e 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -865,9 +865,11 @@ __SYSCALL(__NR_watch_mount, sys_watch_mount) #define __NR_memfd_secret 442 __SYSCALL(__NR_memfd_secret, sys_memfd_secret) #endif +#define __NR_epoll_pwait2 443 +__SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) #undef __NR_syscalls -#define __NR_syscalls 443 +#define __NR_syscalls 444 /* * 32 bit systems traditionally used different diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 805fd7a668be..869aa6b5bf34 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -68,6 +68,8 @@ COND_SYSCALL(epoll_create1); COND_SYSCALL(epoll_ctl); COND_SYSCALL(epoll_pwait); COND_SYSCALL_COMPAT(epoll_pwait); +COND_SYSCALL(epoll_pwait2); +COND_SYSCALL_COMPAT(epoll_pwait2); /* fs/fcntl.c */ -- 2.29.2.454.gaff20da3a2-goog