Al, Linus, Eric, please help. The previous discussion was very confusing, we simply can not understand each other. To me everything looks very simple and clear, but perhaps I missed something obvious? Please correct me. I think that the following code is correct int interrupted = 0; void sigint_handler(int sig) { interrupted = 1; } int main(void) { sigset_t sigint, empty; sigemptyset(&sigint); sigaddset(&sigint, SIGINT); sigprocmask(SIG_BLOCK, &sigint, NULL); signal(SIGINT, sigint_handler); sigemptyset(&empty); // so pselect() unblocks SIGINT ret = pselect(..., &empty); if (ret >= 0) // sucess or timeout assert(!interrupted); if (interrupted) assert(ret == -EINTR); } IOW, if pselect(sigmask) temporary unblocks SIGINT according to sigmask, this signal should not be delivered if a ready fd was found or timeout. The signal handle should only run if ret == -EINTR. (pselect() can be interrupted by any other signal which has a handler. In this case the handler can be called even if ret >= 0. This is correct, I fail to understand why some people think this is wrong, and in any case we simply can't avoid this). This was true until 854a6ed56839a ("signal: Add restore_user_sigmask()"), now this is broken by the signal_pending() check in restore_user_sigmask(). This patch https://lore.kernel.org/lkml/20190522032144.10995-1-deepa.kernel@xxxxxxxxx/ turns 0 into -EINTR if signal_pending(), but I think we should simply restore the old behaviour and simplify the code. See the compile-tested patch at the end. Of course, the new _xxx() helpers should be renamed somehow. fs/aio.c doesn't look right with or without this patch, but iiuc this is what it did before 854a6ed56839a. Let me show the code with the patch applied. I am using epoll_pwait() as an example because it looks very simple. static inline void set_restore_sigmask(void) { // WARN_ON(!TIF_SIGPENDING) was removed by this patch current->restore_sigmask = true; } int set_xxx(const sigset_t __user *umask, size_t sigsetsize) { sigset_t *kmask; if (!umask) return 0; if (sigsetsize != sizeof(sigset_t)) return -EINVAL; if (copy_from_user(kmask, umask, sizeof(sigset_t))) return -EFAULT; // we can safely modify ->saved_sigmask/restore_sigmask, they has no meaning // until the syscall returns. set_restore_sigmask(); current->saved_sigmask = current->blocked; set_current_blocked(kmask); return 0; } void update_xxx(bool interrupted) { // the main reason for this helper is WARN_ON(!TIF_SIGPENDING) which was "moved" // from set_restore_sigmask() above. if (interrupted) WARN_ON(!test_thread_flag(TIF_SIGPENDING)); else restore_saved_sigmask(); } SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout, const sigset_t __user *, sigmask, size_t, sigsetsize) { int error; error = set_xxx(sigmask, sigsetsize); if (error) return error; error = do_epoll_wait(epfd, events, maxevents, timeout); update_xxx(error == -EINTR); return error; } Oleg. --- fs/aio.c | 40 ++++++++++++++--------- fs/eventpoll.c | 12 +++---- fs/io_uring.c | 12 +++---- fs/select.c | 40 +++++++---------------- include/linux/compat.h | 4 +-- include/linux/sched/signal.h | 2 -- include/linux/signal.h | 6 ++-- kernel/signal.c | 77 ++++++++++++++++---------------------------- 8 files changed, 74 insertions(+), 119 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 3490d1f..8315bd2 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -2093,8 +2093,8 @@ SYSCALL_DEFINE6(io_pgetevents, const struct __aio_sigset __user *, usig) { struct __aio_sigset ksig = { NULL, }; - sigset_t ksigmask, sigsaved; struct timespec64 ts; + bool interrupted; int ret; if (timeout && unlikely(get_timespec64(&ts, timeout))) @@ -2103,13 +2103,15 @@ SYSCALL_DEFINE6(io_pgetevents, if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) return -EFAULT; - ret = set_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize); + ret = set_xxx(ksig.sigmask, ksig.sigsetsize); if (ret) return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + update_xxx(interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2126,8 +2128,8 @@ SYSCALL_DEFINE6(io_pgetevents_time32, const struct __aio_sigset __user *, usig) { struct __aio_sigset ksig = { NULL, }; - sigset_t ksigmask, sigsaved; struct timespec64 ts; + bool interrupted; int ret; if (timeout && unlikely(get_old_timespec32(&ts, timeout))) @@ -2137,13 +2139,15 @@ SYSCALL_DEFINE6(io_pgetevents_time32, return -EFAULT; - ret = set_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize); + ret = set_xxx(ksig.sigmask, ksig.sigsetsize); if (ret) return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + update_xxx(interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2191,8 +2195,8 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, const struct __compat_aio_sigset __user *, usig) { struct __compat_aio_sigset ksig = { NULL, }; - sigset_t ksigmask, sigsaved; struct timespec64 t; + bool interrupted; int ret; if (timeout && get_old_timespec32(&t, timeout)) @@ -2201,13 +2205,15 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) return -EFAULT; - ret = set_compat_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize); + ret = set_compat_xxx(ksig.sigmask, ksig.sigsetsize); if (ret) return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + update_xxx(interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; @@ -2224,8 +2230,8 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, const struct __compat_aio_sigset __user *, usig) { struct __compat_aio_sigset ksig = { NULL, }; - sigset_t ksigmask, sigsaved; struct timespec64 t; + bool interrupted; int ret; if (timeout && get_timespec64(&t, timeout)) @@ -2234,13 +2240,15 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) return -EFAULT; - ret = set_compat_user_sigmask(ksig.sigmask, &ksigmask, &sigsaved, ksig.sigsetsize); + ret = set_compat_xxx(ksig.sigmask, ksig.sigsetsize); if (ret) return ret; ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL); - restore_user_sigmask(ksig.sigmask, &sigsaved); - if (signal_pending(current) && !ret) + + interrupted = signal_pending(current); + update_xxx(interrupted); + if (interrupted && !ret) ret = -ERESTARTNOHAND; return ret; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 4a0e98d..0b1a337 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2318,19 +2318,17 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, size_t, sigsetsize) { int error; - sigset_t ksigmask, sigsaved; /* * If the caller wants a certain signal mask to be set during the wait, * we apply it here. */ - error = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + error = set_xxx(sigmask, sigsetsize); if (error) return error; error = do_epoll_wait(epfd, events, maxevents, timeout); - - restore_user_sigmask(sigmask, &sigsaved); + update_xxx(error == -EINTR); return error; } @@ -2343,19 +2341,17 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, compat_size_t, sigsetsize) { long err; - sigset_t ksigmask, sigsaved; /* * If the caller wants a certain signal mask to be set during the wait, * we apply it here. */ - err = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + err = set_compat_xxx(sigmask, sigsetsize); if (err) return err; err = do_epoll_wait(epfd, events, maxevents, timeout); - - restore_user_sigmask(sigmask, &sigsaved); + update_xxx(err == -EINTR); return err; } diff --git a/fs/io_uring.c b/fs/io_uring.c index 310f8d1..b5b99e2 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2180,7 +2180,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, const sigset_t __user *sig, size_t sigsz) { struct io_cq_ring *ring = ctx->cq_ring; - sigset_t ksigmask, sigsaved; int ret; if (io_cqring_events(ring) >= min_events) @@ -2189,24 +2188,21 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, if (sig) { #ifdef CONFIG_COMPAT if (in_compat_syscall()) - ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig, - &ksigmask, &sigsaved, sigsz); + ret = set_compat_xxx((const compat_sigset_t __user *)sig, + sigsz); else #endif - ret = set_user_sigmask(sig, &ksigmask, - &sigsaved, sigsz); + ret = set_xxx(sig, sigsz); if (ret) return ret; } ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events); + update_xxx(ret == -ERESTARTSYS); if (ret == -ERESTARTSYS) ret = -EINTR; - if (sig) - restore_user_sigmask(sig, &sigsaved); - return READ_ONCE(ring->r.head) == READ_ONCE(ring->r.tail) ? ret : 0; } diff --git a/fs/select.c b/fs/select.c index 6cbc9ff..7eab132 100644 --- a/fs/select.c +++ b/fs/select.c @@ -730,7 +730,6 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, const sigset_t __user *sigmask, size_t sigsetsize, enum poll_time_type type) { - sigset_t ksigmask, sigsaved; struct timespec64 ts, end_time, *to = NULL; int ret; @@ -753,15 +752,14 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, return -EINVAL; } - ret = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + ret = set_xxx(sigmask, sigsetsize); if (ret) return ret; ret = core_sys_select(n, inp, outp, exp, to); + update_xxx(ret == -ERESTARTNOHAND); ret = poll_select_copy_remaining(&end_time, tsp, type, ret); - restore_user_sigmask(sigmask, &sigsaved); - return ret; } @@ -1087,7 +1085,6 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, struct __kernel_timespec __user *, tsp, const sigset_t __user *, sigmask, size_t, sigsetsize) { - sigset_t ksigmask, sigsaved; struct timespec64 ts, end_time, *to = NULL; int ret; @@ -1100,18 +1097,16 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, return -EINVAL; } - ret = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + ret = set_xxx(sigmask, sigsetsize); if (ret) return ret; ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + update_xxx(ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; - ret = poll_select_copy_remaining(&end_time, tsp, PT_TIMESPEC, ret); return ret; @@ -1123,7 +1118,6 @@ SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds, struct old_timespec32 __user *, tsp, const sigset_t __user *, sigmask, size_t, sigsetsize) { - sigset_t ksigmask, sigsaved; struct timespec64 ts, end_time, *to = NULL; int ret; @@ -1136,18 +1130,16 @@ SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds, return -EINVAL; } - ret = set_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + ret = set_xxx(sigmask, sigsetsize); if (ret) return ret; ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + update_xxx(ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; - ret = poll_select_copy_remaining(&end_time, tsp, PT_OLD_TIMESPEC, ret); return ret; @@ -1322,7 +1314,6 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp, void __user *tsp, compat_sigset_t __user *sigmask, compat_size_t sigsetsize, enum poll_time_type type) { - sigset_t ksigmask, sigsaved; struct timespec64 ts, end_time, *to = NULL; int ret; @@ -1345,15 +1336,14 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp, return -EINVAL; } - ret = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + ret = set_compat_xxx(sigmask, sigsetsize); if (ret) return ret; ret = compat_core_sys_select(n, inp, outp, exp, to); + update_xxx(ret == -ERESTARTNOHAND); ret = poll_select_copy_remaining(&end_time, tsp, type, ret); - restore_user_sigmask(sigmask, &sigsaved); - return ret; } @@ -1406,7 +1396,6 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds, struct old_timespec32 __user *, tsp, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { - sigset_t ksigmask, sigsaved; struct timespec64 ts, end_time, *to = NULL; int ret; @@ -1419,18 +1408,16 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, return -EINVAL; } - ret = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + ret = set_compat_xxx(sigmask, sigsetsize); if (ret) return ret; ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + update_xxx(ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; - ret = poll_select_copy_remaining(&end_time, tsp, PT_OLD_TIMESPEC, ret); return ret; @@ -1442,7 +1429,6 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds, unsigned int, nfds, struct __kernel_timespec __user *, tsp, const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { - sigset_t ksigmask, sigsaved; struct timespec64 ts, end_time, *to = NULL; int ret; @@ -1455,18 +1441,16 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds, return -EINVAL; } - ret = set_compat_user_sigmask(sigmask, &ksigmask, &sigsaved, sigsetsize); + ret = set_compat_xxx(sigmask, sigsetsize); if (ret) return ret; ret = do_sys_poll(ufds, nfds, to); - restore_user_sigmask(sigmask, &sigsaved); - + update_xxx(ret == -EINTR); /* We can restart this syscall, usually */ if (ret == -EINTR) ret = -ERESTARTNOHAND; - ret = poll_select_copy_remaining(&end_time, tsp, PT_TIMESPEC, ret); return ret; diff --git a/include/linux/compat.h b/include/linux/compat.h index ebddcb6..b20b001 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -138,9 +138,7 @@ typedef struct { compat_sigset_word sig[_COMPAT_NSIG_WORDS]; } compat_sigset_t; -int set_compat_user_sigmask(const compat_sigset_t __user *usigmask, - sigset_t *set, sigset_t *oldset, - size_t sigsetsize); +int set_compat_xxx(const compat_sigset_t __user *umask, size_t sigsetsize); struct compat_sigaction { #ifndef __ARCH_HAS_IRIX_SIGACTION diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 38a0f07..8b5b537 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -417,7 +417,6 @@ void task_join_group_stop(struct task_struct *task); static inline void set_restore_sigmask(void) { set_thread_flag(TIF_RESTORE_SIGMASK); - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); } static inline void clear_tsk_restore_sigmask(struct task_struct *task) @@ -448,7 +447,6 @@ static inline bool test_and_clear_restore_sigmask(void) static inline void set_restore_sigmask(void) { current->restore_sigmask = true; - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); } static inline void clear_tsk_restore_sigmask(struct task_struct *task) { diff --git a/include/linux/signal.h b/include/linux/signal.h index 9702016..65f84ac 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -273,10 +273,8 @@ extern int group_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, enum pid_type type); extern int __group_send_sig_info(int, struct kernel_siginfo *, struct task_struct *); extern int sigprocmask(int, sigset_t *, sigset_t *); -extern int set_user_sigmask(const sigset_t __user *usigmask, sigset_t *set, - sigset_t *oldset, size_t sigsetsize); -extern void restore_user_sigmask(const void __user *usigmask, - sigset_t *sigsaved); +extern int set_xxx(const sigset_t __user *umask, size_t sigsetsize); +extern void update_xxx(bool interupted); extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; diff --git a/kernel/signal.c b/kernel/signal.c index d7b9d14..0a1ec68 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2861,79 +2861,56 @@ EXPORT_SYMBOL(sigprocmask); * * This is useful for syscalls such as ppoll, pselect, io_pgetevents and * epoll_pwait where a new sigmask is passed from userland for the syscalls. + * + * Note that it does set_restore_sigmask() in advance, so it must be always + * paired with update_xxx() before return from syscall. */ -int set_user_sigmask(const sigset_t __user *usigmask, sigset_t *set, - sigset_t *oldset, size_t sigsetsize) +int set_xxx(const sigset_t __user *umask, size_t sigsetsize) { - if (!usigmask) - return 0; + sigset_t *kmask; + if (!umask) + return 0; if (sigsetsize != sizeof(sigset_t)) return -EINVAL; - if (copy_from_user(set, usigmask, sizeof(sigset_t))) + if (copy_from_user(kmask, umask, sizeof(sigset_t))) return -EFAULT; - *oldset = current->blocked; - set_current_blocked(set); + set_restore_sigmask(); + current->saved_sigmask = current->blocked; + set_current_blocked(kmask); return 0; } -EXPORT_SYMBOL(set_user_sigmask); + +void update_xxx(bool interrupted) +{ + if (interrupted) + WARN_ON(!test_thread_flag(TIF_SIGPENDING)); + else + restore_saved_sigmask(); +} #ifdef CONFIG_COMPAT -int set_compat_user_sigmask(const compat_sigset_t __user *usigmask, - sigset_t *set, sigset_t *oldset, - size_t sigsetsize) +int set_compat_xxx(const compat_sigset_t __user *umask, size_t sigsetsize) { - if (!usigmask) - return 0; + sigset_t *kmask; + if (!umask) + return 0; if (sigsetsize != sizeof(compat_sigset_t)) return -EINVAL; - if (get_compat_sigset(set, usigmask)) + if (get_compat_sigset(kmask, umask)) return -EFAULT; - *oldset = current->blocked; - set_current_blocked(set); + set_restore_sigmask(); + current->saved_sigmask = current->blocked; + set_current_blocked(kmask); return 0; } -EXPORT_SYMBOL(set_compat_user_sigmask); #endif -/* - * restore_user_sigmask: - * usigmask: sigmask passed in from userland. - * sigsaved: saved sigmask when the syscall started and changed the sigmask to - * usigmask. - * - * This is useful for syscalls such as ppoll, pselect, io_pgetevents and - * epoll_pwait where a new sigmask is passed in from userland for the syscalls. - */ -void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved) -{ - - if (!usigmask) - return; - /* - * When signals are pending, do not restore them here. - * Restoring sigmask here can lead to delivering signals that the above - * syscalls are intended to block because of the sigmask passed in. - */ - if (signal_pending(current)) { - current->saved_sigmask = *sigsaved; - set_restore_sigmask(); - return; - } - - /* - * This is needed because the fast syscall return path does not restore - * saved_sigmask when signals are not pending. - */ - set_current_blocked(sigsaved); -} -EXPORT_SYMBOL(restore_user_sigmask); - /** * sys_rt_sigprocmask - change the list of currently blocked signals * @how: whether to add, remove, or set signals