On Thu, Sep 19, 2019 at 8:09 AM Thomas Gleixner <tglx@xxxxxxxxxxxxx> wrote:
>
> On syscall entry certain work needs to be done conditionally like tracing,
> seccomp etc. This code is duplicated in all architectures.
>
> Provide a generic version.
>
> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> ---
> arch/Kconfig | 3 +
> include/linux/entry-common.h | 122 +++++++++++++++++++++++++++++++++++++++++++
> kernel/Makefile | 1
> kernel/entry/Makefile | 3 +
> kernel/entry/common.c | 33 +++++++++++
> 5 files changed, 162 insertions(+)
>
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -27,6 +27,9 @@ config HAVE_IMA_KEXEC
> config HOTPLUG_SMT
> bool
>
> +config GENERIC_ENTRY
> + bool
> +
> config OPROFILE
> tristate "OProfile system profiling"
> depends on PROFILING
> --- /dev/null
> +++ b/include/linux/entry-common.h
> @@ -0,0 +1,122 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __LINUX_ENTRYCOMMON_H
> +#define __LINUX_ENTRYCOMMON_H
> +
> +#include <linux/tracehook.h>
> +#include <linux/syscalls.h>
> +#include <linux/seccomp.h>
> +#include <linux/sched.h>
> +#include <linux/audit.h>
> +
> +#include <asm/entry-common.h>
> +
> +/*
> + * Define dummy _TIF work flags if not defined by the architecture or for
> + * disabled functionality.
> + */
> +#ifndef _TIF_SYSCALL_TRACE
> +# define _TIF_SYSCALL_TRACE (0)
> +#endif
> +
> +#ifndef _TIF_SYSCALL_EMU
> +# define _TIF_SYSCALL_EMU (0)
> +#endif
> +
> +#ifndef _TIF_SYSCALL_TRACEPOINT
> +# define _TIF_SYSCALL_TRACEPOINT (0)
> +#endif
> +
> +#ifndef _TIF_SECCOMP
> +# define _TIF_SECCOMP (0)
> +#endif
> +
> +#ifndef _TIF_AUDIT
> +# define _TIF_AUDIT (0)
> +#endif

I'm wondering if these dummy flags should be named __TIF_ (double-underscore) or MAYBE_TIF_ or something similar, to avoid errors where people do flags |= _TIF_WHATEVER and get surprised when it silently does nothing because the flag was defined as 0.
> +/**
> + * syscall_enter_from_usermode - Check and handle work before invoking
> + * a syscall
> + * @regs: Pointer to currents pt_regs
> + * @syscall: The syscall number
> + *
> + * Invoked from architecture specific syscall entry code with interrupts
> + * enabled.
> + *
> + * Returns: The original or a modified syscall number
> + */

Maybe document that it can return -1 to skip the syscall and that, when it does, it may already have called syscall_set_error() or syscall_set_return_value() to set the result. If neither of those was called and -1 is returned, then the syscall will fail with ENOSYS.