On Mon, Jan 13, 2025 at 07:12:08PM +0200, Dmitry V. Levin wrote: > PTRACE_SET_SYSCALL_INFO is a generic ptrace API that complements > PTRACE_GET_SYSCALL_INFO by letting the ptracer modify details of > system calls the tracee is blocked in. > > This API allows ptracers to obtain and modify system call details > in a straightforward and architecture-agnostic way. > > The current implementation supports changing only those bits of system call > information that are used by strace, namely, syscall number, syscall > arguments, and syscall return value. > > Support for changing additional details returned by PTRACE_GET_SYSCALL_INFO, > such as instruction pointer and stack pointer, could be added later > if needed, by using struct ptrace_syscall_info.flags to specify > the additional details that should be set. Currently, flags and reserved > fields of struct ptrace_syscall_info must be initialized with zeroes; > arch, instruction_pointer, and stack_pointer fields are ignored. > > PTRACE_SET_SYSCALL_INFO currently supports only PTRACE_SYSCALL_INFO_ENTRY, > PTRACE_SYSCALL_INFO_EXIT, and PTRACE_SYSCALL_INFO_SECCOMP operations. > Other operations could be added later if needed. > > Ideally, PTRACE_SET_SYSCALL_INFO should have been introduced along with > PTRACE_GET_SYSCALL_INFO, but it didn't happen. The last straw that > convinced me to implement PTRACE_SET_SYSCALL_INFO was the apparent failure > to provide an API for changing the first system call argument on the riscv > architecture. > > ptrace(2) man page: > > long ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data); > ... > PTRACE_SET_SYSCALL_INFO > Modify information about the system call that caused the stop. > The "data" argument is a pointer to struct ptrace_syscall_info > that specifies the system call information to be set. > The "addr" argument should be set to sizeof(struct ptrace_syscall_info). 
> > Link: https://lore.kernel.org/all/59505464-c84a-403d-972f-d4b2055eeaac@xxxxxxxxx/ > Signed-off-by: Dmitry V. Levin <ldv@xxxxxxxxx> > --- > include/linux/ptrace.h | 3 ++ > include/uapi/linux/ptrace.h | 4 +- > kernel/ptrace.c | 95 +++++++++++++++++++++++++++++++++++++ > 3 files changed, 101 insertions(+), 1 deletion(-) > > diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h > index 90507d4afcd6..c8dbf1e498bf 100644 > --- a/include/linux/ptrace.h > +++ b/include/linux/ptrace.h > @@ -17,6 +17,9 @@ struct syscall_info { > struct seccomp_data data; > }; > > +/* sizeof() the first published struct ptrace_syscall_info */ > +#define PTRACE_SYSCALL_INFO_SIZE_VER0 84 > + > extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, > void *buf, int len, unsigned int gup_flags); > > diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h > index 72c038fc71d0..ca75b3ab5d22 100644 > --- a/include/uapi/linux/ptrace.h > +++ b/include/uapi/linux/ptrace.h > @@ -74,6 +74,7 @@ struct seccomp_metadata { > }; > > #define PTRACE_GET_SYSCALL_INFO 0x420e > +#define PTRACE_SET_SYSCALL_INFO 0x4212 > #define PTRACE_SYSCALL_INFO_NONE 0 > #define PTRACE_SYSCALL_INFO_ENTRY 1 > #define PTRACE_SYSCALL_INFO_EXIT 2 > @@ -81,7 +82,8 @@ struct seccomp_metadata { > > struct ptrace_syscall_info { > __u8 op; /* PTRACE_SYSCALL_INFO_* */ > - __u8 pad[3]; > + __u8 reserved; > + __u16 flags; > __u32 arch; > __u64 instruction_pointer; > __u64 stack_pointer; > diff --git a/kernel/ptrace.c b/kernel/ptrace.c > index 22e7d74cf4cd..41d37cb8f74a 100644 > --- a/kernel/ptrace.c > +++ b/kernel/ptrace.c > @@ -1016,6 +1016,97 @@ ptrace_get_syscall_info(struct task_struct *child, unsigned long user_size, > write_size = min(actual_size, user_size); > return copy_to_user(datavp, &info, write_size) ? 
-EFAULT : actual_size; > } > + > +static unsigned long > +ptrace_set_syscall_info_entry(struct task_struct *child, struct pt_regs *regs, > + struct ptrace_syscall_info *info) > +{ > + unsigned long args[ARRAY_SIZE(info->entry.args)]; > + int nr = info->entry.nr; > + int i; > + > + if (nr != info->entry.nr) > + return -ERANGE; > + > + for (i = 0; i < ARRAY_SIZE(args); i++) { > + args[i] = info->entry.args[i]; > + if (args[i] != info->entry.args[i]) > + return -ERANGE; > + } > + > + syscall_set_nr(child, regs, nr); > + /* > + * If the syscall number is set to -1, setting syscall arguments is not > + * just pointless, it would also clobber the syscall return value on > + * those architectures that share the same register both for the first > + * argument of syscall and its return value. > + */ > + if (nr != -1) > + syscall_set_arguments(child, regs, args); > + > + return 0; > +} > + > +static unsigned long > +ptrace_set_syscall_info_seccomp(struct task_struct *child, struct pt_regs *regs, > + struct ptrace_syscall_info *info) > +{ > + /* > + * info->entry is currently a subset of info->seccomp, > + * info->seccomp.ret_data is currently ignored. 
> + */ > + return ptrace_set_syscall_info_entry(child, regs, info); > +} > + > +static unsigned long > +ptrace_set_syscall_info_exit(struct task_struct *child, struct pt_regs *regs, > + struct ptrace_syscall_info *info) > +{ > + if (info->exit.is_error) > + syscall_set_return_value(child, regs, info->exit.rval, 0); > + else > + syscall_set_return_value(child, regs, 0, info->exit.rval); > + > + return 0; > +} > + > +static int > +ptrace_set_syscall_info(struct task_struct *child, unsigned long user_size, > + void __user *datavp) > +{ > + struct pt_regs *regs = task_pt_regs(child); > + struct ptrace_syscall_info info; > + int error; > + > + BUILD_BUG_ON(sizeof(struct ptrace_syscall_info) < PTRACE_SYSCALL_INFO_SIZE_VER0); > + > + if (user_size < PTRACE_SYSCALL_INFO_SIZE_VER0 || user_size > PAGE_SIZE) > + return -EINVAL; > + > + error = copy_struct_from_user(&info, sizeof(info), datavp, user_size); > + if (error) > + return error; > + > + /* Reserved for future use. */ > + if (info.flags || info.reserved) > + return -EINVAL; > + > + /* Changing the type of the system call stop is not supported. */ > + if (ptrace_get_syscall_info_op(child) != info.op) Since changing the type of the stop isn't supported anyway, would it make sense to set info.op to ptrace_get_syscall_info_op(child), as is done for get_syscall_info? The use case I see for this is simplifying things for users who don't call PTRACE_GET_SYSCALL_INFO before calling PTRACE_SET_SYSCALL_INFO. - Charlie > + return -EINVAL; > + > + switch (info.op) { > + case PTRACE_SYSCALL_INFO_ENTRY: > + return ptrace_set_syscall_info_entry(child, regs, &info); > + case PTRACE_SYSCALL_INFO_EXIT: > + return ptrace_set_syscall_info_exit(child, regs, &info); > + case PTRACE_SYSCALL_INFO_SECCOMP: > + return ptrace_set_syscall_info_seccomp(child, regs, &info); > + default: > + /* Other types of system call stops are not supported. 
*/ > + return -EINVAL; > + } > +} > #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ > > int ptrace_request(struct task_struct *child, long request, > @@ -1234,6 +1325,10 @@ int ptrace_request(struct task_struct *child, long request, > case PTRACE_GET_SYSCALL_INFO: > ret = ptrace_get_syscall_info(child, addr, datavp); > break; > + > + case PTRACE_SET_SYSCALL_INFO: > + ret = ptrace_set_syscall_info(child, addr, datavp); > + break; > #endif > > case PTRACE_SECCOMP_GET_FILTER: > -- > ldv