On Mon, Jan 6, 2025 at 6:12 PM Jordan Rome <linux@xxxxxxxxxxxxxx> wrote: > > This new kfunc will be able to copy a string > from another process's/task's address space. nit: this is kernel code, task is unambiguous, so I'd drop the "process" reference here > This is similar to `bpf_copy_from_user_str` > but accepts a `struct task_struct*` argument. > > This required adding an additional function > in memory.c, namely `copy_str_from_process_vm`, > which works similar to `access_process_vm` > but utilizes the `strncpy_from_user` helper > and only supports reading/copying and not writing. > > Signed-off-by: Jordan Rome <linux@xxxxxxxxxxxxxx> > --- > include/linux/mm.h | 3 ++ > kernel/bpf/helpers.c | 46 ++++++++++++++++++++ > mm/memory.c | 101 +++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 150 insertions(+) > please check kernel test bot's complains as well > diff --git a/include/linux/mm.h b/include/linux/mm.h > index c39c4945946c..52b304b20630 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -2484,6 +2484,9 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, > extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, > void *buf, int len, unsigned int gup_flags); > > +extern int copy_str_from_process_vm(struct task_struct *tsk, unsigned long addr, > + void *buf, int len, unsigned int gup_flags); nit: curious what mm folks think about naming, I'd go with a slightly less verbose naming: "copy_remote_vm_str" (copy vs access, _str suffix for non fixed-sized semantics marking) for the next revision, let's split out mm parts from helpers parts, I don't think we lose much as this new internal API is self-contained, and it will be easier for mm folks to review > + > long get_user_pages_remote(struct mm_struct *mm, > unsigned long start, unsigned long nr_pages, > unsigned int gup_flags, struct page **pages, > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c > index cd5f9884d85b..45d41b7a9906 100644 > --- a/kernel/bpf/helpers.c > +++ b/kernel/bpf/helpers.c > @@ -3072,6 +3072,51 @@ __bpf_kfunc void bpf_local_irq_restore(unsigned long *flags__irq_flag) > local_irq_restore(*flags__irq_flag); > } > > +/** > + * bpf_copy_from_user_task_str() - Copy a string from an task's address space > + * @dst: Destination address, in kernel space. This buffer must be > + * at least @dst__sz bytes long. > + * @dst__sz: Maximum number of bytes to copy, includes the trailing NUL. > + * @unsafe_ptr__ign: Source address in the task's address space. > + * @tsk: The task whose address space will be used > + * @flags: The only supported flag is BPF_F_PAD_ZEROS > + * > + * Copies a NULL-terminated string from a task's address space to BPF space. there is no "BPF space", really... maybe "copies string into *dst* buffer" > + * If user string is too long this will still ensure zero termination in the > + * dst buffer unless buffer size is 0. > + * > + * If BPF_F_PAD_ZEROS flag is set, memset the tail of @dst to 0 on success and > + * memset all of @dst on failure. > + */ > +__bpf_kfunc int bpf_copy_from_user_task_str(void *dst, u32 dst__sz, const void __user *unsafe_ptr__ign, struct task_struct *tsk, u64 flags) this looks like a long line, does it fit under 100 characters? > +{ > + int count = dst__sz - 1; > + int ret = 0; > + > + if (unlikely(flags & ~BPF_F_PAD_ZEROS)) > + return -EINVAL; > + > + if (unlikely(!dst__sz)) > + return 0; > + > + ret = copy_str_from_process_vm(tsk, (unsigned long)unsafe_ptr__ign, dst, count, 0); > + > + if (ret <= 0) { > + if (flags & BPF_F_PAD_ZEROS) > + memset((char *)dst, 0, dst__sz); nit: no need for (char *) cast? memset takes void *, I think > + return ret; if ret == 0, is that an error? If so, `return ret ?: -EINVAL;` or something along those lines? pw-bot: cr > + } > + > + if (ret < count) { > + if (flags & BPF_F_PAD_ZEROS) > + memset((char *)dst + ret, 0, dst__sz - ret); > + } else { > + ((char *)dst)[count] = '\0'; > + } > + > + return ret + 1; > +} > + > __bpf_kfunc_end_defs(); > > BTF_KFUNCS_START(generic_btf_ids) > @@ -3164,6 +3209,7 @@ BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW) > BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL) > BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY) > BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE) > +BTF_ID_FLAGS(func, bpf_copy_from_user_task_str, KF_SLEEPABLE) > BTF_ID_FLAGS(func, bpf_get_kmem_cache) > BTF_ID_FLAGS(func, bpf_iter_kmem_cache_new, KF_ITER_NEW | KF_SLEEPABLE) > BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE) > diff --git a/mm/memory.c b/mm/memory.c > index 75c2dfd04f72..514490bd7d6d 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -6673,6 +6673,75 @@ static int __access_remote_vm(struct mm_struct *mm, unsigned long addr, > return buf - old_buf; > } > > +/* > + * Copy a string from another process's address space as given in mm. > + * Don't return partial results. If there is any error return -EFAULT. What does "don't return partial results" mean? What happens if we read part of a string and then fail to read the rest? > + */ > +static int __copy_str_from_remote_vm(struct mm_struct *mm, unsigned long addr, > + void *buf, int len, unsigned int gup_flags) > +{ > + void *old_buf = buf; > + int err = 0; > + > + if (mmap_read_lock_killable(mm)) > + return -EFAULT; > + > + /* Untag the address before looking up the VMA */ > + addr = untagged_addr_remote(mm, addr); > + > + /* Avoid triggering the temporary warning in __get_user_pages */ > + if (!vma_lookup(mm, addr)) { > + mmap_read_unlock(mm); > + return -EFAULT; maybe let's do (so that we do mmap_read_unlock in just one place) err = -EFAULT; goto err_out; and then see below > + } > + > + while (len) { > + int bytes, offset, retval; > + void *maddr; > + struct vm_area_struct *vma = NULL; > + struct page *page = get_user_page_vma_remote(mm, addr, > + gup_flags, &vma); > + nit: I'd split page declaration and assignment and kept get_user_page_vma_remote() invocation on a single line > + if (IS_ERR(page)) { > + /* > + * Treat as a total failure for now until we decide how > + * to handle the CONFIG_HAVE_IOREMAP_PROT case and > + * stack expansion. > + */ > + err = -EFAULT; > + break; > + } > + > + bytes = len; > + offset = addr & (PAGE_SIZE - 1); > + if (bytes > PAGE_SIZE - offset) > + bytes = PAGE_SIZE - offset; > + > + maddr = kmap_local_page(page); > + retval = strncpy_from_user(buf, (const char __user *)addr, bytes); you are not using maddr... that seems wrong (even if it works due to how kmap_local_page is currently implemented) > + unmap_and_put_page(page, maddr); > + > + if (retval < 0) { > + err = retval; > + break; > + } > + > + len -= retval; > + buf += retval; > + addr += retval; > + > + /* Found the end of the string */ > + if (retval < bytes) > + break; > + } err_out: here > + mmap_read_unlock(mm); > + > + if (err) > + return err; > + > + return buf - old_buf; > +} > + > /** > * access_remote_vm - access another process' address space > * @mm: the mm_struct of the target address space > @@ -6714,6 +6783,38 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, > } > EXPORT_SYMBOL_GPL(access_process_vm); > > +/** > + * copy_str_from_process_vm - copy a string from another process's address space. > + * @tsk: the task of the target address space > + * @addr: start address to access access -> read from > + * @buf: source or destination buffer for this api it's always the destination, right? > + * @len: number of bytes to transfer > + * @gup_flags: flags modifying lookup behaviour > + * > + * The caller must hold a reference on @mm. > + * > + * Return: number of bytes copied from source to destination. If the string > + * is shorter than @len then return the length of the string. and if the string is longer than @len, then what happens? we should either specify or drop the "if string is shorter bit" and make it unambiguous whether terminating zero is included or not > + * On any error, return -EFAULT. > + */ > +int copy_str_from_process_vm(struct task_struct *tsk, unsigned long addr, > + void *buf, int len, unsigned int gup_flags) > +{ > + struct mm_struct *mm; > + int ret; > + > + mm = get_task_mm(tsk); > + if (!mm) > + return -EFAULT; > + > + ret = __copy_str_from_remote_vm(mm, addr, buf, len, gup_flags); > + > + mmput(mm); > + > + return ret; > +} > +EXPORT_SYMBOL_GPL(copy_str_from_process_vm); > + > /* > * Print the name of a VMA. > */ > -- > 2.43.5 >