On Sun, Jan 26, 2025 at 8:39 AM Jordan Rome <linux@xxxxxxxxxxxxxx> wrote: > > Similar to `access_process_vm` but specific to strings. > Also chunks reads by page and utilizes `strscpy` > for handling null termination. > > Signed-off-by: Jordan Rome <linux@xxxxxxxxxxxxxx> > --- > include/linux/mm.h | 3 ++ > mm/memory.c | 118 +++++++++++++++++++++++++++++++++++++++++++++ > mm/nommu.c | 67 +++++++++++++++++++++++++ > 3 files changed, 188 insertions(+) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index f02925447e59..f3a05b3eb2f2 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -2485,6 +2485,9 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, > extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, > void *buf, int len, unsigned int gup_flags); > > +extern int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr, > + void *buf, int len, unsigned int gup_flags); > + > long get_user_pages_remote(struct mm_struct *mm, > unsigned long start, unsigned long nr_pages, > unsigned int gup_flags, struct page **pages, > diff --git a/mm/memory.c b/mm/memory.c > index 398c031be9ba..e1ed5095b258 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -6714,6 +6714,124 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, > } > EXPORT_SYMBOL_GPL(access_process_vm); > > +/* > + * Copy a string from another process's address space as given in mm. > + * If there is any error return -EFAULT. > + */ > +static int __copy_remote_vm_str(struct mm_struct *mm, unsigned long addr, > + void *buf, int len, unsigned int gup_flags) > +{ > + void *old_buf = buf; > + int err = 0; > + > + if (mmap_read_lock_killable(mm)) > + return -EFAULT; > + > + /* Untag the address before looking up the VMA */ > + addr = untagged_addr_remote(mm, addr); > + > + /* Avoid triggering the temporary warning in __get_user_pages */ > + if (!vma_lookup(mm, addr)) { > + err = -EFAULT; > + goto out; > + } > + > + while (len) { > + int bytes, offset, retval; > + void *maddr; > + struct page *page; > + struct vm_area_struct *vma = NULL; > + > + page = get_user_page_vma_remote(mm, addr, gup_flags, &vma); > + > + if (IS_ERR(page)) { > + /* > + * Treat as a total failure for now until we decide how > + * to handle the CONFIG_HAVE_IOREMAP_PROT case and > + * stack expansion. > + */ > + err = -EFAULT; > + goto out; > + } > + > + bytes = len; > + offset = addr & (PAGE_SIZE - 1); > + if (bytes > PAGE_SIZE - offset) > + bytes = PAGE_SIZE - offset; > + > + maddr = kmap_local_page(page); > + retval = strscpy(buf, maddr + offset, bytes); > + unmap_and_put_page(page, maddr); > + > + if (retval > -1) { nit: retval >= 0 is more conventional, -1 has no special meaning here (it's -EPERM, if anything), zero does > + /* Found the end of the string */ > + buf += retval; > + goto out; > + } > + > + retval = bytes - 1; > + buf += retval; > + > + if (bytes == len) > + goto out; > + > + /* > + * Because strscpy always NUL terminates we need to > + * copy the last byte in the page if we are going to > + * load more pages > + */ > + addr += retval; > + len -= retval; > + copy_from_user_page(vma, > + page, > + addr, > + buf, > + maddr + (PAGE_SIZE - 1), > + 1); just realized, you've already unmap_and_put_page(), and yet you are trying to access it here again. Seems like you'll need to delay that unmap_and_put also, stylistical nit: you have tons of short arguments, keep them on smaller number of lines, it's taking way too much vertical space for what it is Also, I was worried about non-zero terminated buf here. It still can happen if subsequent get_user_page_vma_remote() fails and we exit early, but we'll end up returning -EFAULT, so perhaps that's not a problem. On the other hand, it's trivial to do buf[1] = '\0'; to keep that buf always zero-terminated, so maybe I'd do that... And if we do buf[0] = '\0'; at the very beginning, we can document that copy_remote_vm_str() leaves valid zero-terminated buffer of whatever it managed to read before EFAULT, which isn't a bad property that basically comes for free, no? pw-bot: cr > + len -= 1; > + buf += 1; > + addr += 1; > + } > + > +out: > + mmap_read_unlock(mm); > + if (err) > + return err; > + > + return buf - old_buf; > +} > + > +/** > + * copy_remote_vm_str - copy a string from another process's address space. > + * @tsk: the task of the target address space > + * @addr: start address to read from > + * @buf: destination buffer > + * @len: number of bytes to copy > + * @gup_flags: flags modifying lookup behaviour > + * > + * The caller must hold a reference on @mm. > + * > + * Return: number of bytes copied from @addr (source) to @buf (destination); > + * not including the trailing NUL. On any error, return -EFAULT. "On success, leaves a properly NUL-terminated buffer." (or if we do adjustments I suggested above we can even say "Even on error will leave properly NUL-terminated buffer with contents that was successfully read before -EFAULT", or something along those lines). > + */ > +int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr, > + void *buf, int len, unsigned int gup_flags) > +{ > + struct mm_struct *mm; > + int ret; > + > + mm = get_task_mm(tsk); > + if (!mm) > + return -EFAULT; > + > + ret = __copy_remote_vm_str(mm, addr, buf, len, gup_flags); > + > + mmput(mm); > + > + return ret; > +} > +EXPORT_SYMBOL_GPL(copy_remote_vm_str); > + > /* > * Print the name of a VMA. > */ > diff --git a/mm/nommu.c b/mm/nommu.c > index 9cb6e99215e2..ce24ea829c73 100644 > --- a/mm/nommu.c > +++ b/mm/nommu.c > @@ -1701,6 +1701,73 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in > } > EXPORT_SYMBOL_GPL(access_process_vm); > > +/* > + * Copy a string from another process's address space as given in mm. > + * If there is any error return -EFAULT. > + */ > +static int __copy_remote_vm_str(struct mm_struct *mm, unsigned long addr, > + void *buf, int len) > +{ > + int ret; > + struct vm_area_struct *vma; > + > + if (mmap_read_lock_killable(mm)) > + return -EFAULT; > + > + /* the access must start within one of the target process's mappings */ > + vma = find_vma(mm, addr); > + if (vma) { > + /* don't overrun this mapping */ > + if (addr + len >= vma->vm_end) Should we worry about overflows here? check_add_overflow() maybe? > + len = vma->vm_end - addr; > + > + /* only read mappings where it is permitted */ > + if (vma->vm_flags & VM_MAYREAD) { > + ret = strscpy(buf, (char *)addr, len); > + if (ret < 0) > + ret = len - 1; > + } else { > + ret = -EFAULT; > + } > + } else { > + ret = -EFAULT; > + } > + nit: might be cleaner to have `ret = -EFAULT;` before `if (vma)` and only set ret on success/overflow? > + mmap_read_unlock(mm); > + return ret; > +} > + > +/** > + * copy_remote_vm_str - copy a string from another process's address space. > + * @tsk: the task of the target address space > + * @addr: start address to read from > + * @buf: destination buffer > + * @len: number of bytes to copy > + * @gup_flags: flags modifying lookup behaviour (unused) > + * > + * The caller must hold a reference on @mm. > + * > + * Return: number of bytes copied from @addr (source) to @buf (destination); > + * not including the trailing NUL. On any error, return -EFAULT. > + */ > +int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr, > + void *buf, int len, unsigned int gup_flags) > +{ > + struct mm_struct *mm; > + int ret; > + > + mm = get_task_mm(tsk); > + if (!mm) > + return -EFAULT; > + > + ret = __copy_remote_vm_str(mm, addr, buf, len); > + > + mmput(mm); > + > + return ret; > +} > +EXPORT_SYMBOL_GPL(copy_remote_vm_str); > + > /** > * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode > * @inode: The inode to check > -- > 2.43.5 > >