On Wed, Nov 24, 2021 at 07:20:24PM +0000, Catalin Marinas wrote: > +++ b/fs/btrfs/ioctl.c > @@ -2223,7 +2223,8 @@ static noinline int search_ioctl(struct inode *inode, > > while (1) { > ret = -EFAULT; > - if (fault_in_writeable(ubuf + sk_offset, *buf_size - sk_offset)) > + if (fault_in_exact_writeable(ubuf + sk_offset, > + *buf_size - sk_offset)) > break; > > ret = btrfs_search_forward(root, &key, path, sk->min_transid); Couldn't we avoid all of this nastiness by doing ... @@ -2121,10 +2121,9 @@ static noinline int copy_to_sk(struct btrfs_path *path, * problem. Otherwise we'll fault and then copy the buffer in * properly this next time through */ - if (copy_to_user_nofault(ubuf + *sk_offset, &sh, sizeof(sh))) { - ret = 0; + ret = __copy_to_user_nofault(ubuf + *sk_offset, &sh, sizeof(sh)); + if (ret) goto out; - } *sk_offset += sizeof(sh); @@ -2196,6 +2195,7 @@ static noinline int search_ioctl(struct inode *inode, int ret; int num_found = 0; unsigned long sk_offset = 0; + unsigned long next_offset = 0; if (*buf_size < sizeof(struct btrfs_ioctl_search_header)) { *buf_size = sizeof(struct btrfs_ioctl_search_header); @@ -2223,7 +2223,8 @@ static noinline int search_ioctl(struct inode *inode, while (1) { ret = -EFAULT; - if (fault_in_writeable(ubuf + sk_offset, *buf_size - sk_offset)) + if (fault_in_writeable(ubuf + sk_offset + next_offset, + *buf_size - sk_offset - next_offset)) break; ret = btrfs_search_forward(root, &key, path, sk->min_transid); @@ -2235,11 +2236,12 @@ static noinline int search_ioctl(struct inode *inode, ret = copy_to_sk(path, &key, sk, buf_size, ubuf, &sk_offset, &num_found); btrfs_release_path(path); - if (ret) + if (ret > 0) + next_offset = ret; + else if (ret < 0) break; - } - if (ret > 0) + if (ret == -ENOSPC || ret > 0) ret = 0; err: sk->nr_items = num_found; (not shown: the tedious bits where the existing 'ret = 1' are converted to 'ret = -ENOSPC' in copy_to_sk()) (where __copy_to_user_nofault() is a new function that does exactly what copy_to_user_nofault() does, but returns the number of bytes copied) That way, the existing fault_in_writable() will get the fault, and we don't need to probe every 16 bytes.