Al Viro <viro@xxxxxxxxxxxxxxxxxx> writes: > [I'm going to send a pull request tomorrow if nobody yells; > please review and test - it seems to work fine here, but extra > eyes and extra testing would be very welcome] > > passing kmap_local_page() result to __kernel_write() is unsafe - > random ->write_iter() might (and 9p one does) get unhappy when > passed ITER_KVEC with pointer that came from kmap_local_page(). > > Fix by providing a variant of __kernel_write() that takes an iov_iter > from caller (__kernel_write() becomes a trivial wrapper) and adding > dump_emit_page() that parallels dump_emit(), except that instead of > __kernel_write() it uses __kernel_write_iter() with ITER_BVEC source. Acked-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx> >From a 10,000 foot view this makes sense. > Fixes: 3159ed57792b "fs/coredump: use kmap_local_page()" > Signed-off-by: Al Viro <viro@xxxxxxxxxxxxxxxxxx> > --- > diff --git a/fs/coredump.c b/fs/coredump.c > index 9f4aae202109..09dbc981ad5c 100644 > --- a/fs/coredump.c > +++ b/fs/coredump.c > @@ -832,6 +832,38 @@ static int __dump_skip(struct coredump_params *cprm, size_t nr) > } > } > > +static int dump_emit_page(struct coredump_params *cprm, struct page *page) > +{ > + struct bio_vec bvec = { > + .bv_page = page, > + .bv_offset = 0, > + .bv_len = PAGE_SIZE, > + }; > + struct iov_iter iter; > + struct file *file = cprm->file; > + loff_t pos = file->f_pos; > + ssize_t n; > + > + if (cprm->to_skip) { > + if (!__dump_skip(cprm, cprm->to_skip)) > + return 0; > + cprm->to_skip = 0; > + } > + if (cprm->written + PAGE_SIZE > cprm->limit) > + return 0; > + if (dump_interrupted()) > + return 0; > + iov_iter_bvec(&iter, WRITE, &bvec, 1, PAGE_SIZE); > + n = __kernel_write_iter(cprm->file, &iter, &pos); > + if (n != PAGE_SIZE) > + return 0; > + file->f_pos = pos; > + cprm->written += PAGE_SIZE; > + cprm->pos += PAGE_SIZE; > + > + return 1; > +} > + > int dump_emit(struct coredump_params *cprm, const void *addr, int nr) > { > if (cprm->to_skip) { > @@ -863,7 +895,6 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start, > > for (addr = start; addr < start + len; addr += PAGE_SIZE) { > struct page *page; > - int stop; > > /* > * To avoid having to allocate page tables for virtual address > @@ -874,12 +905,7 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start, > */ > page = get_dump_page(addr); > if (page) { > - void *kaddr = kmap_local_page(page); > - > - stop = !dump_emit(cprm, kaddr, PAGE_SIZE); > - kunmap_local(kaddr); > - put_page(page); > - if (stop) > + if (!dump_emit_page(cprm, page)) > return 0; > } else { > dump_skip(cprm, PAGE_SIZE); > diff --git a/fs/internal.h b/fs/internal.h > index 87e96b9024ce..3e206d3e317c 100644 > --- a/fs/internal.h > +++ b/fs/internal.h > @@ -16,6 +16,7 @@ struct shrink_control; > struct fs_context; > struct user_namespace; > struct pipe_inode_info; > +struct iov_iter; > > /* > * block/bdev.c > @@ -221,3 +222,5 @@ ssize_t do_getxattr(struct user_namespace *mnt_userns, > int setxattr_copy(const char __user *name, struct xattr_ctx *ctx); > int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, > struct xattr_ctx *ctx); > + > +ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos); > diff --git a/fs/read_write.c b/fs/read_write.c > index 1a261dcf1778..328ce8cf9a85 100644 > --- a/fs/read_write.c > +++ b/fs/read_write.c > @@ -496,14 +496,9 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t > } > > /* caller is responsible for file_start_write/file_end_write */ > -ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) > +ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos) > { > - struct kvec iov = { > - .iov_base = (void *)buf, > - .iov_len = min_t(size_t, count, MAX_RW_COUNT), > - }; > struct kiocb kiocb; > - struct iov_iter iter; > ssize_t ret; > > if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE))) > @@ -519,8 +514,7 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t > > init_sync_kiocb(&kiocb, file); > kiocb.ki_pos = pos ? *pos : 0; > - iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len); > - ret = file->f_op->write_iter(&kiocb, &iter); > + ret = file->f_op->write_iter(&kiocb, from); > if (ret > 0) { > if (pos) > *pos = kiocb.ki_pos; > @@ -530,6 +524,18 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t > inc_syscw(current); > return ret; > } > + > +/* caller is responsible for file_start_write/file_end_write */ > +ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) > +{ > + struct kvec iov = { > + .iov_base = (void *)buf, > + .iov_len = min_t(size_t, count, MAX_RW_COUNT), > + }; > + struct iov_iter iter; > + iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len); > + return __kernel_write_iter(file, &iter, pos); > +} > /* > * This "EXPORT_SYMBOL_GPL()" is more of a "EXPORT_SYMBOL_DONTUSE()", > * but autofs is one of the few internal kernel users that actually