On Thu, Aug 11, 2016 at 08:03:58PM -0300, Thiago Jung Bauermann wrote: > From: AKASHI Takahiro <takahiro.akashi at linaro.org> > > Device tree blob must be passed to a second kernel on DTB-capable > archs, like powerpc and arm64, but the current kernel interface > lacks this support. > > This patch extends kexec_file_load system call by adding an extra > argument to this syscall so that an arbitrary number of file descriptors > can be handed out from user space to the kernel. > > long sys_kexec_file_load(int kernel_fd, int initrd_fd, > unsigned long cmdline_len, > const char __user *cmdline_ptr, > unsigned long flags, > const struct kexec_fdset __user *ufdset); > > If KEXEC_FILE_EXTRA_FDS is set to the "flags" argument, the "ufdset" > argument points to the following struct buffer: > > struct kexec_fdset { > int nr_fds; > struct kexec_file_fd fds[0]; > } > > Signed-off-by: AKASHI Takahiro <takahiro.akashi at linaro.org> > Signed-off-by: Thiago Jung Bauermann <bauerman at linux.vnet.ibm.com> > --- > include/linux/fs.h | 1 + > include/linux/kexec.h | 7 ++-- > include/linux/syscalls.h | 4 ++- > include/uapi/linux/kexec.h | 22 ++++++++++++ > kernel/kexec_file.c | 83 ++++++++++++++++++++++++++++++++++++++++++---- > 5 files changed, 108 insertions(+), 9 deletions(-) > > diff --git a/include/linux/fs.h b/include/linux/fs.h > index 3523bf62f328..847d9c31f428 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -2656,6 +2656,7 @@ extern int do_pipe_flags(int *, int); > id(MODULE, kernel-module) \ > id(KEXEC_IMAGE, kexec-image) \ > id(KEXEC_INITRAMFS, kexec-initramfs) \ > + id(KEXEC_PARTIAL_DTB, kexec-partial-dtb) \ The backspace is over-indented? > id(POLICY, security-policy) \ > id(MAX_ID, ) > > diff --git a/include/linux/kexec.h b/include/linux/kexec.h > index 4f85d284ed0b..29202935055d 100644 > --- a/include/linux/kexec.h > +++ b/include/linux/kexec.h > @@ -148,7 +148,10 @@ struct kexec_file_ops { > kexec_verify_sig_t *verify_sig; > #endif > }; > -#endif > + > +int __weak arch_kexec_verify_buffer(enum kexec_file_type type, const void *buf, > + unsigned long size); > +#endif /* CONFIG_KEXEC_FILE */ > > struct kimage { > kimage_entry_t head; > @@ -280,7 +283,7 @@ extern int kexec_load_disabled; > > /* List of defined/legal kexec file flags */ > #define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ > - KEXEC_FILE_NO_INITRAMFS) > + KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_EXTRA_FDS) > > #define VMCOREINFO_BYTES (4096) > #define VMCOREINFO_NOTE_NAME "VMCOREINFO" > diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h > index d02239022bd0..fc072bdb74e3 100644 > --- a/include/linux/syscalls.h > +++ b/include/linux/syscalls.h > @@ -66,6 +66,7 @@ struct perf_event_attr; > struct file_handle; > struct sigaltstack; > union bpf_attr; > +struct kexec_fdset; > > #include <linux/types.h> > #include <linux/aio_abi.h> > @@ -321,7 +322,8 @@ asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, > asmlinkage long sys_kexec_file_load(int kernel_fd, int initrd_fd, > unsigned long cmdline_len, > const char __user *cmdline_ptr, > - unsigned long flags); > + unsigned long flags, > + const struct kexec_fdset __user *ufdset); > > asmlinkage long sys_exit(int error_code); > asmlinkage long sys_exit_group(int error_code); > diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h > index aae5ebf2022b..6279be79efba 100644 > --- a/include/uapi/linux/kexec.h > +++ b/include/uapi/linux/kexec.h > @@ -23,6 +23,28 @@ > #define KEXEC_FILE_UNLOAD 0x00000001 > #define KEXEC_FILE_ON_CRASH 0x00000002 > #define KEXEC_FILE_NO_INITRAMFS 0x00000004 > +#define KEXEC_FILE_EXTRA_FDS 0x00000008 > + > +enum kexec_file_type { > + KEXEC_FILE_TYPE_KERNEL, > + KEXEC_FILE_TYPE_INITRAMFS, > + > + /* > + * Device Tree Blob containing just the nodes and properties that > + * the kexec_file_load caller wants to add or modify. > + */ > + KEXEC_FILE_TYPE_PARTIAL_DTB, > +}; > + > +struct kexec_file_fd { > + enum kexec_file_type type; > + int fd; > +}; > + > +struct kexec_fdset { > + int nr_fds; > + struct kexec_file_fd fds[0]; > +}; > > /* These values match the ELF architecture values. > * Unless there is a good reason that should continue to be the case. > diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c > index 113af2f219b9..d6803dd884e2 100644 > --- a/kernel/kexec_file.c > +++ b/kernel/kexec_file.c > @@ -25,6 +25,9 @@ > #include <linux/vmalloc.h> > #include "kexec_internal.h" > > +#define MAX_FDSET_SIZE (sizeof(struct kexec_fdset) + \ > + KEXEC_SEGMENT_MAX * sizeof(struct kexec_file_fd)) > + > /* > * Declare these symbols weak so that if architecture provides a purgatory, > * these will be overridden. > @@ -116,6 +119,22 @@ void kimage_file_post_load_cleanup(struct kimage *image) > image->image_loader_data = NULL; > } > > +/** > + * arch_kexec_verify_buffer() - check that the given kexec file is valid > + * > + * Device trees in particular can contain properties that may make the kernel > + * execute code that it wasn't supposed to (e.g., use the wrong entry point > + * when calling firmware functions). Because of this, the kernel needs to > + * verify that it is safe to use the device tree blob passed from userspace. > + * > + * Return: 0 on success, negative errno on error. > + */ > +int __weak arch_kexec_verify_buffer(enum kexec_file_type type, const void *buf, > + unsigned long size) > +{ > + return -EINVAL; > +} > + > /* > * In file mode list of segments is prepared by kernel. Copy relevant > * data from user space, do error checking, prepare segment list > @@ -123,7 +142,8 @@ void kimage_file_post_load_cleanup(struct kimage *image) > static int > kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, > const char __user *cmdline_ptr, > - unsigned long cmdline_len, unsigned flags) > + unsigned long cmdline_len, unsigned long flags, > + const struct kexec_fdset __user *ufdset) > { > int ret = 0; > void *ldata; > @@ -160,6 +180,55 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, > image->initrd_buf_len = size; > } > > + if (flags & KEXEC_FILE_EXTRA_FDS) { > + int nr_fds, i; > + size_t fdset_size; > + char fdset_buf[MAX_FDSET_SIZE]; Do we really want this on the stack? I presume the size is not large > + struct kexec_fdset *fdset = (struct kexec_fdset *) fdset_buf; > + > + ret = copy_from_user(&nr_fds, ufdset, sizeof(int)); > + if (ret) { > + ret = -EFAULT; > + goto out; > + } > + > + if (nr_fds > KEXEC_SEGMENT_MAX) { We need an nr_fds < 0 check as well > + ret = -E2BIG; > + goto out; > + } > + > + fdset_size = sizeof(struct kexec_fdset) > + + nr_fds * sizeof(struct kexec_file_fd); > + > + ret = copy_from_user(fdset, ufdset, fdset_size); Can the user change nr_fds between the two copy_from_users, ideally not, but we should validate it. > + if (ret) { > + ret = -EFAULT; > + goto out; > + } > + > + for (i = 0; i < fdset->nr_fds; i++) { > + if (fdset->fds[i].type == KEXEC_FILE_TYPE_PARTIAL_DTB) { > + ret = kernel_read_file_from_fd(fdset->fds[i].fd, > + &image->dtb_buf, &size, INT_MAX, > + READING_KEXEC_PARTIAL_DTB); > + if (ret) > + goto out; > + image->dtb_buf_len = size; > + > + ret = arch_kexec_verify_buffer(KEXEC_FILE_TYPE_PARTIAL_DTB, > + image->dtb_buf, > + image->dtb_buf_len); > + if (ret) > + goto out; > + } else { > + pr_debug("unknown file type %d failed.\n", > + fdset->fds[i].type); > + ret = -EINVAL; > + goto out; > + } > + } > + } > + > if (cmdline_len) { > image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL); > if (!image->cmdline_buf) { > @@ -202,7 +271,8 @@ out: > static int > kimage_file_alloc_init(struct kimage **rimage, int kernel_fd, > int initrd_fd, const char __user *cmdline_ptr, > - unsigned long cmdline_len, unsigned long flags) > + unsigned long cmdline_len, unsigned long flags, > + const struct kexec_fdset __user *ufdset) > { > int ret; > struct kimage *image; > @@ -221,7 +291,8 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd, > } > > ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd, > - cmdline_ptr, cmdline_len, flags); > + cmdline_ptr, cmdline_len, flags, > + ufdset); > if (ret) > goto out_free_image; > > @@ -256,9 +327,9 @@ out_free_image: > return ret; > } > > -SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, > +SYSCALL_DEFINE6(kexec_file_load, int, kernel_fd, int, initrd_fd, > unsigned long, cmdline_len, const char __user *, cmdline_ptr, > - unsigned long, flags) > + unsigned long, flags, const struct kexec_fdset __user *, ufdset) > { > int ret = 0, i; > struct kimage **dest_image, *image; > @@ -295,7 +366,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, > kimage_free(xchg(&kexec_crash_image, NULL)); > > ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr, > - cmdline_len, flags); > + cmdline_len, flags, ufdset); > if (ret) > goto out; > Balbir Singh.