Adds the machinery needed by kexec_file_load, except actually loading a kernel and initrd. elf64_apply_relocate_add was extended to support relative symbols. This is necessary because before relocation, the module loading mechanism adjusts Elf64_Sym.st_value to point to the absolute memory address, while the kexec purgatory relocation code does that during relocation. We also add relocation types used by the purgatory. Cc: kexec at lists.infradead.org Cc: linux-kernel at vger.kernel.org --- arch/powerpc/Kconfig | 13 +++ arch/powerpc/include/asm/elf_util.h | 1 + arch/powerpc/include/asm/systbl.h | 1 + arch/powerpc/include/asm/unistd.h | 2 +- arch/powerpc/include/uapi/asm/unistd.h | 1 + arch/powerpc/kernel/Makefile | 4 +- arch/powerpc/kernel/elf_util_64.c | 83 +++++++++++++++++++- arch/powerpc/kernel/machine_kexec_64.c | 139 +++++++++++++++++++++++++++++++++ arch/powerpc/kernel/module_64.c | 5 +- 9 files changed, 245 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 01f7464d9fea..3ed5770b89e4 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -457,6 +457,19 @@ config KEXEC interface is strongly in flux, so no good recommendation can be made. +config KEXEC_FILE + bool "kexec file based system call" + select KEXEC_CORE + select BUILD_BIN2C + depends on PPC64 + depends on CRYPTO=y + depends on CRYPTO_SHA256=y + help + This is a new version of the kexec system call. This call is + file based and takes in file descriptors as system call arguments + for kernel and initramfs as opposed to a list of segments as is the + case for the older kexec call. 
+ config CRASH_DUMP bool "Build a kdump crash kernel" depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP) diff --git a/arch/powerpc/include/asm/elf_util.h b/arch/powerpc/include/asm/elf_util.h index 9cd24bc528b1..41a172077f9e 100644 --- a/arch/powerpc/include/asm/elf_util.h +++ b/arch/powerpc/include/asm/elf_util.h @@ -86,6 +86,7 @@ int elf64_apply_relocate_add(const struct elf_info *elf_info, const char *strtab, const Elf64_Rela *rela, unsigned int num_rela, void *syms_base, void *loc_base, Elf64_Addr addr_base, + bool relative_symbols, bool check_symbols, const char *obj_name); #endif /* _ASM_POWERPC_ELF_UTIL_H */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 2fc5d4db503c..4b369d83fe9c 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -386,3 +386,4 @@ SYSCALL(mlock2) SYSCALL(copy_file_range) COMPAT_SYS_SPU(preadv2) COMPAT_SYS_SPU(pwritev2) +SYSCALL(kexec_file_load) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index cf12c580f6b2..a01e97d3f305 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include <uapi/asm/unistd.h> -#define NR_syscalls 382 +#define NR_syscalls 383 #define __NR__exit __NR_exit diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index e9f5f41aa55a..2f26335a3c42 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -392,5 +392,6 @@ #define __NR_copy_file_range 379 #define __NR_preadv2 380 #define __NR_pwritev2 381 +#define __NR_kexec_file_load 382 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 8a53fccaa053..c937b8074464 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -124,9 +124,11 @@ ifneq ($(CONFIG_PPC_INDIRECT_PIO),y) obj-y += iomap.o endif -ifeq 
($(CONFIG_MODULES)$(CONFIG_WORD_SIZE),y64) +ifneq ($(CONFIG_MODULES)$(CONFIG_KEXEC_FILE),) +ifeq ($(CONFIG_WORD_SIZE),64) obj-y += elf_util.o elf_util_64.o endif +endif obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o diff --git a/arch/powerpc/kernel/elf_util_64.c b/arch/powerpc/kernel/elf_util_64.c index cde0420add9e..287d381a1a86 100644 --- a/arch/powerpc/kernel/elf_util_64.c +++ b/arch/powerpc/kernel/elf_util_64.c @@ -74,6 +74,8 @@ static void squash_toc_save_inst(const char *name, unsigned long addr) { } * @syms_base: Contents of the associated symbol table. * @loc_base: Contents of the section to which relocations apply. * @addr_base: The address where the section will be loaded in memory. + * @relative_symbols: Are the symbols' st_value members relative? + * @check_symbols: Fail if an unexpected symbol is found? * @obj_name: The name of the ELF binary, for information messages. * * Applies RELA relocations to an ELF file already at its final location @@ -84,16 +86,19 @@ int elf64_apply_relocate_add(const struct elf_info *elf_info, const char *strtab, const Elf64_Rela *rela, unsigned int num_rela, void *syms_base, void *loc_base, Elf64_Addr addr_base, + bool relative_symbols, bool check_symbols, const char *obj_name) { unsigned int i; unsigned long *location; unsigned long address; + unsigned long sec_base; unsigned long value; const char *name; Elf64_Sym *sym; for (i = 0; i < num_rela; i++) { + /* * rels[i].r_offset contains the byte offset from the beginning * of section to the storage unit affected. @@ -121,8 +126,34 @@ int elf64_apply_relocate_add(const struct elf_info *elf_info, name, (unsigned long)sym->st_value, (long)rela[i].r_addend); + if (check_symbols) { + /* TOC symbols appear as undefined but should be + * resolved as well, so allow them to be processed. 
*/ + if (sym->st_shndx == SHN_UNDEF && + strcmp(name, ".TOC.") != 0) { + pr_err("Undefined symbol: %s\n", name); + return -ENOEXEC; + } else if (sym->st_shndx == SHN_COMMON) { + pr_err("Symbol '%s' in common section.\n", name); + return -ENOEXEC; + } + } + + if (relative_symbols && sym->st_shndx != SHN_ABS) { + if (sym->st_shndx >= elf_info->ehdr->e_shnum) { + pr_err("Invalid section %d for symbol %s\n", + sym->st_shndx, name); + return -ENOEXEC; + } else { + struct elf_shdr *sechdrs = elf_info->sechdrs; + + sec_base = sechdrs[sym->st_shndx].sh_addr; + } + } else + sec_base = 0; + /* `Everything is relative'. */ - value = sym->st_value + rela[i].r_addend; + value = sym->st_value + sec_base + rela[i].r_addend; switch (ELF64_R_TYPE(rela[i].r_info)) { case R_PPC64_ADDR32: @@ -135,6 +166,10 @@ int elf64_apply_relocate_add(const struct elf_info *elf_info, *(unsigned long *)location = value; break; + case R_PPC64_REL32: + *(uint32_t *)location = value - (uint32_t)(uint64_t)location; + break; + case R_PPC64_TOC: *(unsigned long *)location = my_r2(elf_info); break; @@ -186,6 +221,15 @@ int elf64_apply_relocate_add(const struct elf_info *elf_info, | (value & 0xfffc); break; + case R_PPC64_TOC16_HI: + /* Subtract TOC pointer */ + value -= my_r2(elf_info); + value = value >> 16; + *((uint16_t *) location) + = (*((uint16_t *) location) & ~0xffff) + | (value & 0xffff); + break; + case R_PPC64_TOC16_HA: /* Subtract TOC pointer */ value -= my_r2(elf_info); @@ -195,6 +239,21 @@ int elf64_apply_relocate_add(const struct elf_info *elf_info, | (value & 0xffff); break; + case R_PPC64_REL14: + /* Convert value to relative */ + value -= address; + if (value + 0x8000 > 0xffff || (value & 3) != 0) { + pr_err("%s: REL14 %li out of range!\n", obj_name, + (long int)value); + return -ENOEXEC; + } + + /* Only replace bits 2 through 16 */ + *(uint32_t *)location + = (*(uint32_t *)location & ~0xfffc) + | (value & 0xfffc); + break; + case R_PPC_REL24: /* FIXME: Handle weak symbols here --RR */ if 
(sym->st_shndx == SHN_UNDEF) { @@ -263,6 +322,29 @@ int elf64_apply_relocate_add(const struct elf_info *elf_info, ((uint32_t *)location)[1] = 0x38420000 + PPC_LO(value); break; + case R_PPC64_ADDR16_LO: + *(uint16_t *)location = value & 0xffff; + break; + + case R_PPC64_ADDR16_HI: + *(uint16_t *)location = (value >> 16) & 0xffff; + break; + + case R_PPC64_ADDR16_HA: + *(uint16_t *)location = (((value + 0x8000) >> 16) & + 0xffff); + break; + + case R_PPC64_ADDR16_HIGHER: + *(uint16_t *)location = (((uint64_t)value >> 32) & + 0xffff); + break; + + case R_PPC64_ADDR16_HIGHEST: + *(uint16_t *)location = (((uint64_t)value >> 48) & + 0xffff); + break; + case R_PPC64_REL16_HA: /* Subtract location pointer */ value -= address; diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index b8c202d63ecb..a3bd8cd53776 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -9,6 +9,7 @@ * Version 2. See the file COPYING for more details. 
*/ +#define pr_fmt(fmt) "kexec: " fmt #include <linux/kexec.h> #include <linux/smp.h> @@ -18,6 +19,7 @@ #include <linux/kernel.h> #include <linux/cpu.h> #include <linux/hardirq.h> +#include <linux/memblock.h> #include <asm/page.h> #include <asm/current.h> @@ -30,6 +32,10 @@ #include <asm/smp.h> #include <asm/hw_breakpoint.h> +#ifdef CONFIG_KEXEC_FILE +static struct kexec_file_ops *kexec_file_loaders[] = { }; +#endif + #ifdef CONFIG_PPC_BOOK3E int default_machine_kexec_prepare(struct kimage *image) { @@ -426,3 +432,136 @@ static int __init export_htab_values(void) } late_initcall(export_htab_values); #endif /* CONFIG_PPC_STD_MMU_64 */ + +/* arch-dependent functionality required for implementing kexec_file_load() syscall */ +#ifdef CONFIG_KEXEC_FILE +int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len) +{ + int i, ret = -ENOEXEC; + struct kexec_file_ops *fops; + + for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) { + fops = kexec_file_loaders[i]; + if (!fops || !fops->probe) + continue; + + ret = fops->probe(buf, buf_len); + if (!ret) { + image->fops = fops; + return ret; + } + } + + return ret; +} + +void *arch_kexec_kernel_image_load(struct kimage *image) +{ + if (!image->fops || !image->fops->load) + return ERR_PTR(-ENOEXEC); + + return image->fops->load(image, image->kernel_buf, + image->kernel_buf_len, image->initrd_buf, + image->initrd_buf_len, image->cmdline_buf, + image->cmdline_buf_len); +} + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + if (!image->fops || !image->fops->cleanup) + return 0; + + return image->fops->cleanup(image->image_loader_data); +} + +/** + * arch_walk_system_ram - call func(data) for each unreserved memory block + * @start: Minimum address. + * @end: Maximum address. + * @top_down: Starts from the highest address? + * @data: Argument to pass to @func. + * @func: Function to call for each memory block. 
+ * + * This function is used by kexec_add_buffer and kexec_locate_mem_hole + * to find unreserved memory to load kexec segments into. + */ +int arch_walk_system_ram(unsigned long start, unsigned long end, bool top_down, + void *data, int (*func)(u64, u64, void *)) +{ + int ret = -1; + u64 i; + phys_addr_t mstart, mend; + + if (top_down) { + for_each_free_mem_range_reverse(i, NUMA_NO_NODE, 0, + &mstart, &mend, NULL) { + if (end < mstart) + continue; + else if (start > mend) + break; + + ret = func(mstart, mend, data); + if (ret) + break; + } + } else { + for_each_free_mem_range(i, NUMA_NO_NODE, 0, &mstart, &mend, + NULL) { + if (end < mstart) + break; + else if (start > mend) + continue; + + ret = func(mstart, mend, data); + if (ret) + break; + } + } + + return ret; +} + +/** + * arch_kexec_apply_relocations_add - apply purgatory relocations + * @ehdr: Pointer to ELF headers. + * @sechdrs: Pointer to section headers. + * @relsec: Section index of SHT_RELA section. + * + * Elf64_Shdr.sh_offset has been modified to keep the pointer to the section + * contents, while Elf64_Shdr.sh_addr points to the final address of the + * section in memory. + */ +int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr, + Elf64_Shdr *sechdrs, unsigned int relsec) +{ + /* Section containing the relocation entries. */ + Elf64_Shdr *rel_section = &sechdrs[relsec]; + const Elf64_Rela *rela = (const Elf64_Rela *) rel_section->sh_offset; + unsigned int num_rela = rel_section->sh_size / sizeof(Elf64_Rela); + /* Section to which relocations apply. */ + Elf64_Shdr *target_section = &sechdrs[rel_section->sh_info]; + /* Associated symbol table. 
*/ + Elf64_Shdr *symtabsec = &sechdrs[rel_section->sh_link]; + void *syms_base = (void *) symtabsec->sh_offset; + void *loc_base = (void *) target_section->sh_offset; + Elf64_Addr addr_base = target_section->sh_addr; + struct elf_info elf_info; + const char *strtab; + + if (symtabsec->sh_link >= ehdr->e_shnum) { + /* Invalid strtab section number */ + pr_err("Invalid string table section index %d\n", + symtabsec->sh_link); + return -ENOEXEC; + } + /* String table for the associated symbol table. */ + strtab = (const char *) sechdrs[symtabsec->sh_link].sh_offset; + + elf_init_elf_info(ehdr, sechdrs, &elf_info); + + return elf64_apply_relocate_add(&elf_info, strtab, rela, num_rela, + syms_base, loc_base, addr_base, + true, true, "kexec purgatory"); +} +#endif /* CONFIG_KEXEC_FILE */ diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index a3cb0f6e83bb..15b822867f8d 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -442,6 +442,9 @@ int restore_r2(u32 *instruction, const char *obj_name) * When this function is called, the module is already at its final location in * memory, so Elf64_Shdr.sh_addr can be used for accessing the section * contents as well as the base address for relocations. + * + * Also, simplify_symbols already changed all symbols' st_value members + * to absolute addresses. */ int apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab, @@ -471,7 +474,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, return elf64_apply_relocate_add(&me->arch.elf_info, strtab, rela, num_rela, syms_base, (void *) addr_base, - addr_base, me->name); + addr_base, false, false, me->name); } #ifdef CONFIG_DYNAMIC_FTRACE -- 1.9.1