OK, here's another go-around. This patch leaves the bzImage itself unmodified, but it changes the payload into an ELF file. That is, the 32-bit decompression/relocation code plus the compressed kernel is now a properly formed ELF file. One thing that fell out of this is that code32_start ends up being a pointer to the ELF header rather than an entrypoint. Rather than reproducing Vivek's (?) hack of making the ELF header itself executable, I changed the 16-bit code to check for an ELF magic number at code32_start and use e_entry to get the actual entrypoint. This seems like approximately the right way of doing this, but I'm not sure how we want to formalize it. It's certainly easier than trying to extract the payload's entry address and copy it to code32_start in the boot_params block, and we need a pointer to the ELF file itself anyway. Thoughts? BTW, this won't apply as-is; I have some mucking-around patches that try to get the linux/elf*.h headers into a bit more order, but they're not ready yet.
J --- arch/i386/boot/Makefile | 21 +--- arch/i386/boot/compressed/Makefile | 13 +- arch/i386/boot/compressed/notes.S | 7 + arch/i386/boot/compressed/piggy.S | 10 + arch/i386/boot/compressed/vmlinux.lds | 15 ++ arch/i386/boot/compressed/vmlinux.scr | 10 - arch/i386/boot/header.S | 4 arch/i386/boot/payload.S | 4 arch/i386/boot/setup.ld | 27 ++++- arch/i386/boot/tools/.gitignore | 1 arch/i386/boot/tools/build.c | 168 --------------------------------- include/linux/elf.h | 20 +++ include/linux/elf_boot.h | 16 +++ 13 files changed, 105 insertions(+), 211 deletions(-) =================================================================== --- a/arch/i386/boot/Makefile +++ b/arch/i386/boot/Makefile @@ -31,6 +31,7 @@ setup-y += a20.o apm.o cmdline.o copy.o setup-y += a20.o apm.o cmdline.o copy.o cpu.o cpucheck.o edd.o setup-y += header.o main.o mca.o memory.o pm.o pmjump.o setup-y += printf.o string.o tty.o video.o version.o voyager.o +setup-y += payload.o # The link order of the video-*.o modules can matter. In particular, # video-vga.o *must* be listed first, followed by video-vesa.o. 
@@ -39,10 +40,6 @@ setup-y += video-vga.o setup-y += video-vga.o setup-y += video-vesa.o setup-y += video-bios.o - -hostprogs-y := tools/build - -HOSTCFLAGS_build.o := $(LINUXINCLUDE) # --------------------------------------------------------------------------- @@ -65,15 +62,10 @@ AFLAGS := $(CFLAGS) -D__ASSEMBLY__ $(obj)/bzImage: IMAGE_OFFSET := 0x100000 $(obj)/bzImage: EXTRA_CFLAGS := -D__BIG_KERNEL__ $(obj)/bzImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__ -$(obj)/bzImage: BUILDFLAGS := -b -quiet_cmd_image = BUILD $@ -cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/setup.bin \ - $(obj)/vmlinux.bin $(ROOT_DEV) > $@ +all: $(obj)/bzImage -$(obj)/zImage $(obj)/bzImage: $(obj)/setup.bin \ - $(obj)/vmlinux.bin $(obj)/tools/build FORCE - $(call if_changed,image) +$(obj)/zImage: FORCE @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE @@ -81,8 +73,11 @@ cmd_image = $(obj)/tools/build $(BUILDFL SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) -LDFLAGS_setup.elf := -T -$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE +$(obj)/payload.o: EXTRA_AFLAGS := -Wa,-I$(obj) +$(obj)/payload.o: $(src)/kernel.S $(obj)/vmlinux.bin + +LDFLAGS_bzImage := -T +$(obj)/bzImage: $(src)/setup.ld $(SETUP_OBJS) FORCE $(call if_changed,ld) OBJCOPYFLAGS_setup.bin := -O binary =================================================================== --- a/arch/i386/boot/compressed/Makefile +++ b/arch/i386/boot/compressed/Makefile @@ -4,7 +4,8 @@ # create a compressed vmlinux image from the original vmlinux # -targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o \ +targets := vmlinux vmlinux.bin vmlinux.bin.gz \ + elfhdr.o head.o misc.o notes.o piggy.o \ vmlinux.bin.all vmlinux.relocs EXTRA_AFLAGS := -traditional @@ -17,7 +18,9 @@ CFLAGS := -m32 -D__KERNEL__ $(LINUX_INC $(call cc-option,-fno-stack-protector) LDFLAGS := -m elf_i386 -$(obj)/vmlinux: $(src)/vmlinux.lds $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE 
+OBJS=$(addprefix $(obj)/,elfhdr.o head.o misc.o piggy.o) + +$(obj)/vmlinux: $(src)/vmlinux.lds $(OBJS) FORCE $(call if_changed,ld) @: @@ -44,7 +47,5 @@ else $(call if_changed,gzip) endif -LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T - -$(obj)/piggy.o: $(src)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE - $(call if_changed,ld) +$(obj)/piggy.o: EXTRA_AFLAGS := -Wa,-I$(obj) +$(obj)/piggy.o: $(obj)/vmlinux.bin.gz =================================================================== --- /dev/null +++ b/arch/i386/boot/compressed/notes.S @@ -0,0 +1,7 @@ +#include <linux/elfnote.h> +#include <linux/elf_boot.h> +#include <linux/utsrelease.h> + +ELFNOTE(ELF_NOTE_BOOT, EIN_PROGRAM_NAME, .asciz "Linux") +ELFNOTE(ELF_NOTE_BOOT, EIN_PROGRAM_VERSION, .asciz UTS_RELEASE) +ELFNOTE(ELF_NOTE_BOOT, EIN_ARGUMENT_STYLE, .asciz "Linux") =================================================================== --- /dev/null +++ b/arch/i386/boot/compressed/piggy.S @@ -0,0 +1,10 @@ +.section .data.compressed,"a",@progbits + +.globl input_data, input_len, output_len + +input_len: .long input_data_end - input_data + +input_data: +.incbin "vmlinux.bin.gz" +output_len = .-4 +input_data_end: =================================================================== --- a/arch/i386/boot/compressed/vmlinux.lds +++ b/arch/i386/boot/compressed/vmlinux.lds @@ -1,13 +1,12 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386" -OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +/* OUTPUT_FORMAT("binary") */ +OUTPUT_FORMAT("elf32-i386") OUTPUT_ARCH(i386) ENTRY(startup_32) SECTIONS { - /* Be careful parts of head.S assume startup_32 is at - * address 0. - */ . = 0 ; .text.head : { + *(.elfhdr) _head = . ; *(.text.head) _ehead = . ; @@ -33,6 +32,7 @@ SECTIONS *(.data.*) _edata = . ; } + _filesz = . ; .bss : { _bss = . ; *(.bss) @@ -40,4 +40,11 @@ SECTIONS *(COMMON) _end = . ; } + _memsz = .; + + .notes : { + _notes = . ; + *(.note*) + _notesz = . 
- _notes ; + } } =================================================================== --- a/arch/i386/boot/compressed/vmlinux.scr +++ /dev/null @@ -1,10 +0,0 @@ -SECTIONS -{ - .data.compressed : { - input_len = .; - LONG(input_data_end - input_data) input_data = .; - *(.data) - output_len = . - 4; - input_data_end = .; - } -} =================================================================== --- a/arch/i386/boot/header.S +++ b/arch/i386/boot/header.S @@ -97,9 +97,9 @@ bugger_off_msg: .section ".header", "a" .globl hdr hdr: -setup_sects: .byte SETUPSECTS +setup_sects: .byte __setup_sects root_flags: .word ROOT_RDONLY -syssize: .long SYSSIZE +syssize: .long kernel_size_para ram_size: .word RAMDISK vid_mode: .word SVGA_MODE root_dev: .word ROOT_DEV =================================================================== --- /dev/null +++ b/arch/i386/boot/payload.S @@ -0,0 +1,4 @@ +.section .kernel,"a",@progbits +.balign 4096 + +.incbin "vmlinux.bin" =================================================================== --- a/arch/i386/boot/setup.ld +++ b/arch/i386/boot/setup.ld @@ -3,7 +3,7 @@ * * Linker script for the i386 setup code */ -OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_FORMAT("binary") OUTPUT_ARCH(i386) ENTRY(_start) @@ -15,6 +15,7 @@ SECTIONS . = 497; .header : { *(.header) } + _text = .; .inittext : { *(.inittext) } .initdata : { *(.initdata) } .text : { *(.text*) } @@ -36,6 +37,7 @@ SECTIONS LONG(0x5a5aaa55) } + _filesz = . - _text; . = ALIGN(16); __bss_start = .; @@ -46,8 +48,25 @@ SECTIONS . = ALIGN(16); _end = .; - /DISCARD/ : { *(.note*) } + _memsz = . - _text; - . = ASSERT(_end <= 0x8000, "Setup too big!"); - . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!"); + __setup_sects = . 
/ 512; + + _ = ASSERT(_end <= 0x8000, "Setup too big!"); + _ = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!"); + + .notes : { + notes = .; + *(.note*) + notes_end = .; + } + notes_size = notes_end - notes; + + .kernel : { + kernel = .; + *(.kernel) + kernel_end = .; + kernel_size = kernel_end - kernel; + kernel_size_para = (kernel_size + 15) / 16; + } } =================================================================== --- a/arch/i386/boot/tools/.gitignore +++ /dev/null @@ -1,1 +0,0 @@ -build =================================================================== --- a/arch/i386/boot/tools/build.c +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright (C) 1997 Martin Mares - * Copyright (C) 2007 H. Peter Anvin - */ - -/* - * This file builds a disk-image from three different files: - * - * - setup: 8086 machine code, sets up system parm - * - system: 80386 code for actual system - * - * It does some checking that all files are of the correct type, and - * just writes the result to stdout, removing headers and padding to - * the right amount. It also writes some system data to stderr. - */ - -/* - * Changes by tytso to allow root device specification - * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 - * Cross compiling fixes by Gertjan van Wingerde, July 1996 - * Rewritten by Martin Mares, April 1997 - * Substantially overhauled by H. 
Peter Anvin, April 2007 - */ - -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include <stdarg.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/sysmacros.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/mman.h> -#include <asm/boot.h> - -typedef unsigned char u8; -typedef unsigned short u16; -typedef unsigned long u32; - -#define DEFAULT_MAJOR_ROOT 0 -#define DEFAULT_MINOR_ROOT 0 - -/* Minimal number of setup sectors */ -#define SETUP_SECT_MIN 5 -#define SETUP_SECT_MAX 64 - -/* This must be large enough to hold the entire setup */ -u8 buf[SETUP_SECT_MAX*512]; -int is_big_kernel; - -static void die(const char * str, ...) -{ - va_list args; - va_start(args, str); - vfprintf(stderr, str, args); - fputc('\n', stderr); - exit(1); -} - -static void usage(void) -{ - die("Usage: build [-b] setup system [rootdev] [> image]"); -} - -int main(int argc, char ** argv) -{ - unsigned int i, sz, setup_sectors; - int c; - u32 sys_size; - u8 major_root, minor_root; - struct stat sb; - FILE *file; - int fd; - void *kernel; - - if (argc > 2 && !strcmp(argv[1], "-b")) - { - is_big_kernel = 1; - argc--, argv++; - } - if ((argc < 3) || (argc > 4)) - usage(); - if (argc > 3) { - if (!strcmp(argv[3], "CURRENT")) { - if (stat("/", &sb)) { - perror("/"); - die("Couldn't stat /"); - } - major_root = major(sb.st_dev); - minor_root = minor(sb.st_dev); - } else if (strcmp(argv[3], "FLOPPY")) { - if (stat(argv[3], &sb)) { - perror(argv[3]); - die("Couldn't stat root device."); - } - major_root = major(sb.st_rdev); - minor_root = minor(sb.st_rdev); - } else { - major_root = 0; - minor_root = 0; - } - } else { - major_root = DEFAULT_MAJOR_ROOT; - minor_root = DEFAULT_MINOR_ROOT; - } - fprintf(stderr, "Root device is (%d, %d)\n", major_root, minor_root); - - /* Copy the setup code */ - file = fopen(argv[1], "r"); - if (!file) - die("Unable to open `%s': %m", argv[1]); - c = fread(buf, 1, sizeof(buf), file); - if (ferror(file)) - die("read-error on `setup'"); 
- if (c < 1024) - die("The setup must be at least 1024 bytes"); - if (buf[510] != 0x55 || buf[511] != 0xaa) - die("Boot block hasn't got boot flag (0xAA55)"); - fclose(file); - - /* Pad unused space with zeros */ - setup_sectors = (c + 511) / 512; - if (setup_sectors < SETUP_SECT_MIN) - setup_sectors = SETUP_SECT_MIN; - i = setup_sectors*512; - memset(buf+c, 0, i-c); - - /* Set the default root device */ - buf[508] = minor_root; - buf[509] = major_root; - - fprintf(stderr, "Setup is %d bytes (padded to %d bytes).\n", c, i); - - /* Open and stat the kernel file */ - fd = open(argv[2], O_RDONLY); - if (fd < 0) - die("Unable to open `%s': %m", argv[2]); - if (fstat(fd, &sb)) - die("Unable to stat `%s': %m", argv[2]); - sz = sb.st_size; - fprintf (stderr, "System is %d kB\n", (sz+1023)/1024); - kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0); - if (kernel == MAP_FAILED) - die("Unable to mmap '%s': %m", argv[2]); - sys_size = (sz + 15) / 16; - if (!is_big_kernel && sys_size > DEF_SYSSIZE) - die("System is too big. 
Try using bzImage or modules."); - - /* Patch the setup code with the appropriate size parameters */ - buf[0x1f1] = setup_sectors-1; - buf[0x1f4] = sys_size; - buf[0x1f5] = sys_size >> 8; - buf[0x1f6] = sys_size >> 16; - buf[0x1f7] = sys_size >> 24; - - if (fwrite(buf, 1, i, stdout) != i) - die("Writing setup failed"); - - /* Copy the kernel code */ - if (fwrite(kernel, 1, sz, stdout) != sz) - die("Writing kernel failed"); - close(fd); - - /* Everything is OK */ - return 0; -} =================================================================== --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -1,9 +1,10 @@ #ifndef _LINUX_ELF_H #define _LINUX_ELF_H +#include <linux/elf-em.h> +#ifndef __ASSEMBLY__ #include <linux/types.h> #include <linux/auxvec.h> -#include <linux/elf-em.h> #include <asm/elf.h> #include <asm-generic/module.h> @@ -32,6 +33,7 @@ typedef __u32 Elf64_Word; typedef __u32 Elf64_Word; typedef __u64 Elf64_Xword; typedef __s64 Elf64_Sxword; +#endif /* __ASSEMBLY__ */ /* These constants are for the segment types stored in the image headers */ #define PT_NULL 0 @@ -124,6 +126,7 @@ typedef __s64 Elf64_Sxword; #define ELF64_ST_BIND(x) ELF_ST_BIND(x) #define ELF64_ST_TYPE(x) ELF_ST_TYPE(x) +#ifndef __ASSEMBLY__ struct elf32_dyn { Elf32_Sword d_tag; union{ @@ -139,6 +142,7 @@ struct elf64_dyn { Elf64_Addr d_ptr; } d_un; }; +#endif /* __ASSEMBLY__ */ /* The following are used with relocations */ #define ELF32_R_SYM(x) ((x) >> 8) @@ -147,6 +151,7 @@ struct elf64_dyn { #define ELF64_R_SYM(i) ((i) >> 32) #define ELF64_R_TYPE(i) ((i) & 0xffffffff) +#ifndef __ASSEMBLY__ struct elf32_rel { Elf32_Addr r_offset; Elf32_Word r_info; @@ -186,10 +191,11 @@ struct elf64_sym { Elf64_Addr st_value; /* Value of the symbol */ Elf64_Xword st_size; /* Associated symbol size */ }; - +#endif /* __ASSEMBLY__ */ #define EI_NIDENT 16 +#ifndef __ASSEMBLY__ struct elf32_hdr { unsigned char e_ident[EI_NIDENT]; Elf32_Half e_type; @@ -223,6 +229,7 @@ struct elf64_hdr { Elf64_Half e_shnum; 
Elf64_Half e_shstrndx; }; +#endif /* __ASSEMBLY__ */ /* These constants define the permissions on sections in the program header, p_flags. */ @@ -230,6 +237,7 @@ struct elf64_hdr { #define PF_W 0x2 #define PF_X 0x1 +#ifndef __ASSEMBLY__ struct elf32_phdr { Elf32_Word p_type; Elf32_Off p_offset; @@ -251,6 +259,7 @@ struct elf64_phdr { Elf64_Xword p_memsz; /* Segment size in memory */ Elf64_Xword p_align; /* Segment alignment, file & memory */ }; +#endif /* __ASSEMBLY__ */ /* sh_type */ #define SHT_NULL 0 @@ -285,7 +294,8 @@ struct elf64_phdr { #define SHN_ABS 0xfff1 #define SHN_COMMON 0xfff2 #define SHN_HIRESERVE 0xffff - + +#ifndef __ASSEMBLY__ struct elf32_shdr { Elf32_Word sh_name; Elf32_Word sh_type; @@ -311,6 +321,7 @@ struct elf64_shdr { Elf64_Xword sh_addralign; /* Section alignment */ Elf64_Xword sh_entsize; /* Entry size if section holds table */ }; +#endif /* __ASSEMBLY__ */ #define EI_MAG0 0 /* e_ident[] indexes */ #define EI_MAG1 1 @@ -344,6 +355,7 @@ struct elf64_shdr { #define ELFOSABI_NONE 0 #define ELFOSABI_LINUX 3 +#define ELFOSABI_STANDALONE 255 #ifndef ELF_OSABI #define ELF_OSABI ELFOSABI_NONE @@ -358,6 +370,7 @@ struct elf64_shdr { #define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */ +#ifndef __ASSEMBLY__ /* Note header in a PT_NOTE section */ struct elf32_note { Elf32_Word n_namesz; /* Name size */ @@ -397,5 +410,6 @@ static inline void arch_write_notes(stru #define ELF_CORE_EXTRA_NOTES_SIZE arch_notes_size() #define ELF_CORE_WRITE_EXTRA_NOTES arch_write_notes(file) #endif /* ARCH_HAVE_EXTRA_ELF_NOTES */ +#endif /* __ASSEMBLY__ */ #endif /* _LINUX_ELF_H */ =================================================================== --- /dev/null +++ b/include/linux/elf_boot.h @@ -0,0 +1,16 @@ +#ifndef ELF_BOOT_H +#define ELF_BOOT_H + +/* Elf notes to help bootloaders identify what program they are booting. + */ + +/* Standardized Elf image notes for booting... 
The name for all of these is ELFBoot */ +#define ELF_NOTE_BOOT ELFBoot + +#define EIN_PROGRAM_NAME 1 /* The program in this ELF file */ +#define EIN_PROGRAM_VERSION 2 /* The version of the program in this ELF file */ +#define EIN_PROGRAM_CHECKSUM 3 /* ip style checksum of the memory image. */ +#define EIN_ARGUMENT_STYLE 4 /* String identifying argument passing style */ +#define EIN_BOOT_PARAMS 5 /* Offset of boot_params block */ + +#endif /* ELF_BOOT_H */ _______________________________________________ Virtualization mailing list Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/virtualization