On certain arm64 platforms, it has been noticed that due to a hole at the start of physical ram exposed to kernel (i.e. it doesn't start from address 0), the kernel still calculates the 'memstart_addr' kernel variable as 0. Whereas the SYSTEM_RAM or IOMEM_RESERVED range in '/proc/iomem' would carry a first entry whose start address is non-zero (as the physical ram exposed to the kernel starts from a non-zero address). In such cases, if we rely on '/proc/iomem' entries to calculate the phys_offset, then we will have mismatch between the user-space and kernel space 'PHYS_OFFSET' value. The present 'kexec-tools' code does the same in 'get_memory_ranges_iomem_cb()' function when it makes a call to 'set_phys_offset()'. This can cause the vmcore generated via 'kexec-tools' to miss the last few bytes as the first '/proc/iomem' starts from a non-zero address. Please see [0] for the original bug-report from Yanjiang Jin. The same can be fixed in the following manner: 1. For newer kernel (>= 4.19, with commit 23c85094fe1895caefdd ["proc/kcore: add vmcoreinfo note to /proc/kcore"] available), 'kcore' contains a new PT_NOTE which carries the VMCOREINFO information. If the same is available, one should prefer the same to retrieve 'PHYS_OFFSET' value exported by the kernel as this is now the standard interface exposed by kernel for sharing machine specific details with the user-land as per the arm64 kernel maintainers (see [1]) . 2. For older kernels, we can try and determine the PHYS_OFFSET value from PT_LOAD segments inside 'kcore' via some jugglery of the correct virtual and physical address combinations. As a fallback, we still support getting the PHYS_OFFSET values from '/proc/iomem', to maintain backward compatibility. Testing: ------- - Tested on my apm-mustang and qualcomm amberwing board with upstream kernel (4.20.0-rc7) for both KASLR and non-KASLR boot cases. References: ----------- [0] https://www.spinics.net/lists/kexec/msg20618.html [1] https://www.mail-archive.com/kexec@xxxxxxxxxxxxxxxxxxx/msg20300.html Reported-by: Yanjiang Jin <yanjiang.jin@xxxxxxxxxxxxxxxx> Signed-off-by: Bhupesh Sharma <bhsharma@xxxxxxxxxx> --- kexec/arch/arm64/kexec-arm64.c | 194 ++++++++++++++++++++++++++++++++++++++++- kexec/arch/arm64/kexec-arm64.h | 15 ++-- 2 files changed, 200 insertions(+), 9 deletions(-) diff --git a/kexec/arch/arm64/kexec-arm64.c b/kexec/arch/arm64/kexec-arm64.c index b143e861f7d9..34241afea6e1 100644 --- a/kexec/arch/arm64/kexec-arm64.c +++ b/kexec/arch/arm64/kexec-arm64.c @@ -14,6 +14,7 @@ #include <sys/stat.h> #include <linux/elf-em.h> #include <elf.h> +#include <elf_info.h> #include <unistd.h> #include <syscall.h> @@ -38,6 +39,21 @@ #define PROP_ELFCOREHDR "linux,elfcorehdr" #define PROP_USABLE_MEM_RANGE "linux,usable-memory-range" +#define PAGE_OFFSET_36 ((0xffffffffffffffffUL) << 36) +#define PAGE_OFFSET_39 ((0xffffffffffffffffUL) << 39) +#define PAGE_OFFSET_42 ((0xffffffffffffffffUL) << 42) +#define PAGE_OFFSET_47 ((0xffffffffffffffffUL) << 47) +#define PAGE_OFFSET_48 ((0xffffffffffffffffUL) << 48) + +/* Global flag which indicates that we have tried reading + * PHYS_OFFSET from 'kcore' already. + */ +static bool try_read_phys_offset_from_kcore = false; + +/* Machine specific details. */ +static int va_bits; +static unsigned long page_offset; + /* Global varables the core kexec routines expect. */ unsigned char reuse_initrd; @@ -750,6 +766,126 @@ void add_segment(struct kexec_info *info, const void *buf, size_t bufsz, add_segment_phys_virt(info, buf, bufsz, base, memsz, 1); } +static inline void set_phys_offset(uint64_t v, char *set_method) +{ + if (arm64_mem.phys_offset == arm64_mem_ngv + || v < arm64_mem.phys_offset) { + arm64_mem.phys_offset = v; + dbgprintf("%s: phys_offset : %016lx (method : %s)\n", + __func__, arm64_mem.phys_offset, + set_method); + } +} + +/** + * get_va_bits - Helper for getting VA_BITS + */ + +static int get_va_bits(void) +{ + unsigned long long stext_sym_addr = get_kernel_sym("_stext"); + + if (stext_sym_addr == 0) { + fprintf(stderr, "Can't get the symbol of _stext.\n"); + return -1; + } + + /* Derive va_bits as per arch/arm64/Kconfig */ + if ((stext_sym_addr & PAGE_OFFSET_36) == PAGE_OFFSET_36) { + va_bits = 36; + } else if ((stext_sym_addr & PAGE_OFFSET_39) == PAGE_OFFSET_39) { + va_bits = 39; + } else if ((stext_sym_addr & PAGE_OFFSET_42) == PAGE_OFFSET_42) { + va_bits = 42; + } else if ((stext_sym_addr & PAGE_OFFSET_47) == PAGE_OFFSET_47) { + va_bits = 47; + } else if ((stext_sym_addr & PAGE_OFFSET_48) == PAGE_OFFSET_48) { + va_bits = 48; + } else { + fprintf(stderr, + "Cannot find a proper _stext for calculating VA_BITS\n"); + return -1; + } + + dbgprintf("va_bits : %d\n", va_bits); + + return 0; +} + +/** + * get_page_offset - Helper for getting PAGE_OFFSET + */ + +static int get_page_offset(void) +{ + int ret; + + ret = get_va_bits(); + if (ret < 0) + return ret; + + page_offset = (0xffffffffffffffffUL) << (va_bits - 1); + dbgprintf("page_offset : %lx\n", page_offset); + + return 0; +} + +/** + * get_phys_offset_from_vmcoreinfo_pt_note - Helper for getting PHYS_OFFSET + * from VMCOREINFO note inside 'kcore'. + */ + +static int get_phys_offset_from_vmcoreinfo_pt_note(unsigned long *phys_offset) +{ + int fd, ret = 0; + + if ((fd = open("/proc/kcore", O_RDONLY)) < 0) { + fprintf(stderr, "Can't open (%s).\n", "/proc/kcore"); + return EFAILED; + } + + ret = read_phys_offset_elf_kcore(fd, phys_offset); + + close(fd); + return ret; +} + +/** + * get_phys_base_from_pt_load - Helper for getting PHYS_OFFSET + * from PT_LOADs inside 'kcore'. + */ + +int get_phys_base_from_pt_load(unsigned long *phys_offset) +{ + int i, fd, ret; + unsigned long long phys_start; + unsigned long long virt_start; + + ret = get_page_offset(); + if (ret < 0) + return ret; + + if ((fd = open("/proc/kcore", O_RDONLY)) < 0) { + fprintf(stderr, "Can't open (%s).\n", "/proc/kcore"); + return EFAILED; + } + + read_elf_kcore(fd); + + for (i = 0; get_pt_load(i, + &phys_start, NULL, &virt_start, NULL); + i++) { + if (virt_start != NOT_KV_ADDR + && virt_start >= page_offset + && phys_start != NOT_PADDR) + *phys_offset = phys_start - + (virt_start & ~page_offset); + } + + close(fd); + return 0; +} + /** * get_memory_ranges_iomem_cb - Helper for get_memory_ranges_iomem. */ @@ -757,11 +893,45 @@ void add_segment(struct kexec_info *info, const void *buf, size_t bufsz, static int get_memory_ranges_iomem_cb(void *data, int nr, char *str, unsigned long long base, unsigned long long length) { + int ret; + unsigned long phys_offset = UINT64_MAX; struct memory_range *r; if (nr >= KEXEC_SEGMENT_MAX) return -1; + if (!try_read_phys_offset_from_kcore) { + /* Since kernel version 4.19, 'kcore' contains + * a new PT_NOTE which carries the VMCOREINFO + * information. + * If the same is available, one should prefer the + * same to retrieve 'PHYS_OFFSET' value exported by + * the kernel as this is now the standard interface + * exposed by kernel for sharing machine specific + * details with the userland. + */ + ret = get_phys_offset_from_vmcoreinfo_pt_note(&phys_offset); + if (!ret) { + if (phys_offset != UINT64_MAX) + set_phys_offset(phys_offset, + "vmcoreinfo pt_note"); + } else { + /* If we are running on a older kernel, + * try to retrieve the 'PHYS_OFFSET' value + * exported by the kernel in the 'kcore' + * file by reading the PT_LOADs and determining + * the correct combination. + */ + ret = get_phys_base_from_pt_load(&phys_offset); + if (!ret) + if (phys_offset != UINT64_MAX) + set_phys_offset(phys_offset, + "pt_load"); + } + + try_read_phys_offset_from_kcore = true; + } + r = (struct memory_range *)data + nr; if (!strncmp(str, SYSTEM_RAM, strlen(SYSTEM_RAM))) @@ -774,7 +944,26 @@ static int get_memory_ranges_iomem_cb(void *data, int nr, char *str, r->start = base; r->end = base + length - 1; - set_phys_offset(r->start); + /* As a fallback option, we can try determining the PHYS_OFFSET + * value from the '/proc/iomem' entries as well. + * + * But note that this can be flaky, as on certain arm64 + * platforms, it has been noticed that due to a hole at the + * start of physical ram exposed to kernel + * (i.e. it doesn't start from address 0), the kernel still + * calculates the 'memstart_addr' kernel variable as 0. + * + * Whereas the SYSTEM_RAM or IOMEM_RESERVED range in + * '/proc/iomem' would carry a first entry whose start address + * is non-zero (as the physical ram exposed to the kernel + * starts from a non-zero address). + * + * In such cases, if we rely on '/proc/iomem' entries to + * calculate the phys_offset, then we will have mismatch + * between the user-space and kernel space 'PHYS_OFFSET' + * value. + */ + set_phys_offset(r->start, "iomem"); dbgprintf("%s: %016llx - %016llx : %s", __func__, r->start, r->end, str); @@ -783,7 +972,8 @@ static int get_memory_ranges_iomem_cb(void *data, int nr, char *str, } /** - * get_memory_ranges_iomem - Try to get the memory ranges from /proc/iomem. + * get_memory_ranges_iomem - Try to get the memory ranges from + * /proc/iomem. */ static int get_memory_ranges_iomem(struct memory_range *array, diff --git a/kexec/arch/arm64/kexec-arm64.h b/kexec/arch/arm64/kexec-arm64.h index 22e4b69d832c..cc3419f4c10f 100644 --- a/kexec/arch/arm64/kexec-arm64.h +++ b/kexec/arch/arm64/kexec-arm64.h @@ -21,6 +21,14 @@ #define MiB(x) (KiB(x) * 1024UL) #define GiB(x) (MiB(x) * 1024UL) +#define ULONGLONG_MAX (~0ULL) + +/* + * Incorrect address + */ +#define NOT_KV_ADDR (0x0) +#define NOT_PADDR (ULONGLONG_MAX) + int elf_arm64_probe(const char *kernel_buf, off_t kernel_size); int elf_arm64_load(int argc, char **argv, const char *kernel_buf, off_t kernel_size, struct kexec_info *info); @@ -60,13 +68,6 @@ static inline void reset_vp_offset(void) arm64_mem.vp_offset = arm64_mem_ngv; } -static inline void set_phys_offset(uint64_t v) -{ - if (arm64_mem.phys_offset == arm64_mem_ngv - || v < arm64_mem.phys_offset) - arm64_mem.phys_offset = v; -} - int arm64_process_image_header(const struct arm64_image_header *h); unsigned long arm64_locate_kernel_segment(struct kexec_info *info); int arm64_load_other_segments(struct kexec_info *info, -- 2.7.4 _______________________________________________ kexec mailing list kexec@xxxxxxxxxxxxxxxxxxx http://lists.infradead.org/mailman/listinfo/kexec