- Added a new function to find present CPUs in the system. - Removed unnecessary NULL check on seg_ptr from arch_do_exclude_segment(). --- kexec/arch/ppc64/crashdump-ppc64.c | 16 +- kexec/arch/ppc64/fdt.c | 236 +++++++++++++++++++++++++++- kexec/arch/ppc64/include/arch/fdt.h | 2 +- kexec/arch/ppc64/kexec-elf-ppc64.c | 2 +- kexec/arch/ppc64/kexec-ppc64.c | 9 +- 5 files changed, 258 insertions(+), 7 deletions(-) diff --git a/kexec/arch/ppc64/crashdump-ppc64.c b/kexec/arch/ppc64/crashdump-ppc64.c index 6d47898..98d439a 100644 --- a/kexec/arch/ppc64/crashdump-ppc64.c +++ b/kexec/arch/ppc64/crashdump-ppc64.c @@ -476,7 +476,7 @@ int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, uint64_t max_addr, unsigned long min_base) { void *tmp; - unsigned long sz; + unsigned long sz, memsz; uint64_t elfcorehdr; int nr_ranges, align = 1024, i; unsigned long long end; @@ -531,8 +531,18 @@ int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline, } } - elfcorehdr = add_buffer(info, tmp, sz, sz, align, min_base, - max_addr, 1); + memsz = sz; + /* To support --hotplug, replace the calculated memsz with the value + * from /sys/kernel/crash_elfcorehdr_size and align it correctly. + */ + if (do_hotplug) { + if (elfcorehdrsz > sz) + memsz = _ALIGN(elfcorehdrsz, align); + } + + /* Record the location of the elfcorehdr for hotplug handling */ + info->elfcorehdr = elfcorehdr = add_buffer(info, tmp, sz, memsz, align, + min_base, max_addr, 1); reserve(elfcorehdr, sz); /* modify and store the cmdline in a global array. This is later * read by flatten_device_tree and modified if required diff --git a/kexec/arch/ppc64/fdt.c b/kexec/arch/ppc64/fdt.c index 8bc6d2d..879240f 100644 --- a/kexec/arch/ppc64/fdt.c +++ b/kexec/arch/ppc64/fdt.c @@ -17,6 +17,13 @@ #include <libfdt.h> #include <stdio.h> #include <stdlib.h> +#include <limits.h> +#include <stdbool.h> +#include <dirent.h> +#include <sys/stat.h> + +#include "../../kexec.h" +#include "../../kexec-syscall.h" /* * Let the kernel know it booted from kexec, as some things (e.g. @@ -46,17 +53,244 @@ static int fixup_kexec_prop(void *fdt) return 0; } +static inline bool is_dot_dir(char * d_path) +{ + return d_path[0] == '.'; +} + +/* + * get_cpu_node_size - Returns size of files including file name size under + * the given @cpu_node_path. + */ +static int get_cpu_node_size(char *cpu_node_path) +{ + DIR *d; + struct dirent *de; + struct stat statbuf; + int cpu_node_size = 0; + char cpu_prop_path[2 * PATH_MAX]; + + d = opendir(cpu_node_path); + if (!d) + return 0; + + while ((de = readdir(d)) != NULL) { + if (de->d_type != DT_REG) + continue; + + memset(cpu_prop_path, '\0', PATH_MAX); + snprintf(cpu_prop_path, 2 * PATH_MAX, "%s/%s", cpu_node_path, + de->d_name); + + if (stat(cpu_prop_path, &statbuf)) + continue; + + cpu_node_size += statbuf.st_size; + cpu_node_size += strlen(de->d_name); + } + + return cpu_node_size; +} + +/* + * is_cpu_node - Checks if the node specified by the given @path + * represents a CPU node. + * + * Returns true if the @path has a "device_type" file containing "cpu"; + * otherwise, returns false. + */ +static bool is_cpu_node(char *path) +{ + FILE *file; + bool ret = false; + char device_type[4]; + + file = fopen(path, "r"); + if (!file) + return false; + + memset(device_type, '\0', 4); + if (fread(device_type, 1, 3, file) < 3) + goto out; + + if (strcmp(device_type, "cpu")) + goto out; + + ret = true; +out: + fclose(file); + return ret; +} + +static int get_threads_per_cpu(char *path) +{ + struct stat statbuf; + if (stat(path, &statbuf)) + return 0; + + return statbuf.st_size / 4; +} + +/** + * get_present_cpus - finds the present CPUs in the system + * + * This function opens the file `/sys/devices/system/cpu/present` to read + * the range of present CPUs. It parses the range and calculates the + * total number of present CPUs in the system. + * + * Returns total number of present CPUs on success, -1 on failure. + */ +static int get_present_cpus() +{ + char *range; + char buf[1024]; + int start, end; + int cpu_count = 0; + FILE *file = fopen("/sys/devices/system/cpu/present", "r"); + + if (!file) + return -1; + + if (!fgets(buf, sizeof(buf), file)) + return -1; + + fclose(file); + + range = strtok(buf, ","); + while (range != NULL) { + if (sscanf(range, "%d-%d", &start, &end) == 2) { + for (int i = start; i <= end; i++) + cpu_count++; + } else if (sscanf(range, "%d", &start) == 1) { + cpu_count++; + } else { + return -1; + } + range = strtok(NULL, ","); + } + + return cpu_count; +} + +/* + * get_cpu_info - Finds the following CPU attributes: + * + * threads_per_cpu: Number of threads per CPU, based on the device tree entry + * /proc/device-tree/cpus/<cpu_node>/ibm,ppc-interrupt-server#s. + * cpu_node_size: Size of files including file name size under a CPU node. + * + * Returns 0 on success, else -1. + */ +static int get_cpu_info(int *_present_cpus, int *_threads_per_cpu, int *_cpu_node_size) +{ + DIR *d; + struct dirent *de; + char path[PATH_MAX]; + int present_cpus = 0, threads_per_cpu = 0, cpu_node_size = 0; + char *cpus_node_path = "/proc/device-tree/cpus"; + + present_cpus = get_present_cpus(); + if (present_cpus < 0) + return -1; + + d = opendir(cpus_node_path); + if (!d) + return -1; + + while ((de = readdir(d)) != NULL) { + if ((de->d_type != DT_DIR) || is_dot_dir(de->d_name)) + continue; + + memset(path, '\0', PATH_MAX); + snprintf(path, PATH_MAX, "%s/%s/%s", cpus_node_path, + de->d_name, "device_type"); + + /* Skip nodes with device_type != "cpu" */ + if (!is_cpu_node(path)) + continue; + + /* + * Found the first node under /proc/device-tree/cpus with + * device_type == "cpu" + */ + memset(path, '\0', PATH_MAX); + snprintf(path, PATH_MAX, "%s/%s", cpus_node_path, de->d_name); + cpu_node_size = get_cpu_node_size(path); + + memset(path, '\0', PATH_MAX); + snprintf(path, PATH_MAX, "%s/%s/%s", cpus_node_path, + de->d_name, "ibm,ppc-interrupt-server#s"); + threads_per_cpu = get_threads_per_cpu(path); + break; + } + + closedir(d); + + if (!(threads_per_cpu && cpu_node_size)) + return -1; + + *_present_cpus = present_cpus; + *_cpu_node_size = cpu_node_size; + *_threads_per_cpu = threads_per_cpu; + + dbgprintf("present_cpus: %d, threads_per_cpu: %d, cpu_node_size: %d\n", + present_cpus, threads_per_cpu, cpu_node_size); + + return 0; +} + +/* + * kdump_fdt_extra_size - Calculates the extra size needed for the Flattened + * Device Tree (FDT) based on the possible and present + * CPUs in the system. + */ +static unsigned int kdump_fdt_extra_size(void) +{ + int cpus_in_system; + unsigned int extra_size = 0; + int present_cpus = 0, threads_per_cpu = 0, cpu_node_size = 0; + int possible_cpus; + + /* ALL possible CPUs are present in FDT so no extra size required */ + if (sysconf(_SC_NPROCESSORS_ONLN) == sysconf(_SC_NPROCESSORS_CONF)) + return 0; + + if (get_cpu_info(&present_cpus, &threads_per_cpu, &cpu_node_size)) { + die("Failed to get cpu info\n"); + } + + cpus_in_system = present_cpus / threads_per_cpu; + possible_cpus = sysconf(_SC_NPROCESSORS_CONF) / threads_per_cpu; + dbgprintf("cpus_in_system: %d, possible_cpus: %d\n", cpus_in_system, + possible_cpus); + + if (cpus_in_system > possible_cpus) + die("Possible CPU nodes can't be less than active CPU nodes\n"); + + extra_size = (possible_cpus - cpus_in_system) * cpu_node_size; + dbgprintf("kdump fdt extra size: %u\n", extra_size); + + return extra_size; +} /* * For now, assume that the added content fits in the file. * This should be the case when flattening from /proc/device-tree, * and when passing in a dtb, dtc can be told to add padding. */ -int fixup_dt(char **fdt, off_t *size) +int fixup_dt(char **fdt, off_t *size, unsigned long kexec_flags) { int ret; *size += 4096; + + /* To support --hotplug option for the kexec_load syscall, consider + * adding extra buffer to FDT so that the kernel can add CPU nodes + * of hot-added CPUs. + */ + if (do_hotplug && (kexec_flags & KEXEC_ON_CRASH)) + *size += kdump_fdt_extra_size(); + *fdt = realloc(*fdt, *size); if (!*fdt) { fprintf(stderr, "%s: out of memory\n", __func__); diff --git a/kexec/arch/ppc64/include/arch/fdt.h b/kexec/arch/ppc64/include/arch/fdt.h index b19f185..5f340b0 100644 --- a/kexec/arch/ppc64/include/arch/fdt.h +++ b/kexec/arch/ppc64/include/arch/fdt.h @@ -3,6 +3,6 @@ #include <sys/types.h> -int fixup_dt(char **fdt, off_t *size); +int fixup_dt(char **fdt, off_t *size, unsigned long kexec_flags); #endif diff --git a/kexec/arch/ppc64/kexec-elf-ppc64.c b/kexec/arch/ppc64/kexec-elf-ppc64.c index bdcfd20..858c994 100644 --- a/kexec/arch/ppc64/kexec-elf-ppc64.c +++ b/kexec/arch/ppc64/kexec-elf-ppc64.c @@ -345,7 +345,7 @@ int elf_ppc64_load(int argc, char **argv, const char *buf, off_t len, create_flatten_tree(&seg_buf, &seg_size, cmdline); } - result = fixup_dt(&seg_buf, &seg_size); + result = fixup_dt(&seg_buf, &seg_size, info->kexec_flags); if (result < 0) return result; diff --git a/kexec/arch/ppc64/kexec-ppc64.c b/kexec/arch/ppc64/kexec-ppc64.c index fb27b6b..13c3ce3 100644 --- a/kexec/arch/ppc64/kexec-ppc64.c +++ b/kexec/arch/ppc64/kexec-ppc64.c @@ -24,6 +24,7 @@ #include <errno.h> #include <stdint.h> #include <string.h> +#include <libfdt.h> #include <sys/stat.h> #include <sys/types.h> #include <dirent.h> @@ -968,7 +969,13 @@ void arch_update_purgatory(struct kexec_info *UNUSED(info)) { } -int arch_do_exclude_segment(struct kexec_segment *UNUSED(seg_ptr), struct kexec_info *UNUSED(info)) +int arch_do_exclude_segment(struct kexec_segment *seg_ptr, struct kexec_info *info) { + if (info->elfcorehdr == (unsigned long) seg_ptr->mem) + return 1; + + if (seg_ptr->buf && fdt_magic(seg_ptr->buf) == FDT_MAGIC) + return 1; + return 0; } -- 2.45.1 _______________________________________________ kexec mailing list kexec@xxxxxxxxxxxxxxxxxxx http://lists.infradead.org/mailman/listinfo/kexec