Signed-off-by: Sourabh Jain <sourabhjain@xxxxxxxxxxxxx>
---
Changelog:
Since v1:
- Find CPUs in the system using the /sys/devices/system/cpu/present sysfs
instead of traversing all nodes under /proc/device-tree/cpus.
- Added a new function to find present CPUs in the system.
- Removed unnecessary NULL check on seg_ptr from arch_do_exclude_segment().
---
kexec/arch/ppc64/crashdump-ppc64.c | 16 +-
kexec/arch/ppc64/fdt.c | 236 +++++++++++++++++++++++++++-
kexec/arch/ppc64/include/arch/fdt.h | 2 +-
kexec/arch/ppc64/kexec-elf-ppc64.c | 2 +-
kexec/arch/ppc64/kexec-ppc64.c | 9 +-
5 files changed, 258 insertions(+), 7 deletions(-)
diff --git a/kexec/arch/ppc64/crashdump-ppc64.c b/kexec/arch/ppc64/crashdump-ppc64.c
index 6d47898..98d439a 100644
--- a/kexec/arch/ppc64/crashdump-ppc64.c
+++ b/kexec/arch/ppc64/crashdump-ppc64.c
@@ -476,7 +476,7 @@ int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline,
uint64_t max_addr, unsigned long min_base)
{
void *tmp;
- unsigned long sz;
+ unsigned long sz, memsz;
uint64_t elfcorehdr;
int nr_ranges, align = 1024, i;
unsigned long long end;
@@ -531,8 +531,18 @@ int load_crashdump_segments(struct kexec_info *info, char* mod_cmdline,
}
}
- elfcorehdr = add_buffer(info, tmp, sz, sz, align, min_base,
- max_addr, 1);
+ memsz = sz;
+ /* To support --hotplug, replace the calculated memsz with the value
+ * from /sys/kernel/crash_elfcorehdr_size and align it correctly.
+ */
+ if (do_hotplug) {
+ if (elfcorehdrsz > sz)
+ memsz = _ALIGN(elfcorehdrsz, align);
+ }
+
+ /* Record the location of the elfcorehdr for hotplug handling */
+ info->elfcorehdr = elfcorehdr = add_buffer(info, tmp, sz, memsz, align,
+ min_base, max_addr, 1);
reserve(elfcorehdr, sz);
/* modify and store the cmdline in a global array. This is later
* read by flatten_device_tree and modified if required
diff --git a/kexec/arch/ppc64/fdt.c b/kexec/arch/ppc64/fdt.c
index 8bc6d2d..879240f 100644
--- a/kexec/arch/ppc64/fdt.c
+++ b/kexec/arch/ppc64/fdt.c
@@ -17,6 +17,13 @@
#include <libfdt.h>
#include <stdio.h>
#include <stdlib.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <dirent.h>
+#include <sys/stat.h>
+
+#include "../../kexec.h"
+#include "../../kexec-syscall.h"
/*
* Let the kernel know it booted from kexec, as some things (e.g.
@@ -46,17 +53,244 @@ static int fixup_kexec_prop(void *fdt)
return 0;
}
+static inline bool is_dot_dir(char * d_path)
+{
+ return d_path[0] == '.';
+}
+
+/*
+ * get_cpu_node_size - Returns size of files including file name size under
+ * the given @cpu_node_path.
+ */
+static int get_cpu_node_size(char *cpu_node_path)
+{
+ DIR *d;
+ struct dirent *de;
+ struct stat statbuf;
+ int cpu_node_size = 0;
+ char cpu_prop_path[2 * PATH_MAX];
+
+ d = opendir(cpu_node_path);
+ if (!d)
+ return 0;
+
+ while ((de = readdir(d)) != NULL) {
+ if (de->d_type != DT_REG)
+ continue;
+
+ memset(cpu_prop_path, '\0', PATH_MAX);
+ snprintf(cpu_prop_path, 2 * PATH_MAX, "%s/%s", cpu_node_path,
+ de->d_name);
+
+ if (stat(cpu_prop_path, &statbuf))
+ continue;
+
+ cpu_node_size += statbuf.st_size;
+ cpu_node_size += strlen(de->d_name);
+ }
+
+ return cpu_node_size;
+}
+
+/*
+ * is_cpu_node - Checks if the node specified by the given @path
+ * represents a CPU node.
+ *
+ * Returns true if the @path has a "device_type" file containing "cpu";
+ * otherwise, returns false.
+ */
+static bool is_cpu_node(char *path)
+{
+ FILE *file;
+ bool ret = false;
+ char device_type[4];
+
+ file = fopen(path, "r");
+ if (!file)
+ return false;
+
+ memset(device_type, '\0', 4);
+ if (fread(device_type, 1, 3, file) < 3)
+ goto out;
+
+ if (strcmp(device_type, "cpu"))
+ goto out;
+
+ ret = true;
+out:
+ fclose(file);
+ return ret;
+}
+
+static int get_threads_per_cpu(char *path)
+{
+ struct stat statbuf;
+ if (stat(path, &statbuf))
+ return 0;
+
+ return statbuf.st_size / 4;
+}
+
+/**
+ * get_present_cpus - finds the present CPUs in the system
+ *
+ * This function opens the file `/sys/devices/system/cpu/present` to read
+ * the range of present CPUs. It parses the range and calculates the
+ * total number of present CPUs in the system.
+ *
+ * Returns total number of present CPUs on success, -1 on failure.
+ */
+static int get_present_cpus()
+{
+ char *range;
+ char buf[1024];
+ int start, end;
+ int cpu_count = 0;
+ FILE *file = fopen("/sys/devices/system/cpu/present", "r");
+
+ if (!file)
+ return -1;
+
+ if (!fgets(buf, sizeof(buf), file))
+ return -1;
+
+ fclose(file);
+
+ range = strtok(buf, ",");
+ while (range != NULL) {
+ if (sscanf(range, "%d-%d", &start, &end) == 2) {
+ for (int i = start; i <= end; i++)
+ cpu_count++;
+ } else if (sscanf(range, "%d", &start) == 1) {
+ cpu_count++;
+ } else {
+ return -1;
+ }
+ range = strtok(NULL, ",");
+ }
+
+ return cpu_count;
+}
+
+/*
+ * get_cpu_info - Finds the following CPU attributes:
+ *
+ * threads_per_cpu: Number of threads per CPU, based on the device tree entry
+ * /proc/device-tree/cpus/<cpu_node>/ibm,ppc-interrupt-server#s.
+ * cpu_node_size: Size of files including file name size under a CPU node.
+ *
+ * Returns 0 on success, else -1.
+ */
+static int get_cpu_info(int *_present_cpus, int *_threads_per_cpu, int *_cpu_node_size)
+{
+ DIR *d;
+ struct dirent *de;
+ char path[PATH_MAX];
+ int present_cpus = 0, threads_per_cpu = 0, cpu_node_size = 0;
+ char *cpus_node_path = "/proc/device-tree/cpus";
+
+ present_cpus = get_present_cpus();
+ if (present_cpus < 0)
+ return -1;
+
+ d = opendir(cpus_node_path);
+ if (!d)
+ return -1;
+
+ while ((de = readdir(d)) != NULL) {
+ if ((de->d_type != DT_DIR) || is_dot_dir(de->d_name))
+ continue;
+
+ memset(path, '\0', PATH_MAX);
+ snprintf(path, PATH_MAX, "%s/%s/%s", cpus_node_path,
+ de->d_name, "device_type");
+
+ /* Skip nodes with device_type != "cpu" */
+ if (!is_cpu_node(path))
+ continue;
+
+ /*
+ * Found the first node under /proc/device-tree/cpus with
+ * device_type == "cpu"
+ */
+ memset(path, '\0', PATH_MAX);
+ snprintf(path, PATH_MAX, "%s/%s", cpus_node_path, de->d_name);
+ cpu_node_size = get_cpu_node_size(path);
+
+ memset(path, '\0', PATH_MAX);
+ snprintf(path, PATH_MAX, "%s/%s/%s", cpus_node_path,
+ de->d_name, "ibm,ppc-interrupt-server#s");
+ threads_per_cpu = get_threads_per_cpu(path);
+ break;
+ }
+
+ closedir(d);
+
+ if (!(threads_per_cpu && cpu_node_size))
+ return -1;
+
+ *_present_cpus = present_cpus;
+ *_cpu_node_size = cpu_node_size;
+ *_threads_per_cpu = threads_per_cpu;
+
+ dbgprintf("present_cpus: %d, threads_per_cpu: %d, cpu_node_size: %d\n",
+ present_cpus, threads_per_cpu, cpu_node_size);
+
+ return 0;
+}
+
+/*
+ * kdump_fdt_extra_size - Calculates the extra size needed for the Flattened
+ * Device Tree (FDT) based on the possible and present
+ * CPUs in the system.
+ */
+static unsigned int kdump_fdt_extra_size(void)
+{
+ int cpus_in_system;
+ unsigned int extra_size = 0;
+ int present_cpus = 0, threads_per_cpu = 0, cpu_node_size = 0;
+ int possible_cpus;
+
+ /* ALL possible CPUs are present in FDT so no extra size required */
+ if (sysconf(_SC_NPROCESSORS_ONLN) == sysconf(_SC_NPROCESSORS_CONF))
+ return 0;
+
+ if (get_cpu_info(&present_cpus, &threads_per_cpu, &cpu_node_size)) {
+ die("Failed to get cpu info\n");
+ }
+
+ cpus_in_system = present_cpus / threads_per_cpu;
+ possible_cpus = sysconf(_SC_NPROCESSORS_CONF) / threads_per_cpu;
+ dbgprintf("cpus_in_system: %d, possible_cpus: %d\n", cpus_in_system,
+ possible_cpus);
+
+ if (cpus_in_system > possible_cpus)
+ die("Possible CPU nodes can't be less than active CPU nodes\n");
+
+ extra_size = (possible_cpus - cpus_in_system) * cpu_node_size;
+ dbgprintf("kdump fdt extra size: %u\n", extra_size);
+
+ return extra_size;
+}
/*
* For now, assume that the added content fits in the file.
* This should be the case when flattening from /proc/device-tree,
* and when passing in a dtb, dtc can be told to add padding.
*/
-int fixup_dt(char **fdt, off_t *size)
+int fixup_dt(char **fdt, off_t *size, unsigned long kexec_flags)
{
int ret;
*size += 4096;
+
+ /* To support --hotplug option for the kexec_load syscall, consider
+ * adding extra buffer to FDT so that the kernel can add CPU nodes
+ * of hot-added CPUs.
+ */
+ if (do_hotplug && (kexec_flags & KEXEC_ON_CRASH))
+ *size += kdump_fdt_extra_size();
+
*fdt = realloc(*fdt, *size);
if (!*fdt) {
fprintf(stderr, "%s: out of memory\n", __func__);
diff --git a/kexec/arch/ppc64/include/arch/fdt.h b/kexec/arch/ppc64/include/arch/fdt.h
index b19f185..5f340b0 100644
--- a/kexec/arch/ppc64/include/arch/fdt.h
+++ b/kexec/arch/ppc64/include/arch/fdt.h
@@ -3,6 +3,6 @@
#include <sys/types.h>
-int fixup_dt(char **fdt, off_t *size);
+int fixup_dt(char **fdt, off_t *size, unsigned long kexec_flags);
#endif
diff --git a/kexec/arch/ppc64/kexec-elf-ppc64.c b/kexec/arch/ppc64/kexec-elf-ppc64.c
index bdcfd20..858c994 100644
--- a/kexec/arch/ppc64/kexec-elf-ppc64.c
+++ b/kexec/arch/ppc64/kexec-elf-ppc64.c
@@ -345,7 +345,7 @@ int elf_ppc64_load(int argc, char **argv, const char *buf, off_t len,
create_flatten_tree(&seg_buf, &seg_size, cmdline);
}
- result = fixup_dt(&seg_buf, &seg_size);
+ result = fixup_dt(&seg_buf, &seg_size, info->kexec_flags);
if (result < 0)
return result;
diff --git a/kexec/arch/ppc64/kexec-ppc64.c b/kexec/arch/ppc64/kexec-ppc64.c
index fb27b6b..13c3ce3 100644
--- a/kexec/arch/ppc64/kexec-ppc64.c
+++ b/kexec/arch/ppc64/kexec-ppc64.c
@@ -24,6 +24,7 @@
#include <errno.h>
#include <stdint.h>
#include <string.h>
+#include <libfdt.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <dirent.h>
@@ -968,7 +969,13 @@ void arch_update_purgatory(struct kexec_info *UNUSED(info))
{
}
-int arch_do_exclude_segment(struct kexec_segment *UNUSED(seg_ptr), struct kexec_info *UNUSED(info))
+int arch_do_exclude_segment(struct kexec_segment *seg_ptr, struct kexec_info *info)
{
+ if (info->elfcorehdr == (unsigned long) seg_ptr->mem)
+ return 1;
+
+ if (seg_ptr->buf && fdt_magic(seg_ptr->buf) == FDT_MAGIC)
+ return 1;
+
return 0;
}