Signed-off-by: Sourabh Jain <sourabhjain@xxxxxxxxxxxxx>
Cc: Akhil Raj <lf32.dev@xxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Aneesh Kumar K.V <aneesh.kumar@xxxxxxxxxx>
Cc: Baoquan He <bhe@xxxxxxxxxx>
Cc: Borislav Petkov (AMD) <bp@xxxxxxxxx>
Cc: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
Cc: Christophe Leroy <christophe.leroy@xxxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: Dave Young <dyoung@xxxxxxxxxx>
Cc: David Hildenbrand <david@xxxxxxxxxx>
Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
Cc: Hari Bathini <hbathini@xxxxxxxxxxxxx>
Cc: Laurent Dufour <laurent.dufour@xxxxxxxxxx>
Cc: Mahesh Salgaonkar <mahesh@xxxxxxxxxxxxx>
Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
Cc: Mimi Zohar <zohar@xxxxxxxxxxxxx>
Cc: Naveen N Rao <naveen@xxxxxxxxxx>
Cc: Oscar Salvador <osalvador@xxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Valentin Schneider <vschneid@xxxxxxxxxx>
Cc: Vivek Goyal <vgoyal@xxxxxxxxxx>
Cc: kexec@xxxxxxxxxxxxxxxxxxx
Cc: x86@xxxxxxxxxx
---
arch/powerpc/include/asm/kexec.h | 3 +
arch/powerpc/include/asm/kexec_ranges.h | 1 +
arch/powerpc/kexec/crash.c | 95 ++++++++++++++++++++++++-
arch/powerpc/kexec/file_load_64.c | 20 +++++-
arch/powerpc/kexec/ranges.c | 85 ++++++++++++++++++++++
5 files changed, 202 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/include/asm/kexec.h
b/arch/powerpc/include/asm/kexec.h
index e75970351bcd..95a98b390d62 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -141,6 +141,9 @@ void arch_crash_handle_hotplug_event(struct
kimage *image, void *arg);
int arch_crash_hotplug_support(struct kimage *image, unsigned
long kexec_flags);
#define arch_crash_hotplug_support arch_crash_hotplug_support
+
+unsigned int arch_crash_get_elfcorehdr_size(void);
+#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size
#endif /* CONFIG_CRASH_HOTPLUG */
extern int crashing_cpu;
diff --git a/arch/powerpc/include/asm/kexec_ranges.h
b/arch/powerpc/include/asm/kexec_ranges.h
index 8489e844b447..14055896cbcb 100644
--- a/arch/powerpc/include/asm/kexec_ranges.h
+++ b/arch/powerpc/include/asm/kexec_ranges.h
@@ -7,6 +7,7 @@
void sort_memory_ranges(struct crash_mem *mrngs, bool merge);
struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges);
int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
+int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64
size);
int get_exclude_memory_ranges(struct crash_mem **mem_ranges);
int get_reserved_memory_ranges(struct crash_mem **mem_ranges);
int get_crash_memory_ranges(struct crash_mem **mem_ranges);
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index 8938a19af12f..21b193e938a3 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -17,6 +17,7 @@
#include <linux/irq.h>
#include <linux/types.h>
#include <linux/libfdt.h>
+#include <linux/memory.h>
#include <asm/processor.h>
#include <asm/machdep.h>
@@ -25,6 +26,7 @@
#include <asm/setjmp.h>
#include <asm/debug.h>
#include <asm/interrupt.h>
+#include <asm/kexec_ranges.h>
/*
* The primary CPU waits a while for all secondary CPUs to enter.
This is to
@@ -398,6 +400,94 @@ void default_machine_crash_shutdown(struct
pt_regs *regs)
#undef pr_fmt
#define pr_fmt(fmt) "crash hp: " fmt
+/*
+ * Advertise preferred elfcorehdr size to userspace via
+ * /sys/kernel/crash_elfcorehdr_size sysfs interface.
+ */
+unsigned int arch_crash_get_elfcorehdr_size(void)
+{
+ unsigned long phdr_cnt;
+
+ /* A program header for possible CPUs + vmcoreinfo */
+ phdr_cnt = num_possible_cpus() + 1;
+ if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
+ phdr_cnt += CONFIG_CRASH_MAX_MEMORY_RANGES;
+
+ return sizeof(struct elfhdr) + (phdr_cnt * sizeof(Elf64_Phdr));
+}
+
+/**
+ * update_crash_elfcorehdr() - Recreate the elfcorehdr and replace
it with old
+ * elfcorehdr in the kexec segment array.
+ * @image: the active struct kimage
+ * @mn: struct memory_notify data handler
+ */
+static void update_crash_elfcorehdr(struct kimage *image, struct
memory_notify *mn)
+{
+ int ret;
+ struct crash_mem *cmem = NULL;
+ struct kexec_segment *ksegment;
+ void *ptr, *mem, *elfbuf = NULL;
+ unsigned long elfsz, memsz, base_addr, size;
+
+ ksegment = &image->segment[image->elfcorehdr_index];
+ mem = (void *) ksegment->mem;
+ memsz = ksegment->memsz;
+
+ ret = get_crash_memory_ranges(&cmem);
+ if (ret) {
+ pr_err("Failed to get crash mem range\n");
+ return;
+ }
+
+ /*
+ * The hot unplugged memory is part of crash memory ranges,
+ * remove it here.
+ */
+ if (image->hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY) {
+ base_addr = PFN_PHYS(mn->start_pfn);
+ size = mn->nr_pages * PAGE_SIZE;
+ ret = remove_mem_range(&cmem, base_addr, size);
+ if (ret) {
+ pr_err("Failed to remove hot-unplugged memory from crash
memory ranges\n");
+ goto out;
+ }
+ }
+
+ ret = crash_prepare_elf64_headers(cmem, false, &elfbuf, &elfsz);
+ if (ret) {
+ pr_err("Failed to prepare elf header\n");
+ goto out;
+ }
+
+ /*
+ * It is unlikely that kernel hit this because elfcorehdr kexec
+ * segment (memsz) is built with addition space to accommodate
growing
+ * number of crash memory ranges while loading the kdump kernel.
It is
+ * Just to avoid any unforeseen case.
+ */
+ if (elfsz > memsz) {
+ pr_err("Updated crash elfcorehdr elfsz %lu > memsz %lu",
elfsz, memsz);
+ goto out;
+ }
+
+ ptr = __va(mem);
+ if (ptr) {
+ /* Temporarily invalidate the crash image while it is
replaced */
+ xchg(&kexec_crash_image, NULL);
+
+ /* Replace the old elfcorehdr with newly prepared elfcorehdr */
+ memcpy((void *)ptr, elfbuf, elfsz);
+
+ /* The crash image is now valid once again */
+ xchg(&kexec_crash_image, image);
+ }
+out:
+ kvfree(cmem);
+ if (elfbuf)
+ kvfree(elfbuf);
+}
+
/**
* get_fdt_index - Loop through the kexec segment array and find
* the index of the FDT segment.
@@ -478,6 +568,8 @@ int arch_crash_hotplug_support(struct kimage
*image, unsigned long kexec_flags)
*/
void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
{
+ struct memory_notify *mn;
+
switch (image->hp_action) {
case KEXEC_CRASH_HP_REMOVE_CPU:
return;
@@ -488,7 +580,8 @@ void arch_crash_handle_hotplug_event(struct
kimage *image, void *arg)
case KEXEC_CRASH_HP_REMOVE_MEMORY:
case KEXEC_CRASH_HP_ADD_MEMORY:
- pr_info_once("Crash update is not supported for memory
hotplug\n");
+ mn = (struct memory_notify *)arg;
+ update_crash_elfcorehdr(image, mn);
return;
default:
pr_warn_once("Unknown hotplug action\n");
diff --git a/arch/powerpc/kexec/file_load_64.c
b/arch/powerpc/kexec/file_load_64.c
index 2640a804fcdf..925a69ad2468 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -595,6 +595,23 @@ static void update_backup_region_phdr(struct
kimage *image, Elf64_Ehdr *ehdr)
}
}
+static unsigned int kdump_extra_elfcorehdr_size(struct crash_mem
*cmem)
+{
+#if defined(CONFIG_CRASH_HOTPLUG) && defined(CONFIG_MEMORY_HOTPLUG)
+ unsigned int extra_sz = 0;
+
+ if (CONFIG_CRASH_MAX_MEMORY_RANGES > (unsigned int)PN_XNUM)
+ pr_warn("Number of Phdrs %u exceeds max\n",
CONFIG_CRASH_MAX_MEMORY_RANGES);
+ else if (cmem->nr_ranges >= CONFIG_CRASH_MAX_MEMORY_RANGES)
+ pr_warn("Configured crash mem ranges may not be enough\n");
+ else
+ extra_sz = (CONFIG_CRASH_MAX_MEMORY_RANGES -
cmem->nr_ranges) * sizeof(Elf64_Phdr);
+
+ return extra_sz;
+#endif
+ return 0;
+}
+
/**
* load_elfcorehdr_segment - Setup crash memory ranges and
initialize elfcorehdr
* segment needed to load kdump kernel.
@@ -626,7 +643,8 @@ static int load_elfcorehdr_segment(struct kimage
*image, struct kexec_buf *kbuf)
kbuf->buffer = headers;
kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
- kbuf->bufsz = kbuf->memsz = headers_sz;
+ kbuf->bufsz = headers_sz;
+ kbuf->memsz = headers_sz + kdump_extra_elfcorehdr_size(cmem);
kbuf->top_down = false;
ret = kexec_add_buffer(kbuf);
diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c
index 297b8bc97b9f..00dfd62203bd 100644
--- a/arch/powerpc/kexec/ranges.c
+++ b/arch/powerpc/kexec/ranges.c
@@ -618,4 +618,89 @@ int get_crash_memory_ranges(struct crash_mem
**mem_ranges)
pr_err("Failed to setup crash memory ranges\n");
return ret;
}
+
+/**
+ * remove_mem_range - Removes the given memory range from the range
list.
+ * @mem_ranges: Range list to remove the memory range to.
+ * @base: Base address of the range to remove.
+ * @size: Size of the memory range to remove.
+ *
+ * (Re)allocates memory, if needed.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
+{
+ u64 end;
+ int ret = 0;
+ unsigned int i;
+ u64 mstart, mend;
+ struct crash_mem *mem_rngs = *mem_ranges;
+
+ if (!size)
+ return 0;
+
+ /*
+ * Memory range are stored as start and end address, use
+ * the same format to do remove operation.
+ */
+ end = base + size - 1;
+
+ for (i = 0; i < mem_rngs->nr_ranges; i++) {
+ mstart = mem_rngs->ranges[i].start;
+ mend = mem_rngs->ranges[i].end;
+
+ /*
+ * Memory range to remove is not part of this range entry
+ * in the memory range list
+ */
+ if (!(base >= mstart && end <= mend))
+ continue;
+
+ /*
+ * Memory range to remove is equivalent to this entry in the
+ * memory range list. Remove the range entry from the list.
+ */
+ if (base == mstart && end == mend) {
+ for (; i < mem_rngs->nr_ranges - 1; i++) {
+ mem_rngs->ranges[i].start =
mem_rngs->ranges[i+1].start;
+ mem_rngs->ranges[i].end = mem_rngs->ranges[i+1].end;
+ }
+ mem_rngs->nr_ranges--;
+ goto out;
+ }
+ /*
+ * Start address of the memory range to remove and the
+ * current memory range entry in the list is same. Just
+ * move the start address of the current memory range
+ * entry in the list to end + 1.
+ */
+ else if (base == mstart) {
+ mem_rngs->ranges[i].start = end + 1;
+ goto out;
+ }
+ /*
+ * End address of the memory range to remove and the
+ * current memory range entry in the list is same.
+ * Just move the end address of the current memory
+ * range entry in the list to base - 1.
+ */
+ else if (end == mend) {
+ mem_rngs->ranges[i].end = base - 1;
+ goto out;
+ }
+ /*
+ * Memory range to remove is not at the edge of current
+ * memory range entry. Split the current memory entry into
+ * two half.
+ */
+ else {
+ mem_rngs->ranges[i].end = base - 1;
+ size = mem_rngs->ranges[i].end - end;
+ ret = add_mem_range(mem_ranges, end + 1, size);
+ }
+ }
+out:
+ return ret;
+}
#endif /* CONFIG_CRASH_DUMP */