This patch adds support for kdump: the kernel reserves a region
for the crash kernel and jumps there on panic.
Arch-specific functions are added to allow for implementing a crash
dump file interface, /proc/vmcore, which can be viewed as an ELF file.
A user space tool, like kexec-tools, is responsible for allocating a
separate region for the core's ELF header within the crash dump kernel's
memory and filling it in when executing kexec_load().
Then, its location will be advertised to the crash dump kernel via a new
device-tree property, "linux,elfcorehdr", and the crash dump kernel preserves
the region for later use with fdt_reserve_elfcorehdr() at boot time.
At the same time, the crash dump kernel is also limited to the
crashkernel area via a new device-tree property, "linux,usable-memory-range",
so as not to destroy the original kernel dump data.
On the crash dump kernel, /proc/vmcore will access the primary kernel's memory
with copy_oldmem_page().
I tested this on a LoongArch 3A5000 machine and it works as expected (the
suggested crashkernel parameter is "crashkernel=512M@2560M"). You may test it
by triggering a crash through /proc/sysrq-trigger:
$ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
# echo c > /proc/sysrq-trigger
Signed-off-by: Youling Tang <tangyouling@xxxxxxxxxxx>
---
arch/loongarch/Kconfig | 22 ++++++
arch/loongarch/Makefile | 4 ++
arch/loongarch/kernel/Makefile | 1 +
arch/loongarch/kernel/crash_dump.c | 19 +++++
arch/loongarch/kernel/machine_kexec.c | 95 +++++++++++++++++++++++--
arch/loongarch/kernel/mem.c | 6 ++
arch/loongarch/kernel/relocate_kernel.S | 6 ++
arch/loongarch/kernel/setup.c | 49 +++++++++++++
arch/loongarch/kernel/traps.c | 4 ++
9 files changed, 200 insertions(+), 6 deletions(-)
create mode 100644 arch/loongarch/kernel/crash_dump.c
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 08e063aaf847..4eeeebf888c1 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -433,6 +433,28 @@ config KEXEC
The name comes from the similarity to the exec system call.
+config CRASH_DUMP # also gates building crash_dump.o in arch/loongarch/kernel/Makefile
+ bool "Build kdump crash kernel"
+ help
+ Generate crash dump after being started by kexec. This should
+ be normally only set in special crash dump kernels which are
+ loaded in the main kernel with kexec-tools into a specially
+ reserved region and then later executed after a crash by
+ kdump/kexec.
+
+ For more details see Documentation/admin-guide/kdump/kdump.rst
+
+config PHYSICAL_START # arch/loongarch/Makefile uses this value as load-y when it is defined
+ hex "Physical address where the kernel is loaded"
+ default "0x90000000a0000000" if 64BIT # low part 0xa0000000 is 2560M, i.e. a crashkernel=...@2560M reservation
+ depends on CRASH_DUMP # only offered when building a crash dump kernel
+ help
+ This gives the XKPRANGE address where the kernel is loaded.
+ If you plan to use kernel for capturing the crash dump change
+ this value to start of the reserved region (the "X" value as
+ specified in the "crashkernel=YM@XM" command line boot parameter
+ passed to the panic-ed kernel).
+
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 69b39ba3a09d..224274c1644e 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -66,7 +66,11 @@ endif
cflags-y += -ffreestanding
cflags-y += $(call cc-option, -mno-check-zero-division)
+ifdef CONFIG_PHYSICAL_START
+load-y = $(CONFIG_PHYSICAL_START)
+else # CONFIG_PHYSICAL_START is not defined: keep the fixed default load address
load-y = 0x9000000000200000
+endif # CONFIG_PHYSICAL_START
bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y)
drivers-$(CONFIG_PCI) += arch/loongarch/pci/
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 17dc8ce6b5ce..79eee7db1414 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_MODULES) += module.o module-sections.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_PROC_FS) += proc.o
diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
new file mode 100644
index 000000000000..13e5d2f7870d
--- /dev/null
+++ b/arch/loongarch/kernel/crash_dump.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/highmem.h>
+#include <linux/crash_dump.h>
+#include <linux/io.h>
+
+ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
+ size_t csize, unsigned long offset)
+{ /* Copy up to csize bytes, starting offset bytes into page frame pfn, into iter. */
+ void *vaddr;
+
+ if (!csize) /* nothing requested: return before creating any mapping */
+ return 0;
+
+ vaddr = kmap_local_pfn(pfn); /* temporary mapping of the source page frame */
+ csize = copy_to_iter(vaddr + offset, csize, iter); /* csize is reused to hold the copy_to_iter() result */
+ kunmap_local(vaddr); /* drop the temporary mapping before returning */
+
+ return csize; /* value reported by copy_to_iter(), not the size originally requested */
+}
diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
index 7b3fea506c6d..847c4d025fed 100644
--- a/arch/loongarch/kernel/machine_kexec.c
+++ b/arch/loongarch/kernel/machine_kexec.c
@@ -6,11 +6,16 @@
*/
#include <linux/compiler.h>
#include <linux/cpu.h>
-#include <linux/kexec.h>
-#include <linux/mm.h>
+#include <linux/crash_dump.h>
#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/kexec.h>
#include <linux/libfdt.h>
+#include <linux/mm.h>
#include <linux/of_fdt.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
#include <asm/bootinfo.h>
#include <asm/cacheflush.h>
@@ -21,6 +26,7 @@
#define KEXEC_BLOB_ADDR TO_CACHE(0x108000UL)
static unsigned long reboot_code_buffer;
+static cpumask_t cpus_in_crash = CPU_MASK_NONE;
#ifdef CONFIG_SMP
static void (*relocated_kexec_smp_wait)(void *);
@@ -70,7 +76,7 @@ int machine_kexec_prepare(struct kimage *kimage)
continue;
}
- /* kexec need a safe page to save reboot_code_buffer */
+ /* kexec/kdump need a safe page to save reboot_code_buffer */
kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
reboot_code_buffer =
@@ -119,10 +125,85 @@ static void kexec_shutdown_secondary(void *)
kexec_reboot();
}
-#endif
+
+static void crash_shutdown_secondary(void *passed_regs)
+{ /* Callback handed to smp_call_function() by crash_smp_send_stop(); runs on the other CPUs. */
+ struct pt_regs *regs = passed_regs; /* NULL when invoked from crash_smp_send_stop() */
+ int cpu = smp_processor_id();
+
+ /*
+ * If we are passed registers, use those. Otherwise get the
+ * regs from the last interrupt, which should be correct, as
+ * we are in an interrupt. But if the regs are not there,
+ * pull them from the top of the stack. They are probably
+ * wrong, but we need something to keep from crashing again.
+ */
+ if (!regs)
+ regs = get_irq_regs();
+ if (!regs)
+ regs = task_pt_regs(current);
+
+ local_irq_disable();
+ if (!cpumask_test_cpu(cpu, &cpus_in_crash)) /* save registers only if this CPU is not yet marked */
+ crash_save_cpu(regs, cpu);
+ cpumask_set_cpu(cpu, &cpus_in_crash); /* crash_smp_send_stop() polls the weight of this mask */
+
+ while (!atomic_read(&kexec_ready_to_reboot)) /* spin until kexec_ready_to_reboot reads non-zero */
+ cpu_relax();
+
+ kexec_reboot(); /* defined outside this hunk; NOTE(review): presumably does not return - confirm */
+}
+
+void crash_smp_send_stop(void)
+{ /* Ask every other online CPU to run crash_shutdown_secondary(), then wait (bounded) for them. */
+ static int cpus_stopped; /* latched to 1 on the first call */
+ unsigned long timeout;
+ unsigned int ncpus;
+
+ /*
+ * This function can be called twice in the panic path, but the
+ * work below must be done only once.
+ */
+ if (cpus_stopped)
+ return;
+
+ cpus_stopped = 1;
+
+ /* Excluding the panic cpu */
+ ncpus = num_online_cpus() - 1;
+
+ smp_call_function(crash_shutdown_secondary, NULL, 0); /* NULL regs; last argument 0, the wait is done by the poll loop below */
+ smp_wmb(); /* NOTE(review): which store this barrier orders is not evident from this hunk - confirm */
+
+ /*
+ * The crash CPU has sent the IPI and now waits for the other CPUs to
+ * check in. The wait is bounded: at most 10 * USEC_PER_SEC polls of udelay(1).
+ */
+ pr_emerg("Sending IPI to other cpus...\n");
+ timeout = USEC_PER_SEC * 10;
+ while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) { /* exits early once ncpus bits are set */
+ cpu_relax();
+ udelay(1);
+ }
+}
+#endif /* defined(CONFIG_SMP) */
void machine_crash_shutdown(struct pt_regs *regs)
{
+ int crashing_cpu; /* CPU executing this crash shutdown */
+
+ local_irq_disable(); /* crash path: record this CPU first, then stop the others (SMP only) */
+
+ crashing_cpu = smp_processor_id();
+ crash_save_cpu(regs, crashing_cpu); /* save the registers passed in by the caller for this CPU */
+
+#ifdef CONFIG_SMP
+ /* shutdown non-crashing cpus */
+ crash_smp_send_stop();
+#endif
+ cpumask_set_cpu(crashing_cpu, &cpus_in_crash); /* set only after the wait above, so that poll counts just the other CPUs */
+
+ pr_info("Starting crashdump kernel...\n");
}
void machine_shutdown(void)
@@ -143,7 +224,8 @@ void machine_kexec(struct kimage *image)
jump_addr = (unsigned long)phys_to_virt(image->start);
- first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
+ if (image->type == KEXEC_TYPE_DEFAULT)
+ first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);