Hey Rob!
On 14.12.23 23:36, Rob Herring wrote:
On Wed, Dec 13, 2023 at 12:04:43AM +0000, Alexander Graf wrote:
We now have all bits in place to support KHO kexecs. This patch adds
awareness of KHO in the kexec file as well as boot path for arm64 and
adds the respective kconfig option to the architecture so that it can
use KHO successfully.
Signed-off-by: Alexander Graf <graf@xxxxxxxxxx>
---
arch/arm64/Kconfig | 12 ++++++++++++
arch/arm64/kernel/setup.c | 2 ++
arch/arm64/mm/init.c | 8 ++++++++
drivers/of/fdt.c | 41 +++++++++++++++++++++++++++++++++++++++
drivers/of/kexec.c | 36 ++++++++++++++++++++++++++++++++++
5 files changed, 99 insertions(+)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7b071a00425d..1ba338ce7598 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1501,6 +1501,18 @@ config ARCH_SUPPORTS_CRASH_DUMP
config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
def_bool CRASH_CORE
+config KEXEC_KHO
+ bool "kexec handover"
+ depends on KEXEC
+ select MEMBLOCK_SCRATCH
+ select LIBFDT
+ select CMA
+ help
+ Allow kexec to hand over state across kernels by generating and
+ passing additional metadata to the target kernel. This is useful
+ to keep data or state alive across the kexec. For this to work,
+ both source and target kernels need to have this option enabled.
Why do we have the same kconfig entry twice? Here and x86.
This was how the kexec config options were done when I originally wrote
the patches. Since then, it looks like Eric DeVolder has cleaned things up
quite nicely. I'll adapt to the new way.
+
config TRANS_TABLE
def_bool y
depends on HIBERNATION || KEXEC_CORE
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 417a8a86b2db..8035b673d96d 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -346,6 +346,8 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
paging_init();
+ kho_reserve_mem();
+
acpi_table_upgrade();
/* Parse the ACPI tables for possible boot-time configuration */
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 74c1db8ce271..254d82f3383a 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -358,6 +358,8 @@ void __init bootmem_init(void)
*/
arch_reserve_crashkernel();
+ kho_reserve();
+
Reserve what? It is not obvious what the difference between
kho_reserve_mem() and kho_reserve() is.
Yeah, I agree. I was struggling to find good names for them. What they
do is:
kho_reserve() - Reserve CMA memory for later kexec. We use this memory
region as scratch memory later.
kho_reserve_mem() - Post-KHO. Creates memblock reservations for the
memory that was handed over pre-KHO.
For v2, I'll change them to kho_reserve_scratch() and
kho_reserve_previous_mem() unless you have better ideas :)
memblock_dump_all();
}
@@ -386,6 +388,12 @@ void __init mem_init(void)
/* this will put all unused low memory onto the freelists */
memblock_free_all();
+ /*
+ * Now that all KHO pages are marked as reserved, let's flip them back
+ * to normal pages with accurate refcount.
+ */
+ kho_populate_refcount();
+
/*
* Check boundaries twice: Some fundamental inconsistencies can be
* detected at build time already.
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index bf502ba8da95..af95139351ed 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -1006,6 +1006,44 @@ void __init early_init_dt_check_for_usable_mem_range(void)
memblock_add(rgn[i].base, rgn[i].size);
}
+/**
+ * early_init_dt_check_kho - Decode info required for kexec handover from DT
+ */
+void __init early_init_dt_check_kho(void)
+{
+#ifdef CONFIG_KEXEC_KHO
if (!IS_ENABLED(CONFIG_KEXEC_KHO))
return;
You'll need a kho_populate() stub.
Always happy to remove #ifdefs :)
+ unsigned long node = chosen_node_offset;
+ u64 kho_start, scratch_start, scratch_size, mem_start, mem_size;
+ const __be32 *p;
+ int l;
+
+ if ((long)node < 0)
+ return;
+
+ p = of_get_flat_dt_prop(node, "linux,kho-dt", &l);
+ if (l != (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32))
+ return;
+
+ kho_start = dt_mem_next_cell(dt_root_addr_cells, &p);
+
+ p = of_get_flat_dt_prop(node, "linux,kho-scratch", &l);
+ if (l != (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32))
+ return;
+
+ scratch_start = dt_mem_next_cell(dt_root_addr_cells, &p);
+ scratch_size = dt_mem_next_cell(dt_root_addr_cells, &p);
+
+ p = of_get_flat_dt_prop(node, "linux,kho-mem", &l);
+ if (l != (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32))
+ return;
+
+ mem_start = dt_mem_next_cell(dt_root_addr_cells, &p);
+ mem_size = dt_mem_next_cell(dt_root_addr_cells, &p);
+
+ kho_populate(kho_start, scratch_start, scratch_size, mem_start, mem_size);
+#endif
+}
+
#ifdef CONFIG_SERIAL_EARLYCON
int __init early_init_dt_scan_chosen_stdout(void)
@@ -1304,6 +1342,9 @@ void __init early_init_dt_scan_nodes(void)
/* Handle linux,usable-memory-range property */
early_init_dt_check_for_usable_mem_range();
+
+ /* Handle kexec handover */
+ early_init_dt_check_kho();
}
bool __init early_init_dt_scan(void *params)
diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c
index 68278340cecf..a612e6bb8c75 100644
--- a/drivers/of/kexec.c
+++ b/drivers/of/kexec.c
@@ -264,6 +264,37 @@ static inline int setup_ima_buffer(const struct kimage *image, void *fdt,
}
#endif /* CONFIG_IMA_KEXEC */
+static int kho_add_chosen(const struct kimage *image, void *fdt, int chosen_node)
+{
+ int ret = 0;
+
+#ifdef CONFIG_KEXEC_KHO
ditto
Though perhaps image->kho is not defined?
Correct, it is not. But I'm happy to add a few local variables, filled
from the image->kho contents inside an #ifdef, so that we can at least
compile-check all libfdt invocations.
Alex
Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879