safe for the kernel to modify the elfcorehdr (because kexec-tools
has excluded the elfcorehdr from the digest).
- the /sys/kernel/crash_elfcorehdr_size node to communicate to
kexec-tools what the preferred size of the elfcorehdr memory buffer
should be in order to accommodate hotplug changes.
- The sysfs crash_hotplug nodes (ie.
/sys/devices/system/[cpu|memory]/crash_hotplug) are now dynamic in
that they examine kexec_file_load() vs kexec_load(), and when
kexec_load(), whether or not KEXEC_UPDATE_ELFCOREHDR is in effect.
This is critical so that the udev rule processing of crash_hotplug
indicates correctly (ie. the userspace unload-then-load of the
kdump of the kdump image can be skipped, or not).
With this patch in place, I believe the following statements to be true
(with local testing to verify):
- For systems which have these kernel changes in place, but not the
corresponding changes to the crash hot plug udev rules and
kexec-tools, (ie "older" systems) those systems will continue to
unload-then-load the kdump image, as has always been done. The
kexec-tools will not set KEXEC_UPDATE_ELFCOREHDR.
- For systems which have these kernel changes in place and the proposed
udev rule changes in place, but not the kexec-tools changes in place:
- the use of kexec_load() will not set KEXEC_UPDATE_ELFCOREHDR and
so the unload-then-reload of kdump image will occur (the sysfs
crash_hotplug nodes will show 0).
- the use of kexec_file_load() will permit sysfs crash_hotplug nodes
to show 1, and the kernel will modify the elfcorehdr directly. And
with the udev changes in place, the unload-then-load will not occur!
- For systems which have these kernel changes as well as the udev and
kexec-tools changes in place, then the user/admin has full authority
over the enablement and support of crash hotplug support, whether via
kexec_file_load() or kexec_load().
Said differently, as kexec_load() was/is widely in use, these changes
permit it to continue to be used as-is (retaining the current unload-then-
reload behavior) until such time as the udev and kexec-tools changes can
be rolled out as well.
I've intentionally kept the changes related to userspace coordination
for kexec_load() separate as this need was identified late; the
rest of this series has been generally reviewed and accepted. Once
this support has been vetted, I can refactor if needed.
Suggested-by: Hari Bathini <hbathini@xxxxxxxxxxxxx>
Signed-off-by: Eric DeVolder <eric.devolder@xxxxxxxxxx>
---
arch/x86/include/asm/kexec.h | 11 +++++++----
arch/x86/kernel/crash.c | 27 +++++++++++++++++++++++++++
include/linux/kexec.h | 14 ++++++++++++--
include/uapi/linux/kexec.h | 1 +
kernel/crash_core.c | 31 +++++++++++++++++++++++++++++++
kernel/kexec.c | 3 +++
kernel/ksysfs.c | 15 +++++++++++++++
7 files changed, 96 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 9143100ea3ea..3be6a98751f0 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -214,14 +214,17 @@ void arch_crash_handle_hotplug_event(struct kimage *image);
#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
#ifdef CONFIG_HOTPLUG_CPU
-static inline int crash_hotplug_cpu_support(void) { return 1; }
-#define crash_hotplug_cpu_support crash_hotplug_cpu_support
+int arch_crash_hotplug_cpu_support(void);
+#define crash_hotplug_cpu_support arch_crash_hotplug_cpu_support
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
-static inline int crash_hotplug_memory_support(void) { return 1; }
-#define crash_hotplug_memory_support crash_hotplug_memory_support
+int arch_crash_hotplug_memory_support(void);
+#define crash_hotplug_memory_support arch_crash_hotplug_memory_support
#endif
+
+unsigned int arch_crash_get_elfcorehdr_size(void);
+#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size
#endif
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 0c9d496cf7ce..8064e65de6c0 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -442,6 +442,33 @@ int crash_load_segments(struct kimage *image)
#undef pr_fmt
#define pr_fmt(fmt) "crash hp: " fmt
+/* These functions provide the value for the sysfs crash_hotplug nodes */
+#ifdef CONFIG_HOTPLUG_CPU
+int arch_crash_hotplug_cpu_support(void)
+{
+ return crash_check_update_elfcorehdr();
+}
+#endif
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int arch_crash_hotplug_memory_support(void)
+{
+ return crash_check_update_elfcorehdr();
+}
+#endif
+
+unsigned int arch_crash_get_elfcorehdr_size(void)
+{
+ unsigned int sz;
+
+ if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
+ sz = 2 + CONFIG_NR_CPUS_DEFAULT + CRASH_MAX_MEMORY_RANGES;
+ else
+ sz += 2 + CONFIG_NR_CPUS_DEFAULT;
+ sz *= sizeof(Elf64_Phdr);
+ return sz;
+}
+
/**
* arch_crash_handle_hotplug_event() - Handle hotplug elfcorehdr changes
* @image: the active struct kimage
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 6a8a724ac638..050e20066cdb 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -335,6 +335,10 @@ struct kimage {
unsigned int preserve_context : 1;
/* If set, we are using file mode kexec syscall */
unsigned int file_mode:1;
+#ifdef CONFIG_CRASH_HOTPLUG
+ /* If set, allow changes to elfcorehdr of kexec_load'd image */
+ unsigned int update_elfcorehdr:1;
+#endif
#ifdef ARCH_HAS_KIMAGE_ARCH
struct kimage_arch arch;
@@ -411,9 +415,9 @@ bool kexec_load_permitted(int kexec_image_type);
/* List of defined/legal kexec flags */
#ifndef CONFIG_KEXEC_JUMP
-#define KEXEC_FLAGS KEXEC_ON_CRASH
+#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_UPDATE_ELFCOREHDR)
#else
-#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)
+#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT | KEXEC_UPDATE_ELFCOREHDR)
#endif
/* List of defined/legal kexec file flags */
@@ -501,6 +505,8 @@ static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) {
static inline void arch_crash_handle_hotplug_event(struct kimage *image) { }
#endif
+int crash_check_update_elfcorehdr(void);
+
#ifndef crash_hotplug_cpu_support
static inline int crash_hotplug_cpu_support(void) { return 0; }
#endif
@@ -509,6 +515,10 @@ static inline int crash_hotplug_cpu_support(void) { return 0; }
static inline int crash_hotplug_memory_support(void) { return 0; }
#endif
+#ifndef crash_get_elfcorehdr_size
+static inline crash_get_elfcorehdr_size(void) { return 0; }
+#endif
+
#else /* !CONFIG_KEXEC_CORE */
struct pt_regs;
struct task_struct;
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index 981016e05cfa..01766dd839b0 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -12,6 +12,7 @@
/* kexec flags for different usage scenarios */
#define KEXEC_ON_CRASH 0x00000001
#define KEXEC_PRESERVE_CONTEXT 0x00000002
+#define KEXEC_UPDATE_ELFCOREHDR 0x00000004
#define KEXEC_ARCH_MASK 0xffff0000
/*
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index ef6e91daad56..e05bfdb7eaed 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -704,6 +704,33 @@ subsys_initcall(crash_save_vmcoreinfo_init);
#ifdef CONFIG_CRASH_HOTPLUG
#undef pr_fmt
#define pr_fmt(fmt) "crash hp: " fmt
+
+/*
+ * This routine utilized when the crash_hotplug sysfs node is read.
+ * It reflects the kernel's ability/permission to update the crash
+ * elfcorehdr directly.
+ */
+int crash_check_update_elfcorehdr(void)
+{
+ int rc = 0;
+
+ /* Obtain lock while reading crash information */
+ if (!kexec_trylock()) {
+ pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
+ return 0;
+ }
+ if (kexec_crash_image) {
+ if (kexec_crash_image->file_mode)
+ rc = 1;
+ else
+ rc = kexec_crash_image->update_elfcorehdr;
+ }
+ /* Release lock now that update complete */
+ kexec_unlock();
+
+ return rc;
+}
+
/*
* To accurately reflect hot un/plug changes of cpu and memory resources
* (including onling and offlining of those resources), the elfcorehdr
@@ -734,6 +761,10 @@ static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
image = kexec_crash_image;
+ /* Check that updating elfcorehdr is permitted */
+ if (!(image->file_mode || image->update_elfcorehdr))
+ goto out;
+
if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
pr_debug("hp_action %u, cpu %u\n", hp_action, cpu);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 92d301f98776..60de64bd14b9 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -129,6 +129,9 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
if (flags & KEXEC_PRESERVE_CONTEXT)
image->preserve_context = 1;
+ if (flags & KEXEC_UPDATE_ELFCOREHDR)
+ image->update_elfcorehdr = 1;
+
ret = machine_kexec_prepare(image);
if (ret)
goto out;
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index aad7a3bfd846..1d4bc493b2f4 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -165,6 +165,18 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj,
}
KERNEL_ATTR_RO(vmcoreinfo);
+#ifdef CONFIG_CRASH_HOTPLUG
+static ssize_t crash_elfcorehdr_size_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ unsigned int sz = crash_get_elfcorehdr_size();
+
+ return sysfs_emit(buf, "%u\n", sz);
+}
+KERNEL_ATTR_RO(crash_elfcorehdr_size);
+
+#endif
+
#endif /* CONFIG_CRASH_CORE */
/* whether file capabilities are enabled */
@@ -255,6 +267,9 @@ static struct attribute * kernel_attrs[] = {
#endif
#ifdef CONFIG_CRASH_CORE
&vmcoreinfo_attr.attr,
+#ifdef CONFIG_CRASH_HOTPLUG
+ &crash_elfcorehdr_size_attr.attr,
+#endif
#endif
#ifndef CONFIG_TINY_RCU
&rcu_expedited_attr.attr,