Introduce the crash_hotplug attribute for memory and CPUs for use by userspace. These attributes directly facilitate the udev rule for managing userspace re-loading of the crash kernel upon hot un/plug changes. For memory, expose the crash_hotplug attribute to the /sys/devices/system/memory directory. For example: # udevadm info --attribute-walk /sys/devices/system/memory/memory81 looking at device '/devices/system/memory/memory81': KERNEL=="memory81" SUBSYSTEM=="memory" DRIVER=="" ATTR{online}=="1" ATTR{phys_device}=="0" ATTR{phys_index}=="00000051" ATTR{removable}=="1" ATTR{state}=="online" ATTR{valid_zones}=="Movable" looking at parent device '/devices/system/memory': KERNELS=="memory" SUBSYSTEMS=="" DRIVERS=="" ATTRS{auto_online_blocks}=="offline" ATTRS{block_size_bytes}=="8000000" ATTRS{crash_hotplug}=="1" For CPUs, expose the crash_hotplug attribute to the /sys/devices/system/cpu directory. For example: # udevadm info --attribute-walk /sys/devices/system/cpu/cpu0 looking at device '/devices/system/cpu/cpu0': KERNEL=="cpu0" SUBSYSTEM=="cpu" DRIVER=="processor" ATTR{crash_notes}=="277c38600" ATTR{crash_notes_size}=="368" ATTR{online}=="1" looking at parent device '/devices/system/cpu': KERNELS=="cpu" SUBSYSTEMS=="" DRIVERS=="" ATTRS{crash_hotplug}=="1" ATTRS{isolated}=="" ATTRS{kernel_max}=="8191" ATTRS{nohz_full}==" (null)" ATTRS{offline}=="4-7" ATTRS{online}=="0-3" ATTRS{possible}=="0-7" ATTRS{present}=="0-3" With these sysfs attributes in place, it is possible to efficiently instruct the udev rule to skip crash kernel reloading for kernels configured with crash hotplug support. 
For example, the following is the proposed udev rule change for the RHEL system's 98-kexec.rules file (as the first lines of the rule file): # The kernel updates the crash elfcorehdr for CPU and memory changes SUBSYSTEM=="cpu", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end" SUBSYSTEM=="memory", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end" When examined in the context of 98-kexec.rules, the above rules test whether crash_hotplug is set, and if so, the userspace-initiated unload-then-reload of the crash kernel is skipped. CPU and memory checks are separated in accordance with the CONFIG_HOTPLUG_CPU and CONFIG_MEMORY_HOTPLUG kernel config options. If an architecture supports, for example, memory hotplug but not CPU hotplug, then the /sys/devices/system/memory/crash_hotplug attribute file is present, but the /sys/devices/system/cpu/crash_hotplug attribute file will NOT be present. Thus the udev rule skips userspace processing of memory hot un/plug events, but the udev rule will evaluate false for CPU events, thus allowing userspace to process CPU hot un/plug events (i.e., the unload-then-reload of the kdump capture kernel). 
Signed-off-by: Eric DeVolder <eric.devolder@xxxxxxxxxx> Reviewed-by: Sourabh Jain <sourabhjain@xxxxxxxxxxxxx> Acked-by: Hari Bathini <hbathini@xxxxxxxxxxxxx> Acked-by: Baoquan He <bhe@xxxxxxxxxx> --- Documentation/ABI/testing/sysfs-devices-memory | 8 ++++++++ .../ABI/testing/sysfs-devices-system-cpu | 8 ++++++++ .../admin-guide/mm/memory-hotplug.rst | 8 ++++++++ Documentation/core-api/cpu_hotplug.rst | 18 ++++++++++++++++++ drivers/base/cpu.c | 16 ++++++++++++++-- drivers/base/memory.c | 13 +++++++++++++ include/linux/kexec.h | 8 ++++++++ 7 files changed, 77 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-memory b/Documentation/ABI/testing/sysfs-devices-memory index d8b0f80b9e33..c50725ebebb7 100644 --- a/Documentation/ABI/testing/sysfs-devices-memory +++ b/Documentation/ABI/testing/sysfs-devices-memory @@ -110,3 +110,11 @@ Description: link is created for memory section 9 on node0. /sys/devices/system/node/node0/memory9 -> ../../memory/memory9 + +What: /sys/devices/system/memory/crash_hotplug +Date: Jun 2023 +Contact: Linux kernel mailing list <linux-kernel@xxxxxxxxxxxxxxx> +Description: + (RO) indicates whether or not the kernel directly supports + modifying the crash elfcorehdr for memory hot un/plug and/or + on/offline changes. diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index ecd585ca2d50..598b0fa67481 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -686,3 +686,11 @@ Description: (RO) the list of CPUs that are isolated and don't participate in load balancing. These CPUs are set by boot parameter "isolcpus=". 
+ +What: /sys/devices/system/cpu/crash_hotplug +Date: Jun 2023 +Contact: Linux kernel mailing list <linux-kernel@xxxxxxxxxxxxxxx> +Description: + (RO) indicates whether or not the kernel directly supports + modifying the crash elfcorehdr for CPU hot un/plug and/or + on/offline changes. diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst index 1b02fe5807cc..eb99d79223a3 100644 --- a/Documentation/admin-guide/mm/memory-hotplug.rst +++ b/Documentation/admin-guide/mm/memory-hotplug.rst @@ -291,6 +291,14 @@ The following files are currently defined: Availability depends on the CONFIG_ARCH_MEMORY_PROBE kernel configuration option. ``uevent`` read-write: generic udev file for device subsystems. +``crash_hotplug`` read-only: when changes to the system memory map + occur due to hot un/plug of memory, this file contains + '1' if the kernel updates the kdump capture kernel memory + map itself (via elfcorehdr), or '0' if userspace must update + the kdump capture kernel memory map. + + Availability depends on the CONFIG_MEMORY_HOTPLUG kernel + configuration option. ====================== ========================================================= .. note:: diff --git a/Documentation/core-api/cpu_hotplug.rst b/Documentation/core-api/cpu_hotplug.rst index e6f5bc39cf5c..54581c501562 100644 --- a/Documentation/core-api/cpu_hotplug.rst +++ b/Documentation/core-api/cpu_hotplug.rst @@ -741,6 +741,24 @@ will receive all events. A script like:: can process the event further. +When changes to the CPUs in the system occur, the sysfs file +/sys/devices/system/cpu/crash_hotplug contains '1' if the kernel +updates the kdump capture kernel list of CPUs itself (via elfcorehdr), +or '0' if userspace must update the kdump capture kernel list of CPUs. + +The availability depends on the CONFIG_HOTPLUG_CPU kernel configuration +option. 
+ +To skip userspace processing of CPU hot un/plug events for kdump +(ie the unload-then-reload to obtain a current list of CPUs), this sysfs +file can be used in a udev rule as follows: + + SUBSYSTEM=="cpu", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end" + +For a cpu hot un/plug event, if the architecture supports kernel updates +of the elfcorehdr (which contains the list of CPUs), then the rule skips +the unload-then-reload of the kdump capture kernel. + Kernel Inline Documentations Reference ====================================== diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 2455cbcebc87..7637cd1dd480 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -20,6 +20,7 @@ #include <linux/tick.h> #include <linux/pm_qos.h> #include <linux/sched/isolation.h> +#include <linux/kexec.h> #include "base.h" @@ -130,8 +131,6 @@ static DEVICE_ATTR(probe, S_IWUSR, NULL, cpu_probe_store); static DEVICE_ATTR(release, S_IWUSR, NULL, cpu_release_store); #ifdef CONFIG_KEXEC -#include <linux/kexec.h> - static ssize_t crash_notes_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -286,6 +285,14 @@ static ssize_t print_cpus_nohz_full(struct device *dev, } static DEVICE_ATTR(nohz_full, 0444, print_cpus_nohz_full, NULL); +static ssize_t crash_hotplug_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "%d\n", crash_hotplug_cpu_support()); +} +static DEVICE_ATTR_ADMIN_RO(crash_hotplug); + static void cpu_device_release(struct device *dev) { /* @@ -470,6 +477,7 @@ static struct attribute *cpu_root_attrs[] = { &dev_attr_isolated.attr, &dev_attr_nohz_full.attr, &dev_attr_modalias.attr, + &dev_attr_crash_hotplug.attr, NULL }; @@ -505,6 +513,10 @@ cpu_root_attr_is_visible(struct kobject *kobj, if (attr == &dev_attr_modalias.attr) return mode; } + if (IS_ENABLED(CONFIG_CRASH_HOTPLUG)) { + if (attr == &dev_attr_crash_hotplug.attr) + return mode; + } return 0; } diff --git a/drivers/base/memory.c 
b/drivers/base/memory.c index 7294112fe646..a0b0862871ef 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -25,6 +25,7 @@ #include <linux/atomic.h> #include <linux/uaccess.h> +#include <linux/kexec.h> #define MEMORY_CLASS_NAME "memory" @@ -494,6 +495,13 @@ static ssize_t auto_online_blocks_store(struct device *dev, static DEVICE_ATTR_RW(auto_online_blocks); +static ssize_t crash_hotplug_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", crash_hotplug_memory_support()); +} +static DEVICE_ATTR_RO(crash_hotplug); + /* * Some architectures will have custom drivers to do this, and * will not need to do it from userspace. The fake hot-add code @@ -915,6 +923,7 @@ static struct attribute *memory_root_attrs[] = { &dev_attr_hard_offline_page.attr, &dev_attr_block_size_bytes.attr, &dev_attr_auto_online_blocks.attr, + &dev_attr_crash_hotplug.attr, NULL }; @@ -938,6 +947,10 @@ memory_root_attr_is_visible(struct kobject *kobj, return mode; if (attr == &dev_attr_auto_online_blocks.attr) return mode; + if (IS_ENABLED(CONFIG_CRASH_HOTPLUG)) { + if (attr == &dev_attr_crash_hotplug.attr) + return mode; + } return 0; } diff --git a/include/linux/kexec.h b/include/linux/kexec.h index b9903dd48e24..6a8a724ac638 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -501,6 +501,14 @@ static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { static inline void arch_crash_handle_hotplug_event(struct kimage *image) { } #endif +#ifndef crash_hotplug_cpu_support +static inline int crash_hotplug_cpu_support(void) { return 0; } +#endif + +#ifndef crash_hotplug_memory_support +static inline int crash_hotplug_memory_support(void) { return 0; } +#endif + #else /* !CONFIG_KEXEC_CORE */ struct pt_regs; struct task_struct; -- 2.31.1