On 12/11/23 at 02:00pm, Sourabh Jain wrote: ...... > diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h > index f83866a19e87..802abf580cf0 100644 > --- a/arch/powerpc/include/asm/kexec_ranges.h > +++ b/arch/powerpc/include/asm/kexec_ranges.h > @@ -7,6 +7,7 @@ > void sort_memory_ranges(struct crash_mem *mrngs, bool merge); > struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges); > int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); > +int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size); > int add_tce_mem_ranges(struct crash_mem **mem_ranges); > int add_initrd_mem_range(struct crash_mem **mem_ranges); > #ifdef CONFIG_PPC_64S_HASH_MMU > diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c > index 9932793cd64b..5be30659172f 100644 > --- a/arch/powerpc/kexec/core_64.c > +++ b/arch/powerpc/kexec/core_64.c > @@ -19,8 +19,11 @@ > #include <linux/of.h> > #include <linux/libfdt.h> > #include <linux/memblock.h> > +#include <linux/memory.h> > > #include <asm/page.h> > +#include <asm/drmem.h> > +#include <asm/mmzone.h> > #include <asm/current.h> > #include <asm/machdep.h> > #include <asm/cacheflush.h> > @@ -547,9 +550,7 @@ int update_cpus_node(void *fdt) > #undef pr_fmt > #define pr_fmt(fmt) "crash hp: " fmt > > -#ifdef CONFIG_HOTPLUG_CPU > - /* Provides the value for the sysfs crash_hotplug nodes */ > -int arch_crash_hotplug_cpu_support(struct kimage *image) > +static int crash_hotplug_support(struct kimage *image) > { > if (image->file_mode) > return 1; > @@ -560,8 +561,118 @@ int arch_crash_hotplug_cpu_support(struct kimage *image) > */ > return image->update_elfcorehdr && image->update_fdt; > } > + > +#ifdef CONFIG_HOTPLUG_CPU > + /* Provides the value for the sysfs crash_hotplug nodes */ > +int arch_crash_hotplug_cpu_support(struct kimage *image) > +{ > + return crash_hotplug_support(image); > +} > +#endif > + > +#ifdef CONFIG_MEMORY_HOTPLUG > + /* Provides 
the value for the sysfs memory_hotplug nodes */ > +int arch_crash_hotplug_memory_support(struct kimage *image) > +{ > + return crash_hotplug_support(image); > +} > #endif > > +/* > + * Advertise preferred elfcorehdr size to userspace via > + * /sys/kernel/crash_elfcorehdr_size sysfs interface. > + */ > +unsigned int arch_crash_get_elfcorehdr_size(void) > +{ > + unsigned int sz; > + unsigned long elf_phdr_cnt; > + > + /* Program header for CPU notes and vmcoreinfo */ > + elf_phdr_cnt = 2; > + if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) > + /* In the worst case, a Phdr is needed for every other LMB to be > + * represented as an individual crash range. > + */ > + elf_phdr_cnt += memory_hotplug_max() / (2 * drmem_lmb_size()); > + > + /* Do not cross the max limit */ > + if (elf_phdr_cnt > PN_XNUM) > + elf_phdr_cnt = PN_XNUM; > + > + sz = sizeof(struct elfhdr) + (elf_phdr_cnt * sizeof(Elf64_Phdr)); > + return sz; > +} > + > +/** > + * update_crash_elfcorehdr() - Recreate the elfcorehdr and replace it with old > + * elfcorehdr in the kexec segment array. > + * @image: the active struct kimage > + * @mn: struct memory_notify data handler > + */ > +static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify *mn) > +{ > + int ret; > + struct crash_mem *cmem = NULL; > + struct kexec_segment *ksegment; > + void *ptr, *mem, *elfbuf = NULL; > + unsigned long elfsz, memsz, base_addr, size; > + > + ksegment = &image->segment[image->elfcorehdr_index]; > + mem = (void *) ksegment->mem; > + memsz = ksegment->memsz; > + > + ret = get_crash_memory_ranges(&cmem); > + if (ret) { > + pr_err("Failed to get crash mem range\n"); > + return; > + } > + > + /* > + * The hot unplugged memory is part of crash memory ranges, > + * remove it here. 
> + */ > + if (image->hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY) { > + base_addr = PFN_PHYS(mn->start_pfn); > + size = mn->nr_pages * PAGE_SIZE; > + ret = remove_mem_range(&cmem, base_addr, size); Although this is ppc specific, I don't understand. Why don't you just recreate the elfcorehdr, instead of removing the removed region? Comparing the remove_mem_range() implementation with recreating, I don't see too much benefit from that, and it makes your code more complicated. Just curious; surely the ppc people can decide which approach should be taken. > + if (ret) { > + pr_err("Failed to remove hot-unplugged from crash memory ranges.\n"); > + return; > + } > + } > + > + ret = crash_prepare_elf64_headers(cmem, false, &elfbuf, &elfsz); > + if (ret) { > + pr_err("Failed to prepare elf header\n"); > + return; > + } > + > + /* > + * It is unlikely that kernel hit this because elfcorehdr kexec > + * segment (memsz) is built with addition space to accommodate growing > + * number of crash memory ranges while loading the kdump kernel. It is > + * Just to avoid any unforeseen case. > + */ > + if (elfsz > memsz) { > + pr_err("Updated crash elfcorehdr elfsz %lu > memsz %lu", elfsz, memsz); > + goto out; > + } > + > + ptr = __va(mem); > + if (ptr) { > + /* Temporarily invalidate the crash image while it is replaced */ > + xchg(&kexec_crash_image, NULL); > + > + /* Replace the old elfcorehdr with newly prepared elfcorehdr */ > + memcpy((void *)ptr, elfbuf, elfsz); > + > + /* The crash image is now valid once again */ > + xchg(&kexec_crash_image, image); > + } > +out: > + vfree(elfbuf); > +} > + > /** > * arch_crash_handle_hotplug_event - Handle crash CPU/Memory hotplug events to update the > * necessary kexec segments based on the hotplug event. > @@ -572,7 +683,7 @@ int arch_crash_hotplug_cpu_support(struct kimage *image) > * CPU addition: Update the FDT segment to include the newly added CPU.
> * CPU removal: No action is needed, with the assumption that it's okay to have offline CPUs > * as part of the FDT. > - * Memory addition/removal: No action is taken as this is not yet supported. > + * Memory addition/removal: Recreate the elfcorehdr segment > */ > void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) > { > @@ -593,7 +704,6 @@ void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) > return; > > } else if (hp_action == KEXEC_CRASH_HP_ADD_CPU) { > - > void *fdt, *ptr; > unsigned long mem; > int i, fdt_index = -1; > @@ -628,8 +738,10 @@ void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) > > } else if (hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY || > hp_action == KEXEC_CRASH_HP_ADD_MEMORY) { > - pr_info_once("Crash update is not supported for memory hotplug\n"); > - return; > + struct memory_notify *mn; > + > + mn = (struct memory_notify *)arg; > + update_crash_elfcorehdr(image, mn); > } > } > #endif > diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c > index e2148a009701..2457d7ec2075 100644 > --- a/arch/powerpc/kexec/file_load_64.c > +++ b/arch/powerpc/kexec/file_load_64.c > @@ -21,6 +21,8 @@ > #include <linux/memblock.h> > #include <linux/slab.h> > #include <linux/vmalloc.h> > +#include <linux/elf.h> > + > #include <asm/setup.h> > #include <asm/cputhreads.h> > #include <asm/drmem.h> > @@ -740,7 +742,35 @@ static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf) > > kbuf->buffer = headers; > kbuf->mem = KEXEC_BUF_MEM_UNKNOWN; > - kbuf->bufsz = kbuf->memsz = headers_sz; > + kbuf->bufsz = headers_sz; > +#if defined(CONFIG_CRASH_HOTPLUG) && defined(CONFIG_MEMORY_HOTPLUG) > + /* Adjust the elfcorehdr segment size to accommodate > + * future crash memory ranges. > + */ > + int max_lmb; > + unsigned long pnum; > + > + /* In the worst case, a Phdr is needed for every other LMB to be > + * represented as an individual crash range. 
> + */ > + max_lmb = memory_hotplug_max() / (2 * drmem_lmb_size()); > + > + /* Do not cross the Phdr max limit of the elf header. > + * Avoid counting Phdr for crash ranges (cmem->nr_ranges) > + * which are already part of elfcorehdr. > + */ > + if (max_lmb > PN_XNUM) > + pnum = PN_XNUM - cmem->nr_ranges; > + else > + pnum = max_lmb - cmem->nr_ranges; > + > + /* Additional buffer space for elfcorehdr to accommodate > + * future memory ranges. > + */ > + kbuf->memsz = headers_sz + pnum * sizeof(Elf64_Phdr); > +#else > + kbuf->memsz = headers_sz; > +#endif > kbuf->top_down = false; > > ret = kexec_add_buffer(kbuf); > @@ -750,7 +780,7 @@ static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf) > } > > image->elf_load_addr = kbuf->mem; > - image->elf_headers_sz = headers_sz; > + image->elf_headers_sz = kbuf->memsz; > image->elf_headers = headers; > out: > kfree(cmem); > diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c > index fb3e12f15214..4fd0c5d5607b 100644 > --- a/arch/powerpc/kexec/ranges.c > +++ b/arch/powerpc/kexec/ranges.c > @@ -234,6 +234,91 @@ int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) > return __add_mem_range(mem_ranges, base, size); > } > > +/** > + * remove_mem_range - Removes the given memory range from the range list. > + * @mem_ranges: Range list to remove the memory range to. > + * @base: Base address of the range to remove. > + * @size: Size of the memory range to remove. > + * > + * (Re)allocates memory, if needed. > + * > + * Returns 0 on success, negative errno on error. > + */ > +int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) > +{ > + u64 end; > + int ret = 0; > + unsigned int i; > + u64 mstart, mend; > + struct crash_mem *mem_rngs = *mem_ranges; > + > + if (!size) > + return 0; > + > + /* > + * Memory range are stored as start and end address, use > + * the same format to do remove operation. 
> + */ > + end = base + size - 1; > + > + for (i = 0; i < mem_rngs->nr_ranges; i++) { > + mstart = mem_rngs->ranges[i].start; > + mend = mem_rngs->ranges[i].end; > + > + /* > + * Memory range to remove is not part of this range entry > + * in the memory range list > + */ > + if (!(base >= mstart && end <= mend)) > + continue; > + > + /* > + * Memory range to remove is equivalent to this entry in the > + * memory range list. Remove the range entry from the list. > + */ > + if (base == mstart && end == mend) { > + for (; i < mem_rngs->nr_ranges - 1; i++) { > + mem_rngs->ranges[i].start = mem_rngs->ranges[i+1].start; > + mem_rngs->ranges[i].end = mem_rngs->ranges[i+1].end; > + } > + mem_rngs->nr_ranges--; > + goto out; > + } > + /* > + * Start address of the memory range to remove and the > + * current memory range entry in the list is same. Just > + * move the start address of the current memory range > + * entry in the list to end + 1. > + */ > + else if (base == mstart) { > + mem_rngs->ranges[i].start = end + 1; > + goto out; > + } > + /* > + * End address of the memory range to remove and the > + * current memory range entry in the list is same. > + * Just move the end address of the current memory > + * range entry in the list to base - 1. > + */ > + else if (end == mend) { > + mem_rngs->ranges[i].end = base - 1; > + goto out; > + } > + /* > + * Memory range to remove is not at the edge of current > + * memory range entry. Split the current memory entry into > + * two half. > + */ > + else { > + mem_rngs->ranges[i].end = base - 1; > + size = mem_rngs->ranges[i].end - end; > + ret = add_mem_range(mem_ranges, end + 1, size); > + } > + } > +out: > + return ret; > +} > + > /** > * add_tce_mem_ranges - Adds tce-table range to the given memory ranges list. > * @mem_ranges: Range list to add the memory range(s) to. > -- > 2.41.0 > _______________________________________________ kexec mailing list kexec@xxxxxxxxxxxxxxxxxxx http://lists.infradead.org/mailman/listinfo/kexec