On 03/26/2020 11:37 PM, James Morse wrote: > An image loaded for kexec is not stored in place, instead its segments > are scattered through memory, and are re-assembled when needed. In the > meantime, the target memory may have been removed. > > Because mm is not aware that this memory is still in use, it allows it > to be removed. Why the isolation process does not fail when these pages are currently being used by kexec ? > > Add a memory notifier to prevent the removal of memory regions that > overlap with a loaded kexec image segment. e.g., when triggered from the > Qemu console: > | kexec_core: memory region in use > | memory memory32: Offline failed. Yes this is definitely an added protection for these kexec loaded kernels memory areas from being offlined but I would have expected the preceding offlining to have failed as well. > > Signed-off-by: James Morse <james.morse@xxxxxxx> > --- > kernel/kexec_core.c | 56 +++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 56 insertions(+) > > diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c > index c19c0dad1ebe..ba1d91e868ca 100644 > --- a/kernel/kexec_core.c > +++ b/kernel/kexec_core.c > @@ -12,6 +12,7 @@ > #include <linux/slab.h> > #include <linux/fs.h> > #include <linux/kexec.h> > +#include <linux/memory.h> > #include <linux/mutex.h> > #include <linux/list.h> > #include <linux/highmem.h> > @@ -22,10 +23,12 @@ > #include <linux/elf.h> > #include <linux/elfcore.h> > #include <linux/utsname.h> > +#include <linux/notifier.h> > #include <linux/numa.h> > #include <linux/suspend.h> > #include <linux/device.h> > #include <linux/freezer.h> > +#include <linux/pfn.h> > #include <linux/pm.h> > #include <linux/cpu.h> > #include <linux/uaccess.h> > @@ -1219,3 +1222,56 @@ void __weak arch_kexec_protect_crashkres(void) > > void __weak arch_kexec_unprotect_crashkres(void) > {} > + > +/* > + * If user-space wants to offline memory that is in use by a loaded kexec > + * image, it should unload the image first. > + */ Probably this would need kexec user manual and related system call man pages update as well. > +static int mem_remove_cb(struct notifier_block *nb, unsigned long action, > + void *data) > +{ > + int rv = NOTIFY_OK, i; > + struct memory_notify *arg = data; > + unsigned long pfn = arg->start_pfn; > + unsigned long nr_segments, sstart, send; > + unsigned long end_pfn = arg->start_pfn + arg->nr_pages; > + > + might_sleep(); Required ? > + > + if (action != MEM_GOING_OFFLINE) > + return NOTIFY_DONE; > + > + mutex_lock(&kexec_mutex); > + if (kexec_image) { > + nr_segments = kexec_image->nr_segments; > + > + for (i = 0; i < nr_segments; i++) { > + sstart = PFN_DOWN(kexec_image->segment[i].mem); > + send = PFN_UP(kexec_image->segment[i].mem + > + kexec_image->segment[i].memsz); > + > + if ((pfn <= sstart && sstart < end_pfn) || > + (pfn <= send && send < end_pfn)) { > + pr_warn("Memory region in use\n"); > + rv = NOTIFY_BAD; > + break; > + } > + } > + } > + mutex_unlock(&kexec_mutex); > + > + return rv; Variable 'rv' is redundant, should use NOTIFY_[BAD|OK] directly instead. > +} > + > +static struct notifier_block mem_remove_nb = { > + .notifier_call = mem_remove_cb, > +}; > + > +static int __init register_mem_remove_cb(void) > +{ > + if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) Should not all these new code here be wrapped with CONFIG_MEMORY_HOTREMOVE to reduce the scope as well as final code size when the config is disabled. > + return register_memory_notifier(&mem_remove_nb); > + > + return 0; > +} > +device_initcall(register_mem_remove_cb); >