> -----Original Message----- > From: lijiang [mailto:lijiang@xxxxxxxxxx] > Sent: Friday, October 25, 2019 10:31 AM > To: Simon Horman <horms@xxxxxxxxxxxx>; Hatayama, Daisuke/畑山 大輔 > <d.hatayama@xxxxxxxxxxx> > Cc: linux-kernel@xxxxxxxxxxxxxxx; jgross@xxxxxxxx; Thomas.Lendacky@xxxxxxx; > bhe@xxxxxxxxxx; x86@xxxxxxxxxx; kexec@xxxxxxxxxxxxxxxxxxx; > dhowells@xxxxxxxxxx; mingo@xxxxxxxxxx; bp@xxxxxxxxx; ebiederm@xxxxxxxxxxxx; > hpa@xxxxxxxxx; tglx@xxxxxxxxxxxxx; dyoung@xxxxxxxxxx; vgoyal@xxxxxxxxxx > Subject: Re: [PATCH 1/2 v5] x86/kdump: always reserve the low 1MiB when the > crashkernel option is specified > > 在 2019年10月24日 19:33, lijiang 写道: > > 在 2019年10月24日 18:07, Simon Horman 写道: > >> Hi Linbo, > >> > >> thanks for your patch. > >> > >> On Wed, Oct 23, 2019 at 10:19:11PM +0800, Lianbo Jiang wrote: > >>> Kdump kernel will reuse the first 640k region because the real mode > >>> trampoline has to work in this area. When the vmcore is dumped, the > >>> old memory in this area may be accessed, therefore, kernel has to > >>> copy the contents of the first 640k area to a backup region so that > >>> kdump kernel can read the old memory from the backup area of the > >>> first 640k area, which is done in the purgatory(). > >>> > >>> But, the current handling of copying the first 640k area runs into > >>> problems when SME is enabled, kernel does not properly copy these > >>> old memory to the backup area in the purgatory(), thereby, kdump > >>> kernel reads out the encrypted contents, because the kdump kernel > >>> must access the first kernel's memory with the encryption bit set > >>> when SME is enabled in the first kernel. Please refer to this link: > >>> > >>> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=204793 > >>> > >>> Finally, it causes the following errors, and the crash tool gets > >>> invalid pointers when parsing the vmcore. > >>> > >>> crash> kmem -s|grep -i invalid > >>> kmem: dma-kmalloc-512: slab:ffffd77680001c00 invalid > freepointer:a6086ac099f0c5a4 > >>> kmem: dma-kmalloc-512: slab:ffffd77680001c00 invalid > freepointer:a6086ac099f0c5a4 > >>> crash> > >>> > >>> To avoid the above errors, when the crashkernel option is specified, > >>> lets reserve the remaining low 1MiB memory(after reserving real mode > >>> memory) so that the allocated memory does not fall into the low 1MiB > >>> area, which makes us not to copy the first 640k content to a backup > >>> region in purgatory(). This indicates that it does not need to be > >>> included in crash dumps or used for anything except the processor > >>> trampolines that must live in the low 1MiB. > >>> > >>> Signed-off-by: Lianbo Jiang <lijiang@xxxxxxxxxx> > >>> --- > >>> BTW:I also tried to fix the above problem in purgatory(), but there > >>> are too many restricts in purgatory() context, for example: i can't > >>> allocate new memory to create the identity mapping page table for > >>> SME situation. > >>> > >>> Currently, there are two places where the first 640k area is needed, > >>> the first one is in the find_trampoline_placement(), another one is > >>> in the reserve_real_mode(), and their content doesn't matter. > >>> > >>> In addition, also need to clean all the code related to the backup > >>> region later. > >>> > >>> arch/x86/realmode/init.c | 2 ++ > >>> include/linux/kexec.h | 2 ++ > >>> kernel/kexec_core.c | 13 +++++++++++++ > >>> 3 files changed, 17 insertions(+) > >>> > >>> diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c > >>> index 7dce39c8c034..064cc79a015d 100644 > >>> --- a/arch/x86/realmode/init.c > >>> +++ b/arch/x86/realmode/init.c > >>> @@ -3,6 +3,7 @@ > >>> #include <linux/slab.h> > >>> #include <linux/memblock.h> > >>> #include <linux/mem_encrypt.h> > >>> +#include <linux/kexec.h> > >>> > >>> #include <asm/set_memory.h> > >>> #include <asm/pgtable.h> > >>> @@ -34,6 +35,7 @@ void __init reserve_real_mode(void) > >>> > >>> memblock_reserve(mem, size); > >>> set_real_mode_mem(mem); > >>> + kexec_reserve_low_1MiB(); > >>> } > >>> > >>> static void __init setup_real_mode(void) > >>> diff --git a/include/linux/kexec.h b/include/linux/kexec.h > >>> index 1776eb2e43a4..30acf1d738bc 100644 > >>> --- a/include/linux/kexec.h > >>> +++ b/include/linux/kexec.h > >>> @@ -306,6 +306,7 @@ extern void __crash_kexec(struct pt_regs *); > >>> extern void crash_kexec(struct pt_regs *); > >>> int kexec_should_crash(struct task_struct *); > >>> int kexec_crash_loaded(void); > >>> +void __init kexec_reserve_low_1MiB(void); > >>> void crash_save_cpu(struct pt_regs *regs, int cpu); > >>> extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); > >>> > >>> @@ -397,6 +398,7 @@ static inline void __crash_kexec(struct pt_regs *regs) > { } > >>> static inline void crash_kexec(struct pt_regs *regs) { } > >>> static inline int kexec_should_crash(struct task_struct *p) { return 0; } > >>> static inline int kexec_crash_loaded(void) { return 0; } > >>> +static inline void __init kexec_reserve_low_1MiB(void) { } > >>> #define kexec_in_progress false > >>> #endif /* CONFIG_KEXEC_CORE */ > >>> > >>> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c > >>> index 15d70a90b50d..5bd89f1fee42 100644 > >>> --- a/kernel/kexec_core.c > >>> +++ b/kernel/kexec_core.c > >>> @@ -37,6 +37,7 @@ > >>> #include <linux/compiler.h> > >>> #include <linux/hugetlb.h> > >>> #include <linux/frame.h> > >>> +#include <linux/memblock.h> > >>> > >>> #include <asm/page.h> > >>> #include <asm/sections.h> > >>> @@ -70,6 +71,18 @@ struct resource crashk_low_res = { > >>> .desc = IORES_DESC_CRASH_KERNEL > >>> }; > >>> > >>> +/* > >>> + * When the crashkernel option is specified, only use the low > >>> + * 1MiB for the real mode trampoline. > >>> + */ > >>> +void __init kexec_reserve_low_1MiB(void) > >>> +{ > >>> + if (strstr(boot_command_line, "crashkernel=")) { > >> > >> Could you comment on the issue of using strstr which > >> was raised by Hatayama-san in response to an earlier revision > >> of this patch? > >> > > > > Thank you, Simon and Hatayama-san. Lets talk about it here. > > > >> strstr() matches for example, > ANYEXTRACHARACTERScrashkernel=ANYEXTRACHARACTERS. > >> > >> Is it enough to use cmdline_find_option_bool()? > >> > > > > The cmdline_find_option_bool() will find a boolean option, but the > crashkernel option > > is not a boolean option, maybe it looks odd. So, should we use the > cmdline_find_option() > > better? > > > > +#include <asm/cmdline.h> > > > > void __init kexec_reserve_low_1MiB(void) > > { > > - if (strstr(boot_command_line, "crashkernel=")) { > > + char buffer[4]; > > + > > + if (cmdline_find_option(boot_command_line, "crashkernel=", > > + buffer, sizeof(buffer))) { > Maybe it is simpler as follow: > > + if (cmdline_find_option(boot_command_line, "crashkernel=", > + NULL, 0)) { > > Any thoughts? I wrote a test kernel module and it works as expected. static int __init testmod_init(void) { char cmdline1[] = "x y crashkernel z"; char cmdline2[] = "x y crashkernel=128M z"; printk("\"1: %d\n", cmdline_find_option_bool(cmdline1, "crashkernel")); printk("\"2: %d\n", cmdline_find_option_bool(cmdline1, "crashkernel=")); printk("\"3: %d\n", cmdline_find_option_bool(cmdline2, "crashkernel")); printk("\"4: %d\n", cmdline_find_option_bool(cmdline2, "crashkernel=")); printk("\"5: %d\n", cmdline_find_option(cmdline1, "crashkernel", NULL, 0)); printk("\"6: %d\n", cmdline_find_option(cmdline1, "crashkernel=", NULL, 0)); printk("\"7: %d\n", cmdline_find_option(cmdline2, "crashkernel", NULL, 0)); printk("\"8: %d\n", cmdline_find_option(cmdline2, "crashkernel=", NULL, 0)); return 0; } # dmesg | tail [85335.355459] "7: 4 [85335.356923] "8: -1 [85349.763849] "1: 5 [85349.765128] "2: 0 [85349.766159] "3: 0 [85349.767145] "4: 0 [85349.768157] "5: -1 [85349.769259] "6: -1 [85349.770423] "7: 4 [85349.771512] "8: -1 > > Thanks > Lianbo > > memblock_reserve(0, 1<<20); > > pr_info("Reserving the low 1MiB of memory for > crashkernel\n"); > > } > > > > And here, no need to parse the arguments of crashkernel(sometimes, which has > a > > complicated syntax), so the size of buffer should be enough. What's your > opinion? > > > > Thanks > > Lianbo > > > >> Thanks in advance! > >> > >>> + memblock_reserve(0, 1<<20); > >>> + pr_info("Reserving the low 1MiB of memory for > crashkernel\n"); > >>> + } > >>> +} > >>> + > >>> int kexec_should_crash(struct task_struct *p) > >>> { > >>> /* > >>> -- > >>> 2.17.1 > >>> > >>> > >>> _______________________________________________ > >>> kexec mailing list > >>> kexec@xxxxxxxxxxxxxxxxxxx > >>> http://lists.infradead.org/mailman/listinfo/kexec > >>>
#include <linux/kernel.h> #include <linux/module.h> #include <linux/string.h> #include <linux/ctype.h> #include <asm/setup.h> MODULE_AUTHOR("FUJITSU LIMITED"); MODULE_LICENSE("GPL v2"); static inline int myisspace(u8 c) { return c <= ' '; /* Close enough approximation */ } /** * Find a boolean option (like quiet,noapic,nosmp....) * * @cmdline: the cmdline string * @option: option string to look for * * Returns the position of that @option (starts counting with 1) * or 0 on not found. @option will only be found if it is found * as an entire word in @cmdline. For instance, if @option="car" * then a cmdline which contains "cart" will not match. */ static int __cmdline_find_option_bool(const char *cmdline, int max_cmdline_size, const char *option) { char c; int pos = 0, wstart = 0; const char *opptr = NULL; enum { st_wordstart = 0, /* Start of word/after whitespace */ st_wordcmp, /* Comparing this word */ st_wordskip, /* Miscompare, skip */ } state = st_wordstart; if (!cmdline) return -1; /* No command line */ /* * This 'pos' check ensures we do not overrun * a non-NULL-terminated 'cmdline' */ while (pos < max_cmdline_size) { c = *(char *)cmdline++; pos++; switch (state) { case st_wordstart: if (!c) return 0; else if (myisspace(c)) break; state = st_wordcmp; opptr = option; wstart = pos; /* fall through */ case st_wordcmp: if (!*opptr) { /* * We matched all the way to the end of the * option we were looking for. If the * command-line has a space _or_ ends, then * we matched! */ if (!c || myisspace(c)) return wstart; /* * We hit the end of the option, but _not_ * the end of a word on the cmdline. Not * a match. */ } else if (!c) { /* * Hit the NULL terminator on the end of * cmdline. */ return 0; } else if (c == *opptr++) { /* * We are currently matching, so continue * to the next character on the cmdline. */ break; } state = st_wordskip; /* fall through */ case st_wordskip: if (!c) return 0; else if (myisspace(c)) state = st_wordstart; break; } } return 0; /* Buffer overrun */ } /* * Find a non-boolean option (i.e. option=argument). In accordance with * standard Linux practice, if this option is repeated, this returns the * last instance on the command line. * * @cmdline: the cmdline string * @max_cmdline_size: the maximum size of cmdline * @option: option string to look for * @buffer: memory buffer to return the option argument * @bufsize: size of the supplied memory buffer * * Returns the length of the argument (regardless of if it was * truncated to fit in the buffer), or -1 on not found. */ static int __cmdline_find_option(const char *cmdline, int max_cmdline_size, const char *option, char *buffer, int bufsize) { char c; int pos = 0, len = -1; const char *opptr = NULL; char *bufptr = buffer; enum { st_wordstart = 0, /* Start of word/after whitespace */ st_wordcmp, /* Comparing this word */ st_wordskip, /* Miscompare, skip */ st_bufcpy, /* Copying this to buffer */ } state = st_wordstart; if (!cmdline) return -1; /* No command line */ /* * This 'pos' check ensures we do not overrun * a non-NULL-terminated 'cmdline' */ while (pos++ < max_cmdline_size) { c = *(char *)cmdline++; if (!c) break; switch (state) { case st_wordstart: if (myisspace(c)) break; state = st_wordcmp; opptr = option; /* fall through */ case st_wordcmp: if ((c == '=') && !*opptr) { /* * We matched all the way to the end of the * option we were looking for, prepare to * copy the argument. */ len = 0; bufptr = buffer; state = st_bufcpy; break; } else if (c == *opptr++) { /* * We are currently matching, so continue * to the next character on the cmdline. */ break; } state = st_wordskip; /* fall through */ case st_wordskip: if (myisspace(c)) state = st_wordstart; break; case st_bufcpy: if (myisspace(c)) { state = st_wordstart; } else { /* * Increment len, but don't overrun the * supplied buffer and leave room for the * NULL terminator. */ if (++len < bufsize) *bufptr++ = c; } break; } } if (bufsize) *bufptr = '\0'; return len; } int cmdline_find_option_bool(const char *cmdline, const char *option) { return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option); } int cmdline_find_option(const char *cmdline, const char *option, char *buffer, int bufsize) { return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option, buffer, bufsize); } static int __init testmod_init(void) { char cmdline1[] = "x y crashkernel z"; char cmdline2[] = "x y crashkernel=128M z"; printk("\"1: %d\n", cmdline_find_option_bool(cmdline1, "crashkernel")); printk("\"2: %d\n", cmdline_find_option_bool(cmdline1, "crashkernel=")); printk("\"3: %d\n", cmdline_find_option_bool(cmdline2, "crashkernel")); printk("\"4: %d\n", cmdline_find_option_bool(cmdline2, "crashkernel=")); printk("\"5: %d\n", cmdline_find_option(cmdline1, "crashkernel", NULL, 0)); printk("\"6: %d\n", cmdline_find_option(cmdline1, "crashkernel=", NULL, 0)); printk("\"7: %d\n", cmdline_find_option(cmdline2, "crashkernel", NULL, 0)); printk("\"8: %d\n", cmdline_find_option(cmdline2, "crashkernel=", NULL, 0)); return 0; } static void __exit testmod_exit(void) { } module_init(testmod_init); module_exit(testmod_exit);
Attachment:
Makefile
Description: Makefile
_______________________________________________ kexec mailing list kexec@xxxxxxxxxxxxxxxxxxx http://lists.infradead.org/mailman/listinfo/kexec