On Fri, May 18, 2018 at 07:09:10PM +0000, Sunil Muthuswamy wrote: > In the VM mode on Hyper-V, currently, when the kernel panics, an error > code and few register values are populated in an MSR and the Hypervisor > notified. This information is collected on the host. The amount of > information currently collected is found to be limited and not very > actionable. To gather more actionable data, such as stack trace, the > proposal is to write one page worth of kmsg data on an allocated page > and the Hypervisor notified of the page address through the MSR. > > - Added a sysctl option to control the behavior, with ON by default. > > CC: kys@xxxxxxxxxxxxx > CC: sthemmin@xxxxxxxxxxxxx > Signed-off-by: Sunil Muthuswamy <sunilmut@xxxxxxxxxxxxx> > --- > arch/x86/hyperv/hv_init.c | 35 ++++++++++ > arch/x86/include/asm/hyperv-tlfs.h | 5 +- > arch/x86/include/asm/mshyperv.h | 1 + > drivers/hv/vmbus_drv.c | 128 +++++++++++++++++++++++++++++++++++++ > 4 files changed, 167 insertions(+), 2 deletions(-) > > diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c > index cfecc22..fc1e3cb 100644 > --- a/arch/x86/hyperv/hv_init.c > +++ b/arch/x86/hyperv/hv_init.c > @@ -395,6 +395,41 @@ void hyperv_report_panic(struct pt_regs *regs, long err) > } > EXPORT_SYMBOL_GPL(hyperv_report_panic); > > +/** > + * hyperv_report_panic_msg - report panic message to Hyper-V > + * @pa: physical address of the panic page containing the message > + * @size: size of the message in the page > + * > + */ > + The blank lines are not needed, please drop. > +void hyperv_report_panic_msg(phys_addr_t pa, size_t size) > +{ > + static bool panic_msg_reported; > + > + if (panic_msg_reported) > + return; > + panic_msg_reported = true; > + > + /* > + * P3 to contain the physical address of the panic page & P4 to > + * contain the size of the panic data in that page. Rest of the > + * registers are no-op when the NOTIFY_MSG flag is set. > + */ > + wrmsrl(HV_X64_MSR_CRASH_P0, 0); > + wrmsrl(HV_X64_MSR_CRASH_P1, 0); > + wrmsrl(HV_X64_MSR_CRASH_P2, 0); > + wrmsrl(HV_X64_MSR_CRASH_P3, pa); > + wrmsrl(HV_X64_MSR_CRASH_P4, size); > + > + /* > + * Let Hyper-V know there is crash data available along with > + * the panic message. > + */ > + wrmsrl(HV_X64_MSR_CRASH_CTL, > + (HV_CRASH_CTL_CRASH_NOTIFY | HV_CRASH_CTL_CRASH_NOTIFY_MSG)); > +} > +EXPORT_SYMBOL_GPL(hyperv_report_panic_msg); > + > bool hv_is_hyperv_initialized(void) > { > union hv_x64_msr_hypercall_contents hypercall_msr; > diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h > index 416cb0e..fc2932c 100644 > --- a/arch/x86/include/asm/hyperv-tlfs.h > +++ b/arch/x86/include/asm/hyperv-tlfs.h > @@ -171,9 +171,10 @@ > #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED (1 << 14) > > /* > - * Crash notification flag. > + * Crash notification flags. > */ > -#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63) > +#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63) > +#define HV_CRASH_CTL_CRASH_NOTIFY_MSG (1ULL << 62) BIT_ULL()? And again, why are they not in numerical order? > /* MSR used to identify the guest OS. */ > #define HV_X64_MSR_GUEST_OS_ID 0x40000000 > diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h > index b90e796..ac83f2d 100644 > --- a/arch/x86/include/asm/mshyperv.h > +++ b/arch/x86/include/asm/mshyperv.h > @@ -262,6 +262,7 @@ void hyperv_init(void); > void hyperv_setup_mmu_ops(void); > void hyper_alloc_mmu(void); > void hyperv_report_panic(struct pt_regs *regs, long err); > +void hyperv_report_panic_msg(phys_addr_t pa, size_t size); > bool hv_is_hyperv_initialized(void); > void hyperv_cleanup(void); > > diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c > index b10fe26..7b04f7f 100644 > --- a/drivers/hv/vmbus_drv.c > +++ b/drivers/hv/vmbus_drv.c > @@ -56,6 +56,8 @@ static struct completion probe_event; > > static int hyperv_cpuhp_online; > > +static void *hv_panic_page; > + > static int hyperv_panic_event(struct notifier_block *nb, unsigned long val, > void *args) > { > @@ -1018,6 +1020,86 @@ static void vmbus_isr(void) > add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0); > } > > +/* > + * Boolean to control whether to report panic messages over Hyper-V. > + * > + * It can be set via /proc/sys/kernel/hyperv/record_panic_msg > + */ > +int sysctl_record_panic_msg = 1; Why was a sysctl chosen? I'm not objecting, but you have to document new user/kernel apis when you create them. > +/* > + * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg > + * buffer and call into Hyper-V to transfer the data. > + */ > +static void hv_kmsg_dump(struct kmsg_dumper *dumper, > + enum kmsg_dump_reason reason) > +{ > + size_t bytes_written; > + phys_addr_t panic_pa; > + > + /* We are only interested in panics. */ > + if (reason != KMSG_DUMP_PANIC) > + return; > + > + if (!sysctl_record_panic_msg) > + return; > + > + if (!hv_panic_page) > + return; > + > + panic_pa = virt_to_phys(hv_panic_page); > + > + /* > + * Write dump contents to the page. No need to synchronize; panic should > + * be single-threaded. > + */ > + if (!kmsg_dump_get_buffer(dumper, true, hv_panic_page, > + PAGE_SIZE, &bytes_written)) { > + pr_err("Hyper-V - Unable to get kmsg data for panic\n"); > + return; > + } > + > + hyperv_report_panic_msg(panic_pa, bytes_written); > +} > + > +static struct kmsg_dumper hv_kmsg_dumper = { > + .dump = hv_kmsg_dump, > +}; > + > +static struct ctl_table_header *hv_ctl_table_hdr; > +static int zero; > +static int one = 1; > + > +static struct ctl_table hv_ctl_table[] = { > + { > + .procname = "record_panic_msg", > + .data = &sysctl_record_panic_msg, > + .maxlen = sizeof(int), > + .mode = 0644, > + .proc_handler = proc_dointvec_minmax, > + .extra1 = &zero, > + .extra2 = &one > + }, > + {} > +}; > + > +static struct ctl_table hv_ctl_dir[] = { > + { > + .procname = "hyperv", > + .mode = 0555, > + .child = hv_ctl_table > + }, > + {} > +}; > + > +static struct ctl_table hv_root_table[] = { > + { > + .procname = "kernel", > + .mode = 0555, > + .child = hv_ctl_dir > + }, > + {} > +}; > > /* > * vmbus_bus_init -Main vmbus driver initialization routine. > @@ -1065,6 +1147,31 @@ static int vmbus_bus_init(void) > * Only register if the crash MSRs are available > */ > if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { > + if (!hv_ctl_table_hdr) { > + > + /* No blank line needed. > + * Sysctl registration is not fatal, since default is > + * for operation. > + */ > + hv_ctl_table_hdr = > + register_sysctl_table(hv_root_table); > + if (!hv_ctl_table_hdr) > + pr_err("Hyper-v: sysctl table register error"); > + } > + > + hv_panic_page = (void *)get_zeroed_page(GFP_KERNEL); > + if (!hv_panic_page) { > + ret = -ENOMEM; > + goto err_connect; > + } > + > + ret = kmsg_dump_register(&hv_kmsg_dumper); > + if (ret) { > + pr_err("Hyper-V - kmsg dump register failure 0x%x\n", > + ret); > + goto err_connect; > + } > + > register_die_notifier(&hyperv_die_block); > atomic_notifier_chain_register(&panic_notifier_list, > &hyperv_panic_block); > @@ -1081,6 +1188,15 @@ static int vmbus_bus_init(void) > hv_remove_vmbus_irq(); > > bus_unregister(&hv_bus); > + if (hv_panic_page) { Again, no need to check this. > + free_page((unsigned long)hv_panic_page); > + hv_panic_page = NULL; > + } > + > + if (!hv_ctl_table_hdr) { > + unregister_sysctl_table(hv_ctl_table_hdr); > + hv_ctl_table_hdr = NULL; > + } > > return ret; > } > @@ -1785,10 +1901,22 @@ static void __exit vmbus_exit(void) > vmbus_free_channels(); > > if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { > + kmsg_dump_unregister(&hv_kmsg_dumper); > unregister_die_notifier(&hyperv_die_block); > atomic_notifier_chain_unregister(&panic_notifier_list, > &hyperv_panic_block); > } > + > + if (hv_panic_page) { Again, no need to check. I thought I asked about this last code review? thanks, greg k-h _______________________________________________ devel mailing list devel@xxxxxxxxxxxxxxxxxxxxxx http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel