RE: [PATCH v5] Drivers: HV: Send one page worth of kmsg dump over Hyper-V during panic

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




> -----Original Message-----
> From: Sunil Muthuswamy
> Sent: Friday, June 8, 2018 11:39 AM
> To: Haiyang Zhang <haiyangz@xxxxxxxxxxxxx>
> Cc: devel@xxxxxxxxxxxxxxxxxxxxxx; Sunil Muthuswamy
> <sunilmut@xxxxxxxxxxxxx>; KY Srinivasan <kys@xxxxxxxxxxxxx>; Stephen
> Hemminger <sthemmin@xxxxxxxxxxxxx>
> Subject: [PATCH v5] Drivers: HV: Send one page worth of kmsg dump over
> Hyper-V during panic
> 
> In the VM mode on Hyper-V, currently, when the kernel panics, an error
> code and few register values are populated in an MSR and the Hypervisor
> notified. This information is collected on the host. The amount of
> information currently collected is found to be limited and not very
> actionable. To gather more actionable data, such as stack trace, the
> proposal is to write one page worth of kmsg data on an allocated page
> and the Hypervisor notified of the page address through the MSR.
> 
> - Sysctl option to control the behavior, with ON by default.
> 
> Cc: K. Y. Srinivasan <kys@xxxxxxxxxxxxx>
> Cc: Stephen Hemminger <sthemmin@xxxxxxxxxxxxx>
> Signed-off-by: Sunil Muthuswamy <sunilmut@xxxxxxxxxxxxx>
> ---
> Changes since v1:
> - Added a sysctl config option to allow the user to control if panic message
>   should be reported to Hyper-V.
> 
> Changes since v2:
> - Addressed comments from Greg KH.
> - Moved the sysctl config option from 'kernel.hyperv' to 'kernel.' since the
>   likelihood of having many Hyper-V specific sysctl config options seemed less
>   and so having a specific dir for Hyper-V seemed unnecessary overhead.
> 
> Changes since v3:
> - The feature is only enabled if the capability is supported by the hypervisor.
>   This is done by reading the crash control MSR, in addition to the pre-existing
>   check for the hypervisor feature using the CPUID.
> 
> Changes since v4:
> - Addressed comments from Dexuan Cui: moved the global variable to static.
> ---
>  Documentation/sysctl/kernel.txt    |  11 ++++
>  arch/x86/hyperv/hv_init.c          |  27 +++++++++
>  arch/x86/include/asm/hyperv-tlfs.h |   5 +-
>  arch/x86/include/asm/mshyperv.h    |   1 +
>  drivers/hv/vmbus_drv.c             | 116
> +++++++++++++++++++++++++++++++++++++
>  5 files changed, 158 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/sysctl/kernel.txt
> b/Documentation/sysctl/kernel.txt
> index eded671d..5958503 100644
> --- a/Documentation/sysctl/kernel.txt
> +++ b/Documentation/sysctl/kernel.txt
> @@ -39,6 +39,7 @@ show up in /proc/sys/kernel:
>  - hung_task_check_count
>  - hung_task_timeout_secs
>  - hung_task_warnings
> +- hyperv_record_panic_msg
>  - kexec_load_disabled
>  - kptr_restrict
>  - l2cr                        [ PPC only ]
> @@ -374,6 +375,16 @@ This file shows up if CONFIG_DETECT_HUNG_TASK is
> enabled.
> 
> 
> ==========================================================
> ====
> 
> +hyperv_record_panic_msg:
> +
> +Controls whether the panic kmsg data should be reported to Hyper-V.
> +
> +0: do not report panic kmsg data.
> +
> +1: report the panic kmsg data. This is the default behavior.
> +
> +=========================================================
> =====
> +
>  kexec_load_disabled:
> 
>  A toggle indicating if the kexec_load syscall has been disabled. This
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index cfecc22..af57162 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -395,6 +395,33 @@ void hyperv_report_panic(struct pt_regs *regs, long
> err)
>  }
>  EXPORT_SYMBOL_GPL(hyperv_report_panic);
> 
> +/**
> + * hyperv_report_panic_msg - report panic message to Hyper-V
> + * @pa: physical address of the panic page containing the message
> + * @size: size of the message in the page
> + */
> +void hyperv_report_panic_msg(phys_addr_t pa, size_t size)
> +{
> +	/*
> +	 * P3 to contain the physical address of the panic page & P4 to
> +	 * contain the size of the panic data in that page. Rest of the
> +	 * registers are no-op when the NOTIFY_MSG flag is set.
> +	 */
> +	wrmsrl(HV_X64_MSR_CRASH_P0, 0);
> +	wrmsrl(HV_X64_MSR_CRASH_P1, 0);
> +	wrmsrl(HV_X64_MSR_CRASH_P2, 0);
> +	wrmsrl(HV_X64_MSR_CRASH_P3, pa);
> +	wrmsrl(HV_X64_MSR_CRASH_P4, size);
> +
> +	/*
> +	 * Let Hyper-V know there is crash data available along with
> +	 * the panic message.
> +	 */
> +	wrmsrl(HV_X64_MSR_CRASH_CTL,
> +	       (HV_CRASH_CTL_CRASH_NOTIFY |
> HV_CRASH_CTL_CRASH_NOTIFY_MSG));
> +}
> +EXPORT_SYMBOL_GPL(hyperv_report_panic_msg);
> +
>  bool hv_is_hyperv_initialized(void)
>  {
>  	union hv_x64_msr_hypercall_contents hypercall_msr;
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h
> b/arch/x86/include/asm/hyperv-tlfs.h
> index 416cb0e..76c58af 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -171,9 +171,10 @@
>  #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED    (1 << 14)
> 
>  /*
> - * Crash notification flag.
> + * Crash notification flags.
>   */
> -#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63)
> +#define HV_CRASH_CTL_CRASH_NOTIFY_MSG	BIT_ULL(62)
> +#define HV_CRASH_CTL_CRASH_NOTIFY	BIT_ULL(63)
> 
>  /* MSR used to identify the guest OS. */
>  #define HV_X64_MSR_GUEST_OS_ID			0x40000000
> diff --git a/arch/x86/include/asm/mshyperv.h
> b/arch/x86/include/asm/mshyperv.h
> index b90e796..ac83f2d 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -262,6 +262,7 @@ void hyperv_init(void);
>  void hyperv_setup_mmu_ops(void);
>  void hyper_alloc_mmu(void);
>  void hyperv_report_panic(struct pt_regs *regs, long err);
> +void hyperv_report_panic_msg(phys_addr_t pa, size_t size);
>  bool hv_is_hyperv_initialized(void);
>  void hyperv_cleanup(void);
> 
> diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
> index b10fe26..eec3b235 100644
> --- a/drivers/hv/vmbus_drv.c
> +++ b/drivers/hv/vmbus_drv.c
> @@ -56,6 +56,8 @@ static struct completion probe_event;
> 
>  static int hyperv_cpuhp_online;
> 
> +static void *hv_panic_page;
> +
>  static int hyperv_panic_event(struct notifier_block *nb, unsigned long val,
>  			      void *args)
>  {
> @@ -1018,6 +1020,75 @@ static void vmbus_isr(void)
>  	add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0);
>  }
> 
> +/*
> + * Boolean to control whether to report panic messages over Hyper-V.
> + *
> + * It can be set via /proc/sys/kernel/hyperv/record_panic_msg
> + */
> +static int sysctl_record_panic_msg = 1;
> +
> +/*
> + * Callback from kmsg_dump. Grab as much as possible from the end of the
> kmsg
> + * buffer and call into Hyper-V to transfer the data.
> + */
> +static void hv_kmsg_dump(struct kmsg_dumper *dumper,
> +			 enum kmsg_dump_reason reason)
> +{
> +	size_t bytes_written;
> +	phys_addr_t panic_pa;
> +
> +	/* We are only interested in panics. */
> +	if ((reason != KMSG_DUMP_PANIC) || (!sysctl_record_panic_msg))
> +		return;
> +
> +	panic_pa = virt_to_phys(hv_panic_page);
> +
> +	/*
> +	 * Write dump contents to the page. No need to synchronize; panic
> should
> +	 * be single-threaded.
> +	 */
> +	if (!kmsg_dump_get_buffer(dumper, true, hv_panic_page,
> +				  PAGE_SIZE, &bytes_written)) {
> +		pr_err("Hyper-V: Unable to get kmsg data for panic\n");
> +		return;
> +	}
> +
> +	hyperv_report_panic_msg(panic_pa, bytes_written);
> +}
> +
> +static struct kmsg_dumper hv_kmsg_dumper = {
> +	.dump = hv_kmsg_dump,
> +};
> +
> +static struct ctl_table_header *hv_ctl_table_hdr;
> +static int zero;
> +static int one = 1;
> +
> +/*
> + * sysctl option to allow the user to control whether kmsg data should be
> + * reported to Hyper-V on panic.
> + */
> +static struct ctl_table hv_ctl_table[] = {
> +	{
> +		.procname       = "hyperv_record_panic_msg",
> +		.data           = &sysctl_record_panic_msg,
> +		.maxlen         = sizeof(int),
> +		.mode           = 0644,
> +		.proc_handler   = proc_dointvec_minmax,
> +		.extra1		= &zero,
> +		.extra2		= &one
> +	},
> +	{}
> +};
> +
> +static struct ctl_table hv_root_table[] = {
> +	{
> +		.procname	= "kernel",
> +		.mode		= 0555,
> +		.child		= hv_ctl_table
> +	},
> +	{}
> +};
> 
>  /*
>   * vmbus_bus_init -Main vmbus driver initialization routine.
> @@ -1065,6 +1136,38 @@ static int vmbus_bus_init(void)
>  	 * Only register if the crash MSRs are available
>  	 */
>  	if (ms_hyperv.misc_features &
> HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
> +		u64 hyperv_crash_ctl;
> +		if (!hv_ctl_table_hdr) {
When vmus_bus_init is invoked, would hv_ctl_table_hdr  ever be 
Non-NULL?
> +			/*
> +			 * Sysctl registration is not fatal, since default is
> +			 * for operation.
> +			 */
> +			hv_ctl_table_hdr =
> +				register_sysctl_table(hv_root_table);
> +			if (!hv_ctl_table_hdr)
> +				pr_err("Hyper-V: sysctl table register error");
> +		}
> +
> +		/*
> +		 * Register for panic kmsg callback only if the right
> +		 * capability is supported by the hypervisor.
> +		 */
> +		rdmsrl(HV_X64_MSR_CRASH_CTL, hyperv_crash_ctl);
> +		if (hyperv_crash_ctl &
> HV_CRASH_CTL_CRASH_NOTIFY_MSG) {
> +			hv_panic_page = (void
> *)get_zeroed_page(GFP_KERNEL);
> +			if (!hv_panic_page) {
Why is this allocation failure fatal in terms failing the load of the driver?
If we failed to allocate why can't we revert to the existing panic notification
scheme. 
> +				ret = -ENOMEM;
> +				goto err_connect;
> +			}
> +
> +			ret = kmsg_dump_register(&hv_kmsg_dumper);
> +			if (ret) {
> +				pr_err("Hyper-V: kmsg dump register error "
> +					"0x%x\n", ret);
> +				goto err_connect;
> +			}
> +		}
> +
>  		register_die_notifier(&hyperv_die_block);
>  		atomic_notifier_chain_register(&panic_notifier_list,
>  					       &hyperv_panic_block);
> @@ -1081,6 +1184,11 @@ static int vmbus_bus_init(void)
>  	hv_remove_vmbus_irq();
> 
>  	bus_unregister(&hv_bus);
> +	free_page((unsigned long)hv_panic_page);
> +	if (!hv_ctl_table_hdr) {
> +		unregister_sysctl_table(hv_ctl_table_hdr);
> +		hv_ctl_table_hdr = NULL;
> +	}
> 
>  	return ret;
>  }
> @@ -1785,10 +1893,18 @@ static void __exit vmbus_exit(void)
>  	vmbus_free_channels();
> 
>  	if (ms_hyperv.misc_features &
> HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
> +		kmsg_dump_unregister(&hv_kmsg_dumper);
>  		unregister_die_notifier(&hyperv_die_block);
>  		atomic_notifier_chain_unregister(&panic_notifier_list,
>  						 &hyperv_panic_block);
>  	}
> +
> +	free_page((unsigned long)hv_panic_page);
> +	if (!hv_ctl_table_hdr) {
> +		unregister_sysctl_table(hv_ctl_table_hdr);
> +		hv_ctl_table_hdr = NULL;
> +	}
> +
>  	bus_unregister(&hv_bus);
> 
>  	cpuhp_remove_state(hyperv_cpuhp_online);
> --
> 2.7.4

_______________________________________________
devel mailing list
devel@xxxxxxxxxxxxxxxxxxxxxx
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel



[Index of Archives]     [Linux Driver Backports]     [DMA Engine]     [Linux GPIO]     [Linux SPI]     [Video for Linux]     [Linux USB Devel]     [Linux Coverity]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [Yosemite Backpacking]
  Powered by Linux