Re: [PATCH v11 2/2] misc: Add a mechanism to detect stalls on guest vCPUs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Jul 08, 2022 at 11:23:45AM +0000, Sebastian Ene wrote:
> This driver creates per-cpu hrtimers which are required to do the
> periodic 'pet' operation. On a conventional watchdog-core driver, the
> userspace is responsible for delivering the 'pet' events by writing to
> the particular /dev/watchdogN node. In this case we require a strong
> thread affinity to be able to account for lost time on a per vCPU.
> 
> This part of the driver is the 'frontend' which is reponsible for
> delivering the periodic 'pet' events, configuring the virtual peripheral
> and listening for cpu hotplug events. The other part of the driver is
> an emulated MMIO device which is part of the KVM virtual machine
> monitor and this part accounts for lost time by looking at the
> /proc/{}/task/{}/stat entries.
> 
> Reviewed-by: Will Deacon <will@xxxxxxxxxx>
> Signed-off-by: Sebastian Ene <sebastianene@xxxxxxxxxx>

Reviewed-by: Guenter Roeck <linux@xxxxxxxxxxxx>

> ---
>  drivers/misc/Kconfig               |  14 ++
>  drivers/misc/Makefile              |   1 +
>  drivers/misc/vcpu_stall_detector.c | 223 +++++++++++++++++++++++++++++
>  3 files changed, 238 insertions(+)
>  create mode 100644 drivers/misc/vcpu_stall_detector.c
> 
> diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
> index 41d2bb0ae23a..d5b7610459f7 100644
> --- a/drivers/misc/Kconfig
> +++ b/drivers/misc/Kconfig
> @@ -483,6 +483,20 @@ config OPEN_DICE
>  
>  	  If unsure, say N.
>  
> +config VCPU_STALL_DETECTOR
> +	tristate "Guest vCPU stall detector"
> +	select LOCKUP_DETECTOR
> +	depends on OF && HAS_IOMEM
> +	help
> +	  When this driver is bound inside a KVM guest, it will
> +	  periodically "pet" an MMIO stall detector device from each vCPU
> +	  and allow the host to detect vCPU stalls.
> +
> +	  To compile this driver as a module, choose M here: the module
> +	  will be called vcpu_stall_detector.
> +
> +	  If you do not intend to run this kernel as a guest, say N.
> +
>  source "drivers/misc/c2port/Kconfig"
>  source "drivers/misc/eeprom/Kconfig"
>  source "drivers/misc/cb710/Kconfig"
> diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
> index 70e800e9127f..2be8542616dd 100644
> --- a/drivers/misc/Makefile
> +++ b/drivers/misc/Makefile
> @@ -60,3 +60,4 @@ obj-$(CONFIG_XILINX_SDFEC)	+= xilinx_sdfec.o
>  obj-$(CONFIG_HISI_HIKEY_USB)	+= hisi_hikey_usb.o
>  obj-$(CONFIG_HI6421V600_IRQ)	+= hi6421v600-irq.o
>  obj-$(CONFIG_OPEN_DICE)		+= open-dice.o
> +obj-$(CONFIG_VCPU_STALL_DETECTOR)	+= vcpu_stall_detector.o
> \ No newline at end of file
> diff --git a/drivers/misc/vcpu_stall_detector.c b/drivers/misc/vcpu_stall_detector.c
> new file mode 100644
> index 000000000000..53b5506080e1
> --- /dev/null
> +++ b/drivers/misc/vcpu_stall_detector.c
> @@ -0,0 +1,223 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +//
> +// VCPU stall detector.
> +//  Copyright (C) Google, 2022
> +
> +#include <linux/cpu.h>
> +#include <linux/init.h>
> +#include <linux/io.h>
> +#include <linux/kernel.h>
> +
> +#include <linux/device.h>
> +#include <linux/interrupt.h>
> +#include <linux/module.h>
> +#include <linux/nmi.h>
> +#include <linux/of.h>
> +#include <linux/of_device.h>
> +#include <linux/param.h>
> +#include <linux/percpu.h>
> +#include <linux/platform_device.h>
> +#include <linux/slab.h>
> +
> +#define VCPU_STALL_REG_STATUS		(0x00)
> +#define VCPU_STALL_REG_LOAD_CNT		(0x04)
> +#define VCPU_STALL_REG_CURRENT_CNT	(0x08)
> +#define VCPU_STALL_REG_CLOCK_FREQ_HZ	(0x0C)
> +#define VCPU_STALL_REG_LEN		(0x10)
> +
> +#define VCPU_STALL_DEFAULT_CLOCK_HZ	(10)
> +#define VCPU_STALL_MAX_CLOCK_HZ		(100)
> +#define VCPU_STALL_DEFAULT_TIMEOUT_SEC	(8)
> +#define VCPU_STALL_MAX_TIMEOUT_SEC	(600)
> +
> +struct vcpu_stall_detect_config {
> +	u32 clock_freq_hz;
> +	u32 stall_timeout_sec;
> +
> +	void __iomem *membase;
> +	struct platform_device *dev;
> +	enum cpuhp_state hp_online;
> +};
> +
> +struct vcpu_stall_priv {
> +	struct hrtimer vcpu_hrtimer;
> +	bool is_initialized;
> +};
> +
> +/* The vcpu stall configuration structure which applies to all the CPUs */
> +static struct vcpu_stall_detect_config vcpu_stall_config;
> +
> +#define vcpu_stall_reg_write(vcpu, reg, value)				\
> +	writel_relaxed((value),						\
> +		       (void __iomem *)(vcpu_stall_config.membase +	\
> +		       (vcpu) * VCPU_STALL_REG_LEN + (reg)))
> +
> +
> +static struct vcpu_stall_priv __percpu *vcpu_stall_detectors;
> +
> +static enum hrtimer_restart
> +vcpu_stall_detect_timer_fn(struct hrtimer *hrtimer)
> +{
> +	u32 ticks, ping_timeout_ms;
> +
> +	/* Reload the stall detector counter register every
> +	 * `ping_timeout_ms` to prevent the virtual device
> +	 * from decrementing it to 0. The virtual device decrements this
> +	 * register at 'clock_freq_hz' frequency.
> +	 */
> +	ticks = vcpu_stall_config.clock_freq_hz *
> +		vcpu_stall_config.stall_timeout_sec;
> +	vcpu_stall_reg_write(smp_processor_id(),
> +			     VCPU_STALL_REG_LOAD_CNT, ticks);
> +
> +	ping_timeout_ms = vcpu_stall_config.stall_timeout_sec *
> +			  MSEC_PER_SEC / 2;
> +	hrtimer_forward_now(hrtimer,
> +			    ms_to_ktime(ping_timeout_ms));
> +
> +	return HRTIMER_RESTART;
> +}
> +
> +static int start_stall_detector_cpu(unsigned int cpu)
> +{
> +	u32 ticks, ping_timeout_ms;
> +	struct vcpu_stall_priv *vcpu_stall_detector =
> +		this_cpu_ptr(vcpu_stall_detectors);
> +	struct hrtimer *vcpu_hrtimer = &vcpu_stall_detector->vcpu_hrtimer;
> +
> +	vcpu_stall_reg_write(cpu, VCPU_STALL_REG_CLOCK_FREQ_HZ,
> +			     vcpu_stall_config.clock_freq_hz);
> +
> +	/* Compute the number of ticks required for the stall detector
> +	 * counter register based on the internal clock frequency and the
> +	 * timeout value given from the device tree.
> +	 */
> +	ticks = vcpu_stall_config.clock_freq_hz *
> +		vcpu_stall_config.stall_timeout_sec;
> +	vcpu_stall_reg_write(cpu, VCPU_STALL_REG_LOAD_CNT, ticks);
> +
> +	/* Enable the internal clock and start the stall detector */
> +	vcpu_stall_reg_write(cpu, VCPU_STALL_REG_STATUS, 1);
> +
> +	/* Pet the stall detector at half of its expiration timeout
> +	 * to prevent spurious resets.
> +	 */
> +	ping_timeout_ms = vcpu_stall_config.stall_timeout_sec *
> +			  MSEC_PER_SEC / 2;
> +
> +	hrtimer_init(vcpu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> +	vcpu_hrtimer->function = vcpu_stall_detect_timer_fn;
> +	vcpu_stall_detector->is_initialized = true;
> +
> +	hrtimer_start(vcpu_hrtimer, ms_to_ktime(ping_timeout_ms),
> +		      HRTIMER_MODE_REL_PINNED);
> +
> +	return 0;
> +}
> +
> +static int stop_stall_detector_cpu(unsigned int cpu)
> +{
> +	struct vcpu_stall_priv *vcpu_stall_detector =
> +		per_cpu_ptr(vcpu_stall_detectors, cpu);
> +
> +	if (!vcpu_stall_detector->is_initialized)
> +		return 0;
> +
> +	/* Disable the stall detector for the current CPU */
> +	hrtimer_cancel(&vcpu_stall_detector->vcpu_hrtimer);
> +	vcpu_stall_reg_write(cpu, VCPU_STALL_REG_STATUS, 0);
> +	vcpu_stall_detector->is_initialized = false;
> +
> +	return 0;
> +}
> +
> +static int vcpu_stall_detect_probe(struct platform_device *pdev)
> +{
> +	int ret;
> +	struct resource *r;
> +	void __iomem *membase;
> +	u32 clock_freq_hz = VCPU_STALL_DEFAULT_CLOCK_HZ;
> +	u32 stall_timeout_sec = VCPU_STALL_DEFAULT_TIMEOUT_SEC;
> +	struct device_node *np = pdev->dev.of_node;
> +
> +	vcpu_stall_detectors = devm_alloc_percpu(&pdev->dev,
> +						 typeof(struct vcpu_stall_priv));
> +	if (!vcpu_stall_detectors)
> +		return -ENOMEM;
> +
> +	membase = devm_platform_get_and_ioremap_resource(pdev, 0, &r);
> +	if (IS_ERR(membase)) {
> +		dev_err(&pdev->dev, "Failed to get memory resource\n");
> +		return PTR_ERR(membase);
> +	}
> +
> +	if (!of_property_read_u32(np, "clock-frequency", &clock_freq_hz)) {
> +		if (!(clock_freq_hz > 0 &&
> +		      clock_freq_hz < VCPU_STALL_MAX_CLOCK_HZ)) {
> +			dev_warn(&pdev->dev, "clk out of range\n");
> +			clock_freq_hz = VCPU_STALL_DEFAULT_CLOCK_HZ;
> +		}
> +	}
> +
> +	if (!of_property_read_u32(np, "timeout-sec", &stall_timeout_sec)) {
> +		if (!(stall_timeout_sec > 0 &&
> +		      stall_timeout_sec < VCPU_STALL_MAX_TIMEOUT_SEC)) {
> +			dev_warn(&pdev->dev, "stall timeout out of range\n");
> +			stall_timeout_sec = VCPU_STALL_DEFAULT_TIMEOUT_SEC;
> +		}
> +	}
> +
> +	vcpu_stall_config = (struct vcpu_stall_detect_config) {
> +		.membase		= membase,
> +		.clock_freq_hz		= clock_freq_hz,
> +		.stall_timeout_sec	= stall_timeout_sec
> +	};
> +
> +	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
> +				"virt/vcpu_stall_detector:online",
> +				start_stall_detector_cpu,
> +				stop_stall_detector_cpu);
> +	if (ret < 0) {
> +		dev_err(&pdev->dev, "failed to install cpu hotplug");
> +		goto err;
> +	}
> +
> +	vcpu_stall_config.hp_online = ret;
> +	return 0;
> +err:
> +	return ret;
> +}
> +
> +static int vcpu_stall_detect_remove(struct platform_device *pdev)
> +{
> +	int cpu;
> +
> +	cpuhp_remove_state(vcpu_stall_config.hp_online);
> +
> +	for_each_possible_cpu(cpu)
> +		stop_stall_detector_cpu(cpu);
> +
> +	return 0;
> +}
> +
> +static const struct of_device_id vcpu_stall_detect_of_match[] = {
> +	{ .compatible = "qemu,vcpu-stall-detector", },
> +	{}
> +};
> +
> +MODULE_DEVICE_TABLE(of, vcpu_stall_detect_of_match);
> +
> +static struct platform_driver vcpu_stall_detect_driver = {
> +	.probe  = vcpu_stall_detect_probe,
> +	.remove = vcpu_stall_detect_remove,
> +	.driver = {
> +		.name           = KBUILD_MODNAME,
> +		.of_match_table = vcpu_stall_detect_of_match,
> +	},
> +};
> +
> +module_platform_driver(vcpu_stall_detect_driver);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Sebastian Ene <sebastianene@xxxxxxxxxx>");
> +MODULE_DESCRIPTION("VCPU stall detector");
> -- 
> 2.37.0.rc0.161.g10f37bed90-goog
> 



[Index of Archives]     [Device Tree Compilter]     [Device Tree Spec]     [Linux Driver Backports]     [Video for Linux]     [Linux USB Devel]     [Linux PCI Devel]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Yosemite Backpacking]


  Powered by Linux