Add a thermal cooling driver to provide path to access PCIe bandwidth controller using the usual thermal interfaces. A cooling device is instantiated for controllable PCIe Ports from the bwctrl service driver. The thermal side state 0 means no throttling, i.e., maximum supported PCIe Link Speed. Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@xxxxxxxxxxxxxxx> Acked-by: Rafael J. Wysocki <rafael@xxxxxxxxxx> # From the cooling device interface perspective --- MAINTAINERS | 2 + drivers/pci/pcie/bwctrl.c | 8 ++++ drivers/thermal/Kconfig | 10 ++++ drivers/thermal/Makefile | 2 + drivers/thermal/pcie_cooling.c | 84 ++++++++++++++++++++++++++++++++++ include/linux/pci-bwctrl.h | 28 ++++++++++++ 6 files changed, 134 insertions(+) create mode 100644 drivers/thermal/pcie_cooling.c create mode 100644 include/linux/pci-bwctrl.h diff --git a/MAINTAINERS b/MAINTAINERS index bd420cb09821..3a94ae81b13f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17104,6 +17104,8 @@ M: Ilpo Järvinen <ilpo.jarvinen@xxxxxxxxxxxxxxx> L: linux-pci@xxxxxxxxxxxxxxx S: Supported F: drivers/pci/pcie/bwctrl.c +F: drivers/thermal/pcie_cooling.c +F: include/linux/pci-bwctrl.h PCIE DRIVER FOR AMAZON ANNAPURNA LABS M: Jonathan Chocron <jonnyc@xxxxxxxxxx> diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c index e97665848158..5d1df9e6a349 100644 --- a/drivers/pci/pcie/bwctrl.c +++ b/drivers/pci/pcie/bwctrl.c @@ -27,6 +27,7 @@ #include <linux/interrupt.h> #include <linux/mutex.h> #include <linux/pci.h> +#include <linux/pci-bwctrl.h> #include <linux/rwsem.h> #include <linux/slab.h> #include <linux/types.h> @@ -38,10 +39,12 @@ * struct pcie_bwctrl_data - PCIe bandwidth controller * @set_speed_mutex: Serializes link speed changes * @lbms_count: Count for LBMS (since last reset) + * @cdev: thermal cooling device associated with the port */ struct pcie_bwctrl_data { struct mutex set_speed_mutex; atomic_t lbms_count; + struct thermal_cooling_device *cdev; }; static bool pcie_valid_speed(enum pci_bus_speed speed) @@ -287,6 +290,8 @@ static int pcie_bwnotif_probe(struct pcie_device *srv) pcie_bwnotif_enable(srv); pci_info(port, "enabled with IRQ %d\n", srv->irq); + port->link_bwctrl->cdev = pcie_cooling_device_register(port); + return 0; } @@ -294,6 +299,9 @@ static void pcie_bwnotif_remove(struct pcie_device *srv) { struct pcie_bwctrl_data *data = get_service_data(srv); + if (data->cdev) + pcie_cooling_device_unregister(data->cdev); + scoped_guard(rwsem_write, &pcie_bwctrl_remove_rwsem) srv->port->link_bwctrl = NULL; diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 204ed89a3ec9..7ddc44be4701 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -211,6 +211,16 @@ config DEVFREQ_THERMAL If you want this support, you should say Y here. +config PCIE_THERMAL + bool "PCIe cooling support" + depends on PCIEPORTBUS + select PCIE_BWCTRL + help + This implements PCIe cooling mechanism through bandwidth reduction + for PCIe devices. + + If you want this support, you should say Y here. + config THERMAL_EMULATION bool "Thermal emulation mode support" help diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 5cdf7d68687f..5b9bf8e80eb6 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -31,6 +31,8 @@ thermal_sys-$(CONFIG_CPU_IDLE_THERMAL) += cpuidle_cooling.o # devfreq cooling thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o +thermal_sys-$(CONFIG_PCIE_THERMAL) += pcie_cooling.o + obj-$(CONFIG_K3_THERMAL) += k3_bandgap.o k3_j72xx_bandgap.o # platform thermal drivers obj-y += broadcom/ diff --git a/drivers/thermal/pcie_cooling.c b/drivers/thermal/pcie_cooling.c new file mode 100644 index 000000000000..367290d57e99 --- /dev/null +++ b/drivers/thermal/pcie_cooling.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PCIe cooling device + * + * Copyright (C) 2023 Intel Corporation + */ + +#include <linux/build_bug.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/pci-bwctrl.h> +#include <linux/slab.h> +#include <linux/sprintf.h> +#include <linux/thermal.h> + +#define COOLING_DEV_TYPE_PREFIX "PCIe_Port_Link_Speed_" + +static int pcie_cooling_get_max_level(struct thermal_cooling_device *cdev, unsigned long *state) +{ + struct pci_dev *port = cdev->devdata; + + /* cooling state 0 is same as the maximum PCIe speed */ + *state = port->subordinate->max_bus_speed - PCIE_SPEED_2_5GT; + + return 0; +} + +static int pcie_cooling_get_cur_level(struct thermal_cooling_device *cdev, unsigned long *state) +{ + struct pci_dev *port = cdev->devdata; + + /* cooling state 0 is same as the maximum PCIe speed */ + *state = cdev->max_state - (port->subordinate->cur_bus_speed - PCIE_SPEED_2_5GT); + + return 0; +} + +static int pcie_cooling_set_cur_level(struct thermal_cooling_device *cdev, unsigned long state) +{ + struct pci_dev *port = cdev->devdata; + enum pci_bus_speed speed; + + /* cooling state 0 is same as the maximum PCIe speed */ + speed = (cdev->max_state - state) + PCIE_SPEED_2_5GT; + + return pcie_set_target_speed(port, speed, true); +} + +static struct thermal_cooling_device_ops pcie_cooling_ops = { + .get_max_state = pcie_cooling_get_max_level, + .get_cur_state = pcie_cooling_get_cur_level, + .set_cur_state = pcie_cooling_set_cur_level, +}; + +struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port) +{ + struct thermal_cooling_device *cdev; + char *name; + + name = kasprintf(GFP_KERNEL, COOLING_DEV_TYPE_PREFIX "%s", pci_name(port)); + if (!name) + return ERR_PTR(-ENOMEM); + + cdev = thermal_cooling_device_register(name, port, &pcie_cooling_ops); + kfree(name); + + return cdev; +} + +void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev) +{ + thermal_cooling_device_unregister(cdev); +} + +/* For bus_speed <-> state arithmetic */ +static_assert(PCIE_SPEED_2_5GT + 1 == PCIE_SPEED_5_0GT); +static_assert(PCIE_SPEED_5_0GT + 1 == PCIE_SPEED_8_0GT); +static_assert(PCIE_SPEED_8_0GT + 1 == PCIE_SPEED_16_0GT); +static_assert(PCIE_SPEED_16_0GT + 1 == PCIE_SPEED_32_0GT); +static_assert(PCIE_SPEED_32_0GT + 1 == PCIE_SPEED_64_0GT); + +MODULE_AUTHOR("Ilpo Järvinen <ilpo.jarvinen@xxxxxxxxxxxxxxx>"); +MODULE_DESCRIPTION("PCIe cooling driver"); diff --git a/include/linux/pci-bwctrl.h b/include/linux/pci-bwctrl.h new file mode 100644 index 000000000000..cee07127455b --- /dev/null +++ b/include/linux/pci-bwctrl.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * PCIe bandwidth controller + * + * Copyright (C) 2023-2024 Intel Corporation + */ + +#ifndef LINUX_PCI_BWCTRL_H +#define LINUX_PCI_BWCTRL_H + +#include <linux/pci.h> + +struct thermal_cooling_device; + +#ifdef CONFIG_PCIE_THERMAL +struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port); +void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev); +#else +static inline struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port) +{ + return NULL; +} +static inline void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev) +{ +} +#endif + +#endif -- 2.39.2