This is a note to let you know that I've just added the patch titled powercap: intel_rapl: Fix locking in TPMI RAPL to the 6.8-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: powercap-intel_rapl-fix-locking-in-tpmi-rapl.patch and it can be found in the queue-6.8 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. commit f7017726c966339a91ed8fd9eb484cbba302e65e Author: Zhang Rui <rui.zhang@xxxxxxxxx> Date: Wed Jan 31 19:37:09 2024 +0800 powercap: intel_rapl: Fix locking in TPMI RAPL [ Upstream commit 1aa09b9379a7a644cd2f75ae0bac82b8783df600 ] The RAPL framework uses CPU hotplug locking to protect the rapl_packages list and rp->lead_cpu to guarantee that 1. the RAPL package device is not unprobed and freed 2. the cached rp->lead_cpu is always valid for operations like powercap sysfs accesses. Current RAPL APIs assume being called from CPU hotplug callbacks which hold the CPU hotplug lock, but TPMI RAPL driver invokes the APIs in the driver's .probe() function without acquiring the CPU hotplug lock. Fix the problem by providing both locked and lockless versions of RAPL APIs. Fixes: 9eef7f9da928 ("powercap: intel_rapl: Introduce RAPL TPMI interface driver") Signed-off-by: Zhang Rui <rui.zhang@xxxxxxxxx> Cc: 6.5+ <stable@xxxxxxxxxxxxxxx> # 6.5+ Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@xxxxxxxxx> Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx> diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 1a739afd47d96..9d3e102f1a76b 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -5,6 +5,7 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/cleanup.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/list.h> @@ -1504,7 +1505,7 @@ static int rapl_detect_domains(struct rapl_package *rp) } /* called from CPU hotplug notifier, hotplug lock held */ -void rapl_remove_package(struct rapl_package *rp) +void rapl_remove_package_cpuslocked(struct rapl_package *rp) { struct rapl_domain *rd, *rd_package = NULL; @@ -1533,10 +1534,18 @@ void rapl_remove_package(struct rapl_package *rp) list_del(&rp->plist); kfree(rp); } +EXPORT_SYMBOL_GPL(rapl_remove_package_cpuslocked); + +void rapl_remove_package(struct rapl_package *rp) +{ + guard(cpus_read_lock)(); + rapl_remove_package_cpuslocked(rp); +} EXPORT_SYMBOL_GPL(rapl_remove_package); /* caller to ensure CPU hotplug lock is held */ -struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu) +struct rapl_package *rapl_find_package_domain_cpuslocked(int id, struct rapl_if_priv *priv, + bool id_is_cpu) { struct rapl_package *rp; int uid; @@ -1554,10 +1563,17 @@ struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, return NULL; } +EXPORT_SYMBOL_GPL(rapl_find_package_domain_cpuslocked); + +struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu) +{ + guard(cpus_read_lock)(); + return rapl_find_package_domain_cpuslocked(id, priv, id_is_cpu); +} EXPORT_SYMBOL_GPL(rapl_find_package_domain); /* called from CPU hotplug notifier, hotplug lock held */ -struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu) +struct rapl_package *rapl_add_package_cpuslocked(int id, struct rapl_if_priv *priv, bool id_is_cpu) { struct rapl_package *rp; int ret; @@ -1603,6 +1619,13 @@ struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id kfree(rp); return ERR_PTR(ret); } +EXPORT_SYMBOL_GPL(rapl_add_package_cpuslocked); + +struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu) +{ + guard(cpus_read_lock)(); + return rapl_add_package_cpuslocked(id, priv, id_is_cpu); +} EXPORT_SYMBOL_GPL(rapl_add_package); static void power_limit_state_save(void) diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c index 250bd41a588c7..b4b6930cacb0b 100644 --- a/drivers/powercap/intel_rapl_msr.c +++ b/drivers/powercap/intel_rapl_msr.c @@ -73,9 +73,9 @@ static int rapl_cpu_online(unsigned int cpu) { struct rapl_package *rp; - rp = rapl_find_package_domain(cpu, rapl_msr_priv, true); + rp = rapl_find_package_domain_cpuslocked(cpu, rapl_msr_priv, true); if (!rp) { - rp = rapl_add_package(cpu, rapl_msr_priv, true); + rp = rapl_add_package_cpuslocked(cpu, rapl_msr_priv, true); if (IS_ERR(rp)) return PTR_ERR(rp); } @@ -88,14 +88,14 @@ static int rapl_cpu_down_prep(unsigned int cpu) struct rapl_package *rp; int lead_cpu; - rp = rapl_find_package_domain(cpu, rapl_msr_priv, true); + rp = rapl_find_package_domain_cpuslocked(cpu, rapl_msr_priv, true); if (!rp) return 0; cpumask_clear_cpu(cpu, &rp->cpumask); lead_cpu = cpumask_first(&rp->cpumask); if (lead_cpu >= nr_cpu_ids) - rapl_remove_package(rp); + rapl_remove_package_cpuslocked(rp); else if (rp->lead_cpu == cpu) rp->lead_cpu = lead_cpu; return 0; diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c index 2f00fc3bf274a..e964a9375722a 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c @@ -27,9 +27,9 @@ static int rapl_mmio_cpu_online(unsigned int cpu) if (topology_physical_package_id(cpu)) return 0; - rp = rapl_find_package_domain(cpu, &rapl_mmio_priv, true); + rp = rapl_find_package_domain_cpuslocked(cpu, &rapl_mmio_priv, true); if (!rp) { - rp = rapl_add_package(cpu, &rapl_mmio_priv, true); + rp = rapl_add_package_cpuslocked(cpu, &rapl_mmio_priv, true); if (IS_ERR(rp)) return PTR_ERR(rp); } @@ -42,14 +42,14 @@ static int rapl_mmio_cpu_down_prep(unsigned int cpu) struct rapl_package *rp; int lead_cpu; - rp = rapl_find_package_domain(cpu, &rapl_mmio_priv, true); + rp = rapl_find_package_domain_cpuslocked(cpu, &rapl_mmio_priv, true); if (!rp) return 0; cpumask_clear_cpu(cpu, &rp->cpumask); lead_cpu = cpumask_first(&rp->cpumask); if (lead_cpu >= nr_cpu_ids) - rapl_remove_package(rp); + rapl_remove_package_cpuslocked(rp); else if (rp->lead_cpu == cpu) rp->lead_cpu = lead_cpu; return 0; diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h index 33f21bd85dbf2..f3196f82fd8a1 100644 --- a/include/linux/intel_rapl.h +++ b/include/linux/intel_rapl.h @@ -178,6 +178,12 @@ struct rapl_package { struct rapl_if_priv *priv; }; +struct rapl_package *rapl_find_package_domain_cpuslocked(int id, struct rapl_if_priv *priv, + bool id_is_cpu); +struct rapl_package *rapl_add_package_cpuslocked(int id, struct rapl_if_priv *priv, + bool id_is_cpu); +void rapl_remove_package_cpuslocked(struct rapl_package *rp); + struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu); struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu); void rapl_remove_package(struct rapl_package *rp);