On 7/10/21 12:57 AM, Bjorn Andersson wrote:
On Thu 08 Jul 07:06 CDT 2021, Thara Gopinath wrote:
Add interrupt support to notify the kernel of h/w initiated frequency
throttling by LMh. Convey this to scheduler via thermal pressure
interface.
Signed-off-by: Thara Gopinath <thara.gopinath@xxxxxxxxxx>
---
v2->v3:
- Cosmetic fixes from review comments on the list.
- Moved all LMh initializations to qcom_cpufreq_hw_lmh_init.
- Added freeing of LMh interrupt and cancelling the polling worker to
qcom_cpufreq_hw_cpu_exit as per Viresh's suggestion.
- LMh interrupts are now tied to cpu dev and not cpufreq dev. This will be
useful for further generation of SoCs where the same interrupt signals
multiple cpu clusters.
v1->v2:
- Introduced qcom_cpufreq_hw_lmh_init to consolidate LMh related initializations
as per Viresh's review comment.
- Moved the piece of code restarting polling/re-enabling LMh interrupt to
qcom_lmh_dcvs_notify thereby simplifying isr and timer callback as per Viresh's
suggestion.
- Dropped cpus from qcom_cpufreq_data and instead using cpus from cpufreq_policy in
qcom_lmh_dcvs_notify as per Viresh's review comment.
- Dropped dt property qcom,support-lmh as per Bjorn's suggestion.
- Other minor/cosmetic fixes
drivers/cpufreq/qcom-cpufreq-hw.c | 118 ++++++++++++++++++++++++++++++
1 file changed, 118 insertions(+)
diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
index f86859bf76f1..bb5fc700d913 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -7,6 +7,7 @@
#include <linux/cpufreq.h>
#include <linux/init.h>
#include <linux/interconnect.h>
+#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of_address.h>
@@ -22,10 +23,13 @@
#define CLK_HW_DIV 2
#define LUT_TURBO_IND 1
+#define HZ_PER_KHZ 1000
+
struct qcom_cpufreq_soc_data {
u32 reg_enable;
u32 reg_freq_lut;
u32 reg_volt_lut;
+ u32 reg_current_vote;
u32 reg_perf_state;
u8 lut_row_size;
};
@@ -33,7 +37,10 @@ struct qcom_cpufreq_soc_data {
struct qcom_cpufreq_data {
void __iomem *base;
struct resource *res;
+ struct delayed_work lmh_dcvs_poll_work;
How about dropping "lmh" from this variable name?
Perhaps "throttle_work" or something like that?
const struct qcom_cpufreq_soc_data *soc_data;
+ struct cpufreq_policy *policy;
+ int lmh_dcvs_irq;
throttle_irq ?
sounds good!
};
static unsigned long cpu_hw_rate, xo_rate;
@@ -251,10 +258,84 @@ static void qcom_get_related_cpus(int index, struct cpumask *m)
}
}
+static inline unsigned long qcom_lmh_vote_to_freq(u32 val)
+{
+ return (val & 0x3FF) * 19200;
+}
+
+static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)
+{
+ struct cpufreq_policy *policy = data->policy;
+ struct dev_pm_opp *opp;
+ struct device *dev;
+ unsigned long max_capacity, capacity, freq_hz, throttled_freq;
+ unsigned int val, freq;
+
+ /*
+ * Get the h/w throttled frequency, normalize it using the
+ * registered opp table and use it to calculate thermal pressure.
+ */
+ val = readl_relaxed(data->base + data->soc_data->reg_current_vote);
I would find it cleaner to move the readl() into the helper function, as
you don't care about the register value, only the resulting frequency.
Ok..
+ freq = qcom_lmh_vote_to_freq(val);
+ freq_hz = freq * HZ_PER_KHZ;
+
+ dev = get_cpu_device(cpumask_first(policy->cpus));
+ opp = dev_pm_opp_find_freq_floor(dev, &freq_hz);
+ if (IS_ERR(opp) && PTR_ERR(opp) == -ERANGE)
+ opp = dev_pm_opp_find_freq_ceil(dev, &freq_hz);
+
+ throttled_freq = freq_hz / HZ_PER_KHZ;
+
+ /* Update thermal pressure */
+
+ max_capacity = arch_scale_cpu_capacity(cpumask_first(policy->cpus));
+ capacity = throttled_freq * max_capacity;
+ capacity /= policy->cpuinfo.max_freq;
Perhaps, to avoid overflows if this is ever used on a 32-bit platform
use:
mult_frac(max_capacity, throttled_freq, policy->cpuinfo.max_freq)
yep. sounds good.
+
+ /* Don't pass boost capacity to scheduler */
+ if (capacity > max_capacity)
+ capacity = max_capacity;
+
+ arch_set_thermal_pressure(policy->cpus, max_capacity - capacity);
+
+ /*
+ * If h/w throttled frequency is higher than what cpufreq has requested for, stop
+ * polling and switch back to interrupt mechanism
+ */
+
+ if (throttled_freq >= qcom_cpufreq_hw_get(cpumask_first(policy->cpus)))
+ /* Clear the existing interrupts and enable it back */
+ enable_irq(data->lmh_dcvs_irq);
+ else
+ mod_delayed_work(system_highpri_wq, &data->lmh_dcvs_poll_work,
+ msecs_to_jiffies(10));
+}
+
+static void qcom_lmh_dcvs_poll(struct work_struct *work)
+{
+ struct qcom_cpufreq_data *data;
+
+ data = container_of(work, struct qcom_cpufreq_data, lmh_dcvs_poll_work.work);
+
+ qcom_lmh_dcvs_notify(data);
+}
+
+static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data)
+{
+ struct qcom_cpufreq_data *c_data = data;
+
+ /* Disable interrupt and enable polling */
+ disable_irq_nosync(c_data->lmh_dcvs_irq);
+ qcom_lmh_dcvs_notify(c_data);
+
+ return 0;
+}
+
static const struct qcom_cpufreq_soc_data qcom_soc_data = {
.reg_enable = 0x0,
.reg_freq_lut = 0x110,
.reg_volt_lut = 0x114,
+ .reg_current_vote = 0x704,
.reg_perf_state = 0x920,
.lut_row_size = 32,
};
@@ -274,6 +355,35 @@ static const struct of_device_id qcom_cpufreq_hw_match[] = {
};
MODULE_DEVICE_TABLE(of, qcom_cpufreq_hw_match);
+static int qcom_cpufreq_hw_lmh_init(struct cpufreq_policy *policy, int index)
+{
+ struct qcom_cpufreq_data *data = policy->driver_data;
+ struct platform_device *pdev = cpufreq_get_driver_data();
+ struct device *cpu_dev = get_cpu_device(policy->cpu);
+ char irq_name[15];
+ int ret;
+
+ /*
+ * Look for LMh interrupt. If no interrupt line is specified /
+ * if there is an error, allow cpufreq to be enabled as usual.
+ */
+ data->lmh_dcvs_irq = platform_get_irq(pdev, index);
+ if (data->lmh_dcvs_irq <= 0)
+ return data->lmh_dcvs_irq == -EPROBE_DEFER ? -EPROBE_DEFER : 0;
+
+ snprintf(irq_name, sizeof(irq_name), "dcvsh-irq-%u", policy->cpu);
+ ret = devm_request_irq(cpu_dev, data->lmh_dcvs_irq, qcom_lmh_dcvs_handle_irq,
+ 0, irq_name, data);
+ if (ret) {
+ dev_err(&pdev->dev, "Error %d registering irq %x\n", ret, data->lmh_dcvs_irq);
The irq number here won't have any meaning, and %x wouldn't be suitable.
How about ..."Error registering %s: %d\n", irq_name, ret); ?
ok.
+ return 0;
This sounds like a problem, wouldn't it be suitable to treat it as a
problem?
I thought a lot about this. My point is even if LMh does not get enabled
due to some reason, cpufreq should be enabled. If I return an error back
from here, cpufreq will be disabled.
+ }
+ data->policy = policy;
Afaict, no one is going to access data->policy unless devm_request_irq()
succeeds and if it does and the interrupt fires immediately it would be
too late to set it here. So better move it earlier.
+ INIT_DEFERRABLE_WORK(&data->lmh_dcvs_poll_work, qcom_lmh_dcvs_poll);
What if the interrupt fires before you initialize the work? Better move
this higher up.
I will move this and the data->policy = policy above before requesting
the interrupt.
+
+ return 0;
+}
+
static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
{
struct platform_device *pdev = cpufreq_get_driver_data();
@@ -370,6 +480,10 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
dev_warn(cpu_dev, "failed to enable boost: %d\n", ret);
}
+ ret = qcom_cpufreq_hw_lmh_init(policy, index);
+ if (ret)
+ goto error;
+
return 0;
error:
kfree(data);
@@ -389,6 +503,10 @@ static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy)
dev_pm_opp_remove_all_dynamic(cpu_dev);
dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
+ if (data->lmh_dcvs_irq > 0) {
+ devm_free_irq(cpu_dev, data->lmh_dcvs_irq, data);
As init/exit are called multiple times you should avoid the devm
variants.
Yes. I think Viresh was also mentioning this. I will move to non devm
version.
Regards,
Bjorn
+ cancel_delayed_work_sync(&data->lmh_dcvs_poll_work);
+ }
kfree(policy->freq_table);
kfree(data);
iounmap(base);
--
2.25.1
--
Warm Regards
Thara (She/Her/Hers)