On 11.11.2019 07:13, Chanwoo Choi wrote: > Hi Leonard, > > On 11/9/19 7:39 AM, Leonard Crestez wrote: >> The imx8m ddrc has a performance monitoring block attached which can >> be used to measure bandwidth usage and automatically adjust frequency. >> >> There is already a perf driver for that block so instead of implementing >> a devfreq events driver use the in-kernel perf API to implement >> get_dev_status directly. >> >> Signed-off-by: Leonard Crestez <leonard.crestez@xxxxxxx> >> --- >> drivers/devfreq/imx8m-ddrc.c | 153 +++++++++++++++++++++++++++++++++++ >> 1 file changed, 153 insertions(+) >> >> diff --git a/drivers/devfreq/imx8m-ddrc.c b/drivers/devfreq/imx8m-ddrc.c >> index 51903fee21a7..6372191f72d7 100644 >> --- a/drivers/devfreq/imx8m-ddrc.c >> +++ b/drivers/devfreq/imx8m-ddrc.c >> @@ -11,10 +11,13 @@ >> #include <linux/pm_opp.h> >> #include <linux/clk.h> >> #include <linux/clk-provider.h> >> #include <linux/arm-smccc.h> >> >> +#include <asm/perf_event.h> >> +#include <linux/perf_event.h> >> + >> #define IMX_SIP_DDR_DVFS 0xc2000004 >> >> /* Values starting from 0 switch to specific frequency */ >> #define IMX_SIP_DDR_FREQ_SET_HIGH 0x00 >> >> @@ -78,10 +81,22 @@ struct imx8m_ddrc { >> struct clk *dram_alt; >> struct clk *dram_apb; >> >> int freq_count; >> struct imx8m_ddrc_freq freq_table[IMX8M_DDRC_MAX_FREQ_COUNT]; >> + >> + /* For measuring load with perf events: */ >> + struct platform_device *pmu_pdev; >> + struct pmu *pmu; >> + >> + struct perf_event_attr rd_event_attr; >> + struct perf_event_attr wr_event_attr; >> + struct perf_event *rd_event; >> + struct perf_event *wr_event; >> + >> + u64 last_rd_val, last_rd_ena, last_rd_run; >> + u64 last_wr_val, last_wr_ena, last_wr_run; >> }; >> >> static struct imx8m_ddrc_freq *imx8m_ddrc_find_freq(struct imx8m_ddrc *priv, >> unsigned long rate) >> { >> @@ -245,10 +260,131 @@ static int imx8m_ddrc_get_cur_freq(struct device *dev, unsigned long *freq) >> *freq = clk_get_rate(priv->dram_core); >> >> return 0; >> } >> >> +static int imx8m_ddrc_get_dev_status(struct device *dev, >> + struct devfreq_dev_status *stat) >> +{ >> + struct imx8m_ddrc *priv = dev_get_drvdata(dev); >> + >> + stat->current_frequency = clk_get_rate(priv->dram_core); >> + >> + if (priv->rd_event && priv->wr_event) { >> + u64 rd_delta, rd_val, rd_ena, rd_run; >> + u64 wr_delta, wr_val, wr_ena, wr_run; >> + >> + rd_val = perf_event_read_value(priv->rd_event, >> + &rd_ena, &rd_run); >> + wr_val = perf_event_read_value(priv->wr_event, >> + &wr_ena, &wr_run); >> + >> + rd_delta = (rd_val - priv->last_rd_val) * >> + (rd_ena - priv->last_rd_ena); >> + do_div(rd_delta, rd_run - priv->last_rd_run); >> + priv->last_rd_val = rd_val; >> + priv->last_rd_ena = rd_ena; >> + priv->last_rd_run = rd_run; >> + >> + wr_delta = (wr_val - priv->last_wr_val) * >> + (wr_ena - priv->last_wr_ena); >> + do_div(wr_delta, wr_run - priv->last_wr_run); >> + priv->last_wr_val = wr_val; >> + priv->last_wr_ena = wr_ena; >> + priv->last_wr_run = wr_run; >> + >> + /* magic numbers, possibly wrong */ >> + stat->busy_time = 4 * (rd_delta + wr_delta); >> + stat->total_time = stat->current_frequency; >> + } else { >> + stat->busy_time = 0; >> + stat->total_time = 0; >> + } >> + >> + return 0; >> +} >> + >> +static int imx8m_ddrc_perf_disable(struct imx8m_ddrc *priv) >> +{ >> + /* release and set to NULL */ >> + if (!IS_ERR_OR_NULL(priv->rd_event)) >> + perf_event_release_kernel(priv->rd_event); >> + if (!IS_ERR_OR_NULL(priv->wr_event)) >> + perf_event_release_kernel(priv->wr_event); >> + priv->rd_event = NULL; >> + priv->wr_event = NULL; >> + >> + return 0; >> +} >> + >> +static int imx8m_ddrc_perf_enable(struct imx8m_ddrc *priv) > > Actually, this function have to call the just function for enabling > the bandwidth monitoring instead of writing the detailed something. > It looks like that you move the part of the different device driver into here. This is the code for enabling bandwith monitoring: it creates perf counters using in-kernel API. The perf api doesn't seem to expose anything else to enable/disable the counter after creation and honestly just create/destroy seems fine. As far as I can tell bandwidth monitoring in devfreq is always enabled on probe anyway, no matter which governor is in use. It would be nice if devfreq drivers could receive a callback and dynamically acquire/release monitoring resources only when the ondemand governor is in used. Until then this driver will permanently allocate 2 (out of 3) counters in ddr pmu hardware. >> +{ >> + int ret; >> + >> + priv->rd_event_attr.size = sizeof(priv->rd_event_attr); >> + priv->rd_event_attr.type = priv->pmu->type; >> + priv->rd_event_attr.config = 0x2a; >> + >> + priv->rd_event = perf_event_create_kernel_counter( >> + &priv->rd_event_attr, 0, NULL, NULL, NULL); >> + if (IS_ERR(priv->rd_event)) { >> + ret = PTR_ERR(priv->rd_event); >> + goto err; >> + } >> + >> + priv->wr_event_attr.size = sizeof(priv->wr_event_attr); >> + priv->wr_event_attr.type = priv->pmu->type; >> + priv->wr_event_attr.config = 0x2b; >> + >> + priv->wr_event = perf_event_create_kernel_counter( >> + &priv->wr_event_attr, 0, NULL, NULL, NULL); >> + if (IS_ERR(priv->wr_event)) { >> + ret = PTR_ERR(priv->wr_event); >> + goto err; >> + } >> + >> + return 0; >> + >> +err: >> + imx8m_ddrc_perf_disable(priv); >> + return ret; >> +} >> + >> +static int imx8m_ddrc_init_events(struct device *dev, >> + struct device_node *events_node) > > ditto. > >> +{ >> + struct imx8m_ddrc *priv = dev_get_drvdata(dev); >> + struct device_driver *driver; >> + >> + /* >> + * We need pmu->type for perf_event_attr but there is no API for >> + * mapping device_node to pmu. Fetch private data for imx-ddr-pmu and >> + * cast that to a struct pmu instead. >> + */ >> + priv->pmu_pdev = of_find_device_by_node(events_node); >> + if (!priv->pmu_pdev) >> + return -EPROBE_DEFER; >> + driver = priv->pmu_pdev->dev.driver; >> + if (!driver) >> + return -EPROBE_DEFER; >> + if (strcmp(driver->name, "imx-ddr-pmu")) { >> + dev_warn(dev, "devfreq-events node %pOF has unexpected driver %s\n", >> + events_node, driver->name); >> + return -ENODEV; >> + } >> + >> + priv->pmu = platform_get_drvdata(priv->pmu_pdev); > > It seems that you access the different device driver without > any standard interface from some linux kernel subsystem. > > For the communication or control between different device drivers, > we have to use the standard interface instead of direct access or call. > I think that it make it too tightly coupled driver between them. > > So, I developed the devfreq-event subsystem for this reason > in order to remove the non-standard direct access to other device driver. > > Unfortunately, I can't agree this approach. I don't prefer to use > the direct access of other device driver(imx-ddr-pmu). You need to > use standard interface provided from subsystem. or You need to make > the new device driver with devfreq-event subsystem. This could be cleaned-up by adding a new API to "fetch struct pmu* by struct device_node*" as available for many other subsystems. The perf api is otherwise standard/generic and has a few other in-kernel users. The perf driver for DDR PMU is already functional and useful for profiling and reusing it seem very worthwhile. If you're suggesting I implemented an unrelated "devfreq-event" driver then how would it get probed? There's only one PMU node in DT. I wouldn't mind to delay the monitoring part into a second series. >> + if (!priv->pmu) { >> + dev_err(dev, "devfreq-events device missing private data\n"); >> + return -EINVAL; >> + } >> + >> + dev_dbg(dev, "events from pmu %s\n", priv->pmu->name); >> + >> + return imx8m_ddrc_perf_enable(priv); >> +} >> + >> static int imx8m_ddrc_init_freq_info(struct device *dev) >> { >> struct imx8m_ddrc *priv = dev_get_drvdata(dev); >> struct arm_smccc_res res; >> int index; >> @@ -328,17 +464,23 @@ static int imx8m_ddrc_check_opps(struct device *dev) >> return 0; >> } >> >> static void imx8m_ddrc_exit(struct device *dev) >> { >> + struct imx8m_ddrc *priv = dev_get_drvdata(dev); >> + >> + imx8m_ddrc_perf_disable(priv); >> + platform_device_put(priv->pmu_pdev); >> + >> dev_pm_opp_of_remove_table(dev); >> } >> >> static int imx8m_ddrc_probe(struct platform_device *pdev) >> { >> struct device *dev = &pdev->dev; >> struct imx8m_ddrc *priv; >> + struct device_node *events_node; >> const char *gov = DEVFREQ_GOV_USERSPACE; >> int ret; >> >> priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); >> if (!priv) >> @@ -350,10 +492,19 @@ static int imx8m_ddrc_probe(struct platform_device *pdev) >> if (ret) { >> dev_err(dev, "failed to init firmware freq info: %d\n", ret); >> return ret; >> } >> >> + events_node = of_parse_phandle(dev->of_node, "devfreq-events", 0); >> + if (events_node) { >> + ret = imx8m_ddrc_init_events(dev, events_node); >> + of_node_put(events_node); >> + if (ret) >> + goto err; >> + gov = DEVFREQ_GOV_SIMPLE_ONDEMAND; >> + } >> + >> priv->dram_core = devm_clk_get(dev, "core"); >> priv->dram_pll = devm_clk_get(dev, "pll"); >> priv->dram_alt = devm_clk_get(dev, "alt"); >> priv->dram_apb = devm_clk_get(dev, "apb"); >> if (IS_ERR(priv->dram_core) || >> @@ -390,10 +541,12 @@ static int imx8m_ddrc_probe(struct platform_device *pdev) >> } >> >> return 0; >> >> err: >> + imx8m_ddrc_perf_disable(priv); >> + platform_device_put(priv->pmu_pdev); >> dev_pm_opp_of_remove_table(dev); >> return ret; >> } >> >> static const struct of_device_id imx8m_ddrc_of_match[] = {