On 18/03/2021 12:55, Dmitry Osipenko wrote: > Add debug statistics collection support. The statistics is available > via debugfs in '/sys/kernel/debug/mc/stats', it shows percent of memory > controller utilization for each memory client. This information is > intended to help with debugging of memory performance issues, it already > was proven to be useful by helping to improve memory bandwidth management > of the display driver. > > Signed-off-by: Dmitry Osipenko <digetx@xxxxxxxxx> > --- > drivers/memory/tegra/mc.c | 12 ++ > drivers/memory/tegra/tegra20.c | 342 +++++++++++++++++++++++++++++++++ > include/soc/tegra/mc.h | 7 + > 3 files changed, 361 insertions(+) > > diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c > index a21163ccadc4..85c3364ebf26 100644 > --- a/drivers/memory/tegra/mc.c > +++ b/drivers/memory/tegra/mc.c > @@ -827,6 +827,18 @@ static int tegra_mc_probe(struct platform_device *pdev) > return err; > } > > + mc->debugfs.root = debugfs_create_dir("mc", NULL); > + if (!mc->debugfs.root) > + dev_err(&pdev->dev, "failed to create debugfs directory\n"); It's error pointer, not null, but anyway there is no need for handling debugfs error. See Greg KH's commits like "remove pointless check for debugfs_create_dir()". > + > + if (mc->soc->init) { > + err = mc->soc->init(mc); > + if (err < 0) > + dev_err(&pdev->dev, > + "failed to register initialize SoC driver: %d\n", "failed to initialize SoC driver:...." > + err); > + } > + > err = tegra_mc_reset_setup(mc); > if (err < 0) > dev_err(&pdev->dev, "failed to register reset controller: %d\n", > diff --git a/drivers/memory/tegra/tegra20.c b/drivers/memory/tegra/tegra20.c > index 29ecf02805a0..513c07104296 100644 > --- a/drivers/memory/tegra/tegra20.c > +++ b/drivers/memory/tegra/tegra20.c > @@ -3,6 +3,8 @@ > * Copyright (C) 2012 NVIDIA CORPORATION. All rights reserved. > */ > > +#include <linux/bitfield.h> > +#include <linux/delay.h> > #include <linux/of_device.h> > #include <linux/slab.h> > #include <linux/string.h> > @@ -11,6 +13,77 @@ > > #include "mc.h" > > +#define MC_STAT_CONTROL 0x90 > +#define MC_STAT_EMC_CLOCK_LIMIT 0xa0 > +#define MC_STAT_EMC_CLOCKS 0xa4 > +#define MC_STAT_EMC_CONTROL_0 0xa8 > +#define MC_STAT_EMC_CONTROL_1 0xac > +#define MC_STAT_EMC_COUNT_0 0xb8 > +#define MC_STAT_EMC_COUNT_1 0xbc > + > +#define MC_STAT_CONTROL_CLIENT_ID GENMASK(13, 8) > +#define MC_STAT_CONTROL_EVENT GENMASK(23, 16) > +#define MC_STAT_CONTROL_PRI_EVENT GENMASK(25, 24) > +#define MC_STAT_CONTROL_FILTER_CLIENT_ENABLE GENMASK(26, 26) > +#define MC_STAT_CONTROL_FILTER_PRI GENMASK(29, 28) > + > +#define MC_STAT_CONTROL_PRI_EVENT_HP 0 > +#define MC_STAT_CONTROL_PRI_EVENT_TM 1 > +#define MC_STAT_CONTROL_PRI_EVENT_BW 2 > + > +#define MC_STAT_CONTROL_FILTER_PRI_DISABLE 0 > +#define MC_STAT_CONTROL_FILTER_PRI_NO 1 > +#define MC_STAT_CONTROL_FILTER_PRI_YES 2 > + > +#define MC_STAT_CONTROL_EVENT_QUALIFIED 0 > +#define MC_STAT_CONTROL_EVENT_ANY_READ 1 > +#define MC_STAT_CONTROL_EVENT_ANY_WRITE 2 > +#define MC_STAT_CONTROL_EVENT_RD_WR_CHANGE 3 > +#define MC_STAT_CONTROL_EVENT_SUCCESSIVE 4 > +#define MC_STAT_CONTROL_EVENT_ARB_BANK_AA 5 > +#define MC_STAT_CONTROL_EVENT_ARB_BANK_BB 6 > +#define MC_STAT_CONTROL_EVENT_PAGE_MISS 7 > +#define MC_STAT_CONTROL_EVENT_AUTO_PRECHARGE 8 > + > +#define EMC_GATHER_RST (0 << 8) > +#define EMC_GATHER_CLEAR (1 << 8) > +#define EMC_GATHER_DISABLE (2 << 8) > +#define EMC_GATHER_ENABLE (3 << 8) > + > +#define MC_STAT_SAMPLE_TIME_USEC 16000 > + > +/* we store collected statistics as a fixed point values */ > +#define MC_FX_FRAC_SCALE 100 > + > +struct tegra20_mc_stat_gather { > + unsigned int pri_filter; > + unsigned int pri_event; > + unsigned int result; > + unsigned int client; > + unsigned int event; > + bool client_enb; > +}; > + > +struct tegra20_mc_stat { > + struct tegra20_mc_stat_gather gather0; > + struct tegra20_mc_stat_gather gather1; > + unsigned int sample_time_usec; > + struct tegra_mc *mc; > +}; > + > +struct tegra20_mc_client_stat { > + unsigned int events; > + unsigned int arb_high_prio; > + unsigned int arb_timeout; > + unsigned int arb_bandwidth; > + unsigned int rd_wr_change; > + unsigned int successive; > + unsigned int page_miss; > + unsigned int auto_precharge; > + unsigned int arb_bank_aa; > + unsigned int arb_bank_bb; > +}; > + > static const struct tegra_mc_client tegra20_mc_clients[] = { > { > .id = 0x00, > @@ -356,6 +429,274 @@ static const struct tegra_mc_icc_ops tegra20_mc_icc_ops = { > .set = tegra20_mc_icc_set, > }; > > +static u32 tegra20_mc_stat_gather_control(struct tegra20_mc_stat_gather *g) "g" looks like pointer to const here > +{ > + u32 control; > + > + control = FIELD_PREP(MC_STAT_CONTROL_EVENT, g->event); > + control |= FIELD_PREP(MC_STAT_CONTROL_CLIENT_ID, g->client); > + control |= FIELD_PREP(MC_STAT_CONTROL_PRI_EVENT, g->pri_event); > + control |= FIELD_PREP(MC_STAT_CONTROL_FILTER_PRI, g->pri_filter); > + control |= FIELD_PREP(MC_STAT_CONTROL_FILTER_CLIENT_ENABLE, g->client_enb); > + > + return control; > +} > + > +static void tegra20_mc_stat_gather(struct tegra20_mc_stat *stat) > +{ > + u32 clocks, count0, count1, control_0, control_1; > + struct tegra_mc *mc = stat->mc; > + > + control_0 = tegra20_mc_stat_gather_control(&stat->gather0); > + control_1 = tegra20_mc_stat_gather_control(&stat->gather1); > + > + /* > + * Reset statistic gathers state, select bandwidth mode for the > + * statistics collection mode and set clocks counter saturation > + * limit to maximum. > + */ > + mc_writel(mc, 0x00000000, MC_STAT_CONTROL); > + mc_writel(mc, control_0, MC_STAT_EMC_CONTROL_0); > + mc_writel(mc, control_1, MC_STAT_EMC_CONTROL_1); > + mc_writel(mc, 0xffffffff, MC_STAT_EMC_CLOCK_LIMIT); > + > + mc_writel(mc, EMC_GATHER_ENABLE, MC_STAT_CONTROL); > + fsleep(stat->sample_time_usec); > + mc_writel(mc, EMC_GATHER_DISABLE, MC_STAT_CONTROL); > + > + count0 = mc_readl(mc, MC_STAT_EMC_COUNT_0); > + count1 = mc_readl(mc, MC_STAT_EMC_COUNT_1); > + clocks = mc_readl(mc, MC_STAT_EMC_CLOCKS); > + clocks = max(clocks / 100 / MC_FX_FRAC_SCALE, 1u); > + > + stat->gather0.result = DIV_ROUND_UP(count0, clocks); > + stat->gather1.result = DIV_ROUND_UP(count1, clocks); > +} > + > +static void tegra20_mc_stat_events(struct tegra_mc *mc, > + const struct tegra_mc_client *client0, > + const struct tegra_mc_client *client1, > + unsigned int pri_filter, > + unsigned int pri_event, > + unsigned int event, > + unsigned int *result0, > + unsigned int *result1) > +{ > + struct tegra20_mc_stat stat = {}; > + > + stat.gather0.client = client0 ? client0->id : 0; > + stat.gather0.pri_filter = pri_filter; > + stat.gather0.client_enb = !!client0; > + stat.gather0.pri_event = pri_event; > + stat.gather0.event = event; > + > + stat.gather1.client = client1 ? client1->id : 0; > + stat.gather1.pri_filter = pri_filter; > + stat.gather1.client_enb = !!client1; > + stat.gather1.pri_event = pri_event; > + stat.gather1.event = event; > + > + stat.sample_time_usec = MC_STAT_SAMPLE_TIME_USEC; > + stat.mc = mc; > + > + tegra20_mc_stat_gather(&stat); > + > + *result0 = stat.gather0.result; > + *result1 = stat.gather1.result; > +} > + > +static void tegra20_mc_collect_stats(struct tegra_mc *mc, > + struct tegra20_mc_client_stat *stats) > +{ > + const struct tegra_mc_client *client0, *client1; > + unsigned int i, clienta, clientb; Define the clienta and clientb inside the loop, to reduce the scope. Unless it is used between the loop iterations? > + > + /* collect memory controller utilization percent for each client */ > + for (i = 0; i < mc->soc->num_clients; i += 2) { > + client0 = &mc->soc->clients[i]; > + client1 = &mc->soc->clients[i + 1]; > + > + if (i + 1 == mc->soc->num_clients) > + client1 = NULL; > + > + tegra20_mc_stat_events(mc, client0, client1, > + MC_STAT_CONTROL_FILTER_PRI_DISABLE, > + MC_STAT_CONTROL_PRI_EVENT_HP, > + MC_STAT_CONTROL_EVENT_QUALIFIED, > + &stats[i + 0].events, > + &stats[i + 1].events); > + } > + > + /* collect more info from active clients */ > + for (i = 0; i < mc->soc->num_clients; i++) { > + clientb = mc->soc->num_clients; > + > + for (client0 = NULL; i < mc->soc->num_clients; i++) { > + if (stats[i].events) { > + client0 = &mc->soc->clients[i]; > + clienta = i++; > + break; > + } > + } Could all clients have 0 events ending with "clienta" being undefined? "client0" would be non-NULL because of loop before. > + > + for (client1 = NULL; i < mc->soc->num_clients; i++) { > + if (stats[i].events) { > + client1 = &mc->soc->clients[i]; > + clientb = i; > + break; > + } > + } > + > + if (!client0 && !client1) > + break; > + > + tegra20_mc_stat_events(mc, client0, client1, > + MC_STAT_CONTROL_FILTER_PRI_YES, > + MC_STAT_CONTROL_PRI_EVENT_HP, > + MC_STAT_CONTROL_EVENT_QUALIFIED, > + &stats[clienta].arb_high_prio, > + &stats[clientb].arb_high_prio); > + Best regards, Krzysztof