Re: [PATCH 3/3] memory: samsung: exynos5422-dmc: Add support for interrupt from performance counters

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Krzysztof,

On 9/27/19 11:19 AM, Krzysztof Kozlowski wrote:
> On Wed, Sep 25, 2019 at 06:18:13PM +0200, Lukasz Luba wrote:
>> Introduce a new interrupt driven mechanism for managing speed of the
>> memory controller. The interrupts are generated due to performance
>> counters overflow. The performance counters might track memory reads,
>> writes, transfers, page misses, etc. In the basic algorithm tracking
>> read transfers and calculating memory pressure should be enough to
>> skip polling mode in devfreq.
>>
>> Signed-off-by: Lukasz Luba <l.luba@xxxxxxxxxxxxxxxxxxx>
>> ---
>>   drivers/memory/samsung/exynos5422-dmc.c | 297 ++++++++++++++++++++++--
>>   1 file changed, 272 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/memory/samsung/exynos5422-dmc.c b/drivers/memory/samsung/exynos5422-dmc.c
>> index 0fe5f2186139..86e1844b97ef 100644
>> --- a/drivers/memory/samsung/exynos5422-dmc.c
>> +++ b/drivers/memory/samsung/exynos5422-dmc.c
>> @@ -8,6 +8,7 @@
>>   #include <linux/devfreq.h>
>>   #include <linux/devfreq-event.h>
>>   #include <linux/device.h>
>> +#include <linux/interrupt.h>
>>   #include <linux/io.h>
>>   #include <linux/mfd/syscon.h>
>>   #include <linux/module.h>
>> @@ -35,6 +36,29 @@
>>   #define USE_BPLL_TIMINGS			(0)
>>   #define EXYNOS5_AREF_NORMAL			(0x2e)
>>   
>> +#define DREX_PPCCLKCON		(0x0130)
>> +#define DREX_PEREV2CONFIG	(0x013c)
>> +#define DREX_PMNC_PPC		(0xE000)
>> +#define DREX_CNTENS_PPC		(0xE010)
>> +#define DREX_CNTENC_PPC		(0xE020)
>> +#define DREX_INTENS_PPC		(0xE030)
>> +#define DREX_INTENC_PPC		(0xE040)
>> +#define DREX_FLAG_PPC		(0xE050)
>> +#define DREX_PMCNT2_PPC		(0xE130)
>> +
>> +#define CC_RESET		BIT(2)
>> +#define PPC_COUNTER_RESET	BIT(1)
>> +#define PPC_ENABLE		BIT(0)
>> +#define PEREV_CLK_EN		BIT(0)
>> +#define PERF_CNT2		BIT(2)
>> +#define PERF_CCNT		BIT(31)
> 
> Describe to which registers these bitfields are applicable.
OK, I will add comment above them.
> 
>> +
>> +#define READ_TRANSFER_CH0	(0x6d)
>> +#define READ_TRANSFER_CH1	(0x6f)
> 
> The same. Otherwise they all look like some generic constants which is
> not true.
Make sense, agree. I will add comment above them.
> 
>> +
>> +#define PERF_COUNTER_START_VALUE 0xff000000
>> +#define PERF_EVENT_UP_DOWN_THRESHOLD 900000000ULL
>> +
>>   /**
>>    * struct dmc_opp_table - Operating level desciption
>>    *
>> @@ -85,6 +109,9 @@ struct exynos5_dmc {
>>   	struct clk *mout_mx_mspll_ccore_phy;
>>   	struct devfreq_event_dev **counter;
>>   	int num_counters;
>> +	u64 last_overflow_ts[2];
>> +	unsigned long load, total;
> 
> One member per line.  This decreases readability.
OK
> 
>> +	bool in_irq_mode;
>>   };
>>   
>>   #define TIMING_FIELD(t_name, t_bit_beg, t_bit_end) \
>> @@ -653,6 +680,167 @@ static int exynos5_counters_get(struct exynos5_dmc *dmc,
>>   	return 0;
>>   }
>>   
>> +/**
>> + * exynos5_dmc_start_perf_events() - Setup and start performance event counters
>> + * @dmc:	device for which the counters are going to be checked
>> + * @beg_value:	initial value for the counter
>> + *
>> + * Function which enables needed counters, interrupts and sets initial values
>> + * then starts the counters.
>> + */
>> +static void exynos5_dmc_start_perf_events(struct exynos5_dmc *dmc,
>> +					  u32 beg_value)
>> +{
>> +	/* Enable interrupts for counter 2 */
>> +	writel(PERF_CNT2, dmc->base_drexi0 + DREX_INTENS_PPC);
>> +	writel(PERF_CNT2, dmc->base_drexi1 + DREX_INTENS_PPC);
> 
> Blank line.
> 
>> +	/* Enable counter 2 and CCNT  */
>> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_CNTENS_PPC);
>> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_CNTENS_PPC);
> 
> Blank line.
> 
>> +	/* Clear overflow flag for all counters */
>> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_FLAG_PPC);
>> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_FLAG_PPC);
> 
> Blank line.
> 
>> +	/* Reset all counters */
>> +	writel(CC_RESET | PPC_COUNTER_RESET, dmc->base_drexi0 + DREX_PMNC_PPC);
>> +	writel(CC_RESET | PPC_COUNTER_RESET, dmc->base_drexi1 + DREX_PMNC_PPC);
> 
> Blank line.
> 
>> +	/*
>> +	 * Set start value for the counters, the number of samples that
>> +	 * will be gathered is calculated as: 0xffffffff - beg_value
>> +	 */
>> +	writel(beg_value, dmc->base_drexi0 + DREX_PMCNT2_PPC);
>> +	writel(beg_value, dmc->base_drexi1 + DREX_PMCNT2_PPC);
> 
> Blank line.
> 
>> +	/* Start all counters */
>> +	writel(PPC_ENABLE, dmc->base_drexi0 + DREX_PMNC_PPC);
>> +	writel(PPC_ENABLE, dmc->base_drexi1 + DREX_PMNC_PPC);
>> +}
>> +
>> +/**
>> + * exynos5_dmc_perf_events_calc() - Calculate utilization
>> + * @dmc:	device for which the counters are going to be checked
>> + * @diff_ts:	time between last interrupt and current one
>> + *
>> + * Function which calculates needed utilization for the devfreq governor.
>> + * It prepares values for 'busy_time' and 'total_time' based on elapsed time
>> + * between interrupts, which approximates utilization.
>> + */
>> +static void exynos5_dmc_perf_events_calc(struct exynos5_dmc *dmc, u64 diff_ts)
>> +{
>> +	/*
>> +	 * This is a simple algorithm for managing traffic on DMC.
>> +	 * When there is almost no load the counters overflow every 4s,
>> +	 * no mater the DMC frequency.
>> +	 * The high load might be approximated using linear function.
>> +	 * Knowing that, simple calculation can provide 'busy_time' and
>> +	 * 'total_time' to the devfreq governor which picks up target
>> +	 * frequency.
>> +	 * We want a fast ramp up and slow decay in frequency change function.
>> +	 */
>> +	if (diff_ts < PERF_EVENT_UP_DOWN_THRESHOLD) {
>> +		/*
>> +		 * Set higher utilization for the simple_ondemand governor.
>> +		 * The governor should increase the frequency of the DMC.
>> +		 */
>> +		dmc->load = 70;
>> +		dmc->total = 100;
>> +	} else {
>> +		/*
>> +		 * Set low utilization for the simple_ondemand governor.
>> +		 * The governor should decrease the frequency of the DMC.
>> +		 */
>> +		dmc->load = 35;
>> +		dmc->total = 100;
>> +	}
>> +
>> +	dev_dbg(dmc->dev, "diff_ts=%llu\n", diff_ts);
>> +}
>> +
>> +/**
>> + * exynos5_dmc_perf_events_check() - Checks the status of the counters
>> + * @dmc:	device for which the counters are going to be checked
>> + *
>> + * Function which is called from threaded IRQ to check the counters state
>> + * and to call approximation for the needed utilization.
>> + */
>> +static void exynos5_dmc_perf_events_check(struct exynos5_dmc *dmc)
>> +{
>> +	u32 val;
>> +	u64 diff_ts, ts;
>> +
>> +	ts = ktime_get_ns();
>> +
>> +	/* Stop all counters */
>> +	writel(0, dmc->base_drexi0 + DREX_PMNC_PPC);
>> +	writel(0, dmc->base_drexi1 + DREX_PMNC_PPC);
>> +
>> +	/* Check the source in interrupt flag registers (which channel) */
>> +	val = readl(dmc->base_drexi0 + DREX_FLAG_PPC);
>> +	if (val) {
>> +		diff_ts = ts - dmc->last_overflow_ts[0];
>> +		dmc->last_overflow_ts[0] = ts;
>> +		dev_dbg(dmc->dev, "drex0 0xE050 val= 0x%08x\n",  val);
>> +	} else {
>> +		val = readl(dmc->base_drexi1 + DREX_FLAG_PPC);
>> +		diff_ts = ts - dmc->last_overflow_ts[1];
>> +		dmc->last_overflow_ts[1] = ts;
>> +		dev_dbg(dmc->dev, "drex1 0xE050 val= 0x%08x\n",  val);
>> +	}
>> +
>> +	exynos5_dmc_perf_events_calc(dmc, diff_ts);
>> +
>> +	exynos5_dmc_start_perf_events(dmc, PERF_COUNTER_START_VALUE);
>> +}
>> +
>> +/**
>> + * exynos5_dmc_enable_perf_events() - Enable performance events
>> + * @dmc:	device for which the counters are going to be checked
>> + *
>> + * Function which is setup needed environment and enables counters.
>> + */
>> +static void exynos5_dmc_enable_perf_events(struct exynos5_dmc *dmc)
>> +{
>> +	u64 ts;
>> +
>> +	/* Enable Performance Event Clock */
>> +	writel(PEREV_CLK_EN, dmc->base_drexi0 + DREX_PPCCLKCON);
>> +	writel(PEREV_CLK_EN, dmc->base_drexi1 + DREX_PPCCLKCON);
>> +
>> +	/* Select read transfers as performance event2 */
>> +	writel(READ_TRANSFER_CH0, dmc->base_drexi0 + DREX_PEREV2CONFIG);
>> +	writel(READ_TRANSFER_CH1, dmc->base_drexi1 + DREX_PEREV2CONFIG);
>> +
>> +	dmc->in_irq_mode = 1;
> 
> Move this outside, to the probe. Logically it belongs there.
OK
> 
>> +
>> +	ts = ktime_get_ns();
>> +	dmc->last_overflow_ts[0] = ts;
>> +	dmc->last_overflow_ts[1] = ts;
>> +
>> +	/* Devfreq shouldn't be faster than initialization, play safe though. */
>> +	dmc->load = 99;
>> +	dmc->total = 100;
>> +}
>> +
>> +/**
>> + * exynos5_dmc_disable_perf_events() - Disable performance events
>> + * @dmc:	device for which the counters are going to be checked
>> + *
>> + * Function which stops, disables performance event counters and interrupts.
>> + */
>> +static void exynos5_dmc_disable_perf_events(struct exynos5_dmc *dmc)
>> +{
>> +	/* Stop all counters */
>> +	writel(0, dmc->base_drexi0 + DREX_PMNC_PPC);
>> +	writel(0, dmc->base_drexi1 + DREX_PMNC_PPC);
> 
> Blank line here and later.
OK - for all the 'blank line' hints.
> 
>> +	/* Disable interrupts for counter 2 */
>> +	writel(PERF_CNT2, dmc->base_drexi0 + DREX_INTENC_PPC);
>> +	writel(PERF_CNT2, dmc->base_drexi1 + DREX_INTENC_PPC);
>> +	/* Disable counter 2 and CCNT  */
>> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_CNTENC_PPC);
>> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_CNTENC_PPC);
>> +	/* Clear overflow flag for all counters */
>> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi0 + DREX_FLAG_PPC);
>> +	writel(PERF_CNT2 | PERF_CCNT, dmc->base_drexi1 + DREX_FLAG_PPC);
>> +}
>> +
>>   /**
>>    * exynos5_dmc_get_status() - Read current DMC performance statistics.
>>    * @dev:	device for which the statistics are requested
>> @@ -669,18 +857,24 @@ static int exynos5_dmc_get_status(struct device *dev,
>>   	unsigned long load, total;
>>   	int ret;
>>   
>> -	ret = exynos5_counters_get(dmc, &load, &total);
>> -	if (ret < 0)
>> -		return -EINVAL;
>> +	if (dmc->in_irq_mode) {
>> +		stat->current_frequency = dmc->curr_rate;
>> +		stat->busy_time = dmc->load;
>> +		stat->total_time = dmc->total;
>> +	} else {
>> +		ret = exynos5_counters_get(dmc, &load, &total);
>> +		if (ret < 0)
>> +			return -EINVAL;
>>   
>> -	/* To protect from overflow in calculation ratios, divide by 1024 */
>> -	stat->busy_time = load >> 10;
>> -	stat->total_time = total >> 10;
>> +		/* To protect from overflow, divide by 1024 */
>> +		stat->busy_time = load >> 10;
>> +		stat->total_time = total >> 10;
>>   
>> -	ret = exynos5_counters_set_event(dmc);
>> -	if (ret < 0) {
>> -		dev_err(dev, "could not set event counter\n");
>> -		return ret;
>> +		ret = exynos5_counters_set_event(dmc);
>> +		if (ret < 0) {
>> +			dev_err(dev, "could not set event counter\n");
>> +			return ret;
>> +		}
>>   	}
>>   
>>   	return 0;
>> @@ -712,7 +906,6 @@ static int exynos5_dmc_get_cur_freq(struct device *dev, unsigned long *freq)
>>    * It provides to the devfreq framework needed functions and polling period.
>>    */
>>   static struct devfreq_dev_profile exynos5_dmc_df_profile = {
>> -	.polling_ms = 500,
>>   	.target = exynos5_dmc_target,
>>   	.get_dev_status = exynos5_dmc_get_status,
>>   	.get_cur_freq = exynos5_dmc_get_cur_freq,
>> @@ -1108,6 +1301,26 @@ static inline int exynos5_dmc_set_pause_on_switching(struct exynos5_dmc *dmc)
>>   	return 0;
>>   }
>>   
>> +static irqreturn_t dmc_irq_thread(int irq, void *priv)
>> +{
>> +	int res;
>> +	struct exynos5_dmc *dmc = priv;
>> +
>> +	dev_dbg(dmc->dev, "irq thread handler\n");
> 
> Skip a debug in thread handler for memory. It can pollute your log (I
> guess depending on workload).
OK, I will remove it.
> 
>> +
>> +	mutex_lock(&dmc->df->lock);
>> +
>> +	exynos5_dmc_perf_events_check(dmc);
>> +
>> +	res = update_devfreq(dmc->df);
>> +	if (res)
>> +		dev_err(dmc->dev, "devfreq failed with %d\n", res);
> 
> dev_warn()
OK
> 
>> +
>> +	mutex_unlock(&dmc->df->lock);
>> +
>> +	return IRQ_HANDLED;
>> +}
>> +
>>   /**
>>    * exynos5_dmc_probe() - Probe function for the DMC driver
>>    * @pdev:	platform device for which the driver is going to be initialized
>> @@ -1125,6 +1338,7 @@ static int exynos5_dmc_probe(struct platform_device *pdev)
>>   	struct device_node *np = dev->of_node;
>>   	struct exynos5_dmc *dmc;
>>   	struct resource *res;
>> +	int irq;
>>   
>>   	dmc = devm_kzalloc(dev, sizeof(*dmc), GFP_KERNEL);
>>   	if (!dmc)
>> @@ -1172,24 +1386,48 @@ static int exynos5_dmc_probe(struct platform_device *pdev)
>>   		goto remove_clocks;
>>   	}
>>   
>> -	ret = exynos5_performance_counters_init(dmc);
>> -	if (ret) {
>> -		dev_warn(dev, "couldn't probe performance counters\n");
>> -		goto remove_clocks;
>> -	}
>> -
>>   	ret = exynos5_dmc_set_pause_on_switching(dmc);
>>   	if (ret) {
>>   		dev_warn(dev, "couldn't get access to PAUSE register\n");
>>   		goto err_devfreq_add;
> 
> This is wrong now, I think.
Good point, should be 'goto remove_clocks'.
> 
>>   	}
>>   
>> -	/*
>> -	 * Setup default thresholds for the devfreq governor.
>> -	 * The values are chosen based on experiments.
>> -	 */
>> -	dmc->gov_data.upthreshold = 30;
>> -	dmc->gov_data.downdifferential = 5;
>> +	/* There is two modes in which the driver works: polling or IRQ */
>> +	irq = platform_get_irq(pdev, 0);
> 
> You need to document it in bindings.
OK
> 
>> +	if (irq < 0) {
>> +		ret = exynos5_performance_counters_init(dmc);
>> +		if (ret) {
>> +			dev_warn(dev, "couldn't probe performance counters\n");
>> +			goto remove_clocks;
> 
> Weird, previous error jump goes to err_devfreq_add. This goes to error
> label which is narrower (less to cleanup).

Right.

Thank you for the review. I will address the issues in the next version.

Regards,
Lukasz



[Index of Archives]     [Device Tree Compilter]     [Device Tree Spec]     [Linux Driver Backports]     [Video for Linux]     [Linux USB Devel]     [Linux PCI Devel]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Yosemite Backpacking]


  Powered by Linux