On Thu, Dec 7, 2017 at 6:59 AM, Gautham R. Shenoy <ego@xxxxxxxxxxxxxxxxxx> wrote: > From: "Gautham R. Shenoy" <ego@xxxxxxxxxxxxxxxxxx> > > On the POWERNV platform, Pstates are 8-bit values. On POWER8 they are > negatively numbered while on POWER9 they are positively > numbered. Thus, on POWER9, the maximum number of pstates could be as > high as 256. > > The current code interprets pstates as a signed 8-bit value. This > causes a problem on POWER9 platforms which have more than 128 pstates. > On such systems, on a CPU that is in a lower pstate whose number is > greater than 128, querying the current pstate returns a "pstate X is > out of bound" error message and the current pstate is reported as the > nominal pstate. > > This patch fixes the aforementioned issue by correctly differentiating > the sign whenever a pstate value is read, depending on whether the > pstates are positively numbered or negatively numbered. > > Fixes: commit 09ca4c9b5958 ("cpufreq: powernv: Replacing pstate_id with frequency table index") > Cc: <stable@xxxxxxxxxxxxxxx> #v4.8 > Signed-off-by: Gautham R. Shenoy <ego@xxxxxxxxxxxxxxxxxx> > Tested-and-reviewed-by: Shilpasri G Bhat <shilpa.bhat@xxxxxxxxxxxxxxxxxx> > Acked-by: Viresh Kumar <viresh.kumar@xxxxxxxxxx> I'm going to apply this, or please let me know if you want to route it differently. 
> --- > drivers/cpufreq/powernv-cpufreq.c | 43 ++++++++++++++++++++++++++++++--------- > 1 file changed, 33 insertions(+), 10 deletions(-) > > diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c > index b6d7c4c..bb7586e 100644 > --- a/drivers/cpufreq/powernv-cpufreq.c > +++ b/drivers/cpufreq/powernv-cpufreq.c > @@ -41,11 +41,14 @@ > #define POWERNV_MAX_PSTATES 256 > #define PMSR_PSAFE_ENABLE (1UL << 30) > #define PMSR_SPR_EM_DISABLE (1UL << 31) > -#define PMSR_MAX(x) ((x >> 32) & 0xFF) > +#define EXTRACT_BYTE(x, shift) (((x) >> shift) & 0xFF) > +#define MAX_SHIFT 32 > #define LPSTATE_SHIFT 48 > #define GPSTATE_SHIFT 56 > -#define GET_LPSTATE(x) (((x) >> LPSTATE_SHIFT) & 0xFF) > -#define GET_GPSTATE(x) (((x) >> GPSTATE_SHIFT) & 0xFF) > +#define GET_PMSR_MAX(x) EXTRACT_BYTE(x, MAX_SHIFT) > +#define GET_LPSTATE(x) EXTRACT_BYTE(x, LPSTATE_SHIFT) > +#define GET_GPSTATE(x) EXTRACT_BYTE(x, GPSTATE_SHIFT) > + > > #define MAX_RAMP_DOWN_TIME 5120 > /* > @@ -64,6 +67,12 @@ > > /* Interval after which the timer is queued to bring down global pstate */ > #define GPSTATE_TIMER_INTERVAL 2000 > +/* > + * On POWER8 the pstates are negatively numbered. On POWER9, they are > + * positively numbered. Use this flag to track whether we have > + * positive or negative numbered pstates. 
> + */ > +static bool pos_pstates; > > /** > * struct global_pstate_info - Per policy data structure to maintain history of > @@ -164,7 +173,7 @@ static inline unsigned int pstate_to_idx(int pstate) > int min = powernv_freqs[powernv_pstate_info.min].driver_data; > int max = powernv_freqs[powernv_pstate_info.max].driver_data; > > - if (min > 0) { > + if (pos_pstates) { > if (unlikely((pstate < max) || (pstate > min))) { > pr_warn_once("pstate %d is out of bound\n", pstate); > return powernv_pstate_info.nominal; > @@ -301,6 +310,9 @@ static int init_powernv_pstates(void) > } > } > > + if ((int)pstate_min > 0) > + pos_pstates = true; > + > /* End of list marker entry */ > powernv_freqs[i].frequency = CPUFREQ_TABLE_END; > return 0; > @@ -438,7 +450,6 @@ struct powernv_smp_call_data { > static void powernv_read_cpu_freq(void *arg) > { > unsigned long pmspr_val; > - s8 local_pstate_id; > struct powernv_smp_call_data *freq_data = arg; > > pmspr_val = get_pmspr(SPRN_PMSR); > @@ -447,8 +458,11 @@ static void powernv_read_cpu_freq(void *arg) > * The local pstate id corresponds bits 48..55 in the PMSR. > * Note: Watch out for the sign! 
> */ > - local_pstate_id = (pmspr_val >> 48) & 0xFF; > - freq_data->pstate_id = local_pstate_id; > + if (pos_pstates) > + freq_data->pstate_id = (u8)GET_LPSTATE(pmspr_val); > + else > + freq_data->pstate_id = (s8)GET_LPSTATE(pmspr_val); > + > freq_data->freq = pstate_id_to_freq(freq_data->pstate_id); > > pr_debug("cpu %d pmsr %016lX pstate_id %d frequency %d kHz\n", > @@ -522,7 +536,10 @@ static void powernv_cpufreq_throttle_check(void *data) > chip = this_cpu_read(chip_info); > > /* Check for Pmax Capping */ > - pmsr_pmax = (s8)PMSR_MAX(pmsr); > + if (pos_pstates) > + pmsr_pmax = (u8)GET_PMSR_MAX(pmsr); > + else > + pmsr_pmax = (s8)GET_PMSR_MAX(pmsr); > pmsr_pmax_idx = pstate_to_idx(pmsr_pmax); > if (pmsr_pmax_idx != powernv_pstate_info.max) { > if (chip->throttled) > @@ -645,8 +662,14 @@ void gpstate_timer_handler(struct timer_list *t) > * value. Hence, read from PMCR to get correct data. > */ > val = get_pmspr(SPRN_PMCR); > - freq_data.gpstate_id = (s8)GET_GPSTATE(val); > - freq_data.pstate_id = (s8)GET_LPSTATE(val); > + if (pos_pstates) { > + freq_data.gpstate_id = (u8)GET_GPSTATE(val); > + freq_data.pstate_id = (u8)GET_LPSTATE(val); > + } else { > + freq_data.gpstate_id = (s8)GET_GPSTATE(val); > + freq_data.pstate_id = (s8)GET_LPSTATE(val); > + } > + > if (freq_data.gpstate_id == freq_data.pstate_id) { > reset_gpstates(policy); > spin_unlock(&gpstates->gpstate_lock); > -- > 1.8.3.1 >