On Wed, 28 Aug 2024, Tero Kristo wrote: > Add efficiency latency control support to the TPMI uncore driver. This > defines two new threshold values for controlling uncore frequency, low > threshold and high threshold. When CPU utilization is below low threshold, > the user configurable floor latency control frequency can be used by the > system. When CPU utilization is above high threshold, the uncore frequency > is increased in 100MHz steps until power limit is reached. > > Signed-off-by: Tero Kristo <tero.kristo@xxxxxxxxxxxxxxx> > --- > v2: > * Converted a long sequence of if (...)'s to a switch > > .../uncore-frequency-common.h | 4 + > .../uncore-frequency/uncore-frequency-tpmi.c | 158 +++++++++++++++++- > 2 files changed, 160 insertions(+), 2 deletions(-) > > diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h > index 4c245b945e4e..b5c7311bfa05 100644 > --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h > +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.h > @@ -70,6 +70,10 @@ enum uncore_index { > UNCORE_INDEX_MIN_FREQ, > UNCORE_INDEX_MAX_FREQ, > UNCORE_INDEX_CURRENT_FREQ, > + UNCORE_INDEX_EFF_LAT_CTRL_LOW_THRESHOLD, > + UNCORE_INDEX_EFF_LAT_CTRL_HIGH_THRESHOLD, > + UNCORE_INDEX_EFF_LAT_CTRL_HIGH_THRESHOLD_ENABLE, > + UNCORE_INDEX_EFF_LAT_CTRL_FREQ, > }; > > int uncore_freq_common_init(int (*read)(struct uncore_data *data, unsigned int *value, > diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c > index 9fa3037c03d1..50b28b4b1fc0 100644 > --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c > +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c > @@ -30,6 +30,7 @@ > > #define UNCORE_MAJOR_VERSION 0 > #define UNCORE_MINOR_VERSION 2 > +#define UNCORE_ELC_SUPPORTED_VERSION 2 
> #define UNCORE_HEADER_INDEX 0 > #define UNCORE_FABRIC_CLUSTER_OFFSET 8 > > @@ -46,6 +47,7 @@ struct tpmi_uncore_struct; > /* Information for each cluster */ > struct tpmi_uncore_cluster_info { > bool root_domain; > + bool elc_supported; > u8 __iomem *cluster_base; > struct uncore_data uncore_data; > struct tpmi_uncore_struct *uncore_root; > @@ -75,6 +77,10 @@ struct tpmi_uncore_struct { > /* Bit definitions for CONTROL register */ > #define UNCORE_MAX_RATIO_MASK GENMASK_ULL(14, 8) > #define UNCORE_MIN_RATIO_MASK GENMASK_ULL(21, 15) > +#define UNCORE_EFF_LAT_CTRL_RATIO_MASK GENMASK_ULL(28, 22) > +#define UNCORE_EFF_LAT_CTRL_LOW_THRESHOLD_MASK GENMASK_ULL(38, 32) > +#define UNCORE_EFF_LAT_CTRL_HIGH_THRESHOLD_ENABLE BIT(39) > +#define UNCORE_EFF_LAT_CTRL_HIGH_THRESHOLD_MASK GENMASK_ULL(46, 40) > > /* Helper function to read MMIO offset for max/min control frequency */ > static void read_control_freq(struct tpmi_uncore_cluster_info *cluster_info, > @@ -89,6 +95,48 @@ static void read_control_freq(struct tpmi_uncore_cluster_info *cluster_info, > *value = FIELD_GET(UNCORE_MIN_RATIO_MASK, control) * UNCORE_FREQ_KHZ_MULTIPLIER; > } > > +/* Helper function to read efficiency latency control values over MMIO */ > +static int read_eff_lat_ctrl(struct uncore_data *data, unsigned int *val, enum uncore_index index) > +{ > + struct tpmi_uncore_cluster_info *cluster_info; > + u64 ctrl; > + > + cluster_info = container_of(data, struct tpmi_uncore_cluster_info, uncore_data); > + if (cluster_info->root_domain) > + return -ENODATA; > + > + if (!cluster_info->elc_supported) > + return -EOPNOTSUPP; > + > + ctrl = readq(cluster_info->cluster_base + UNCORE_CONTROL_INDEX); > + > + switch (index) { > + case UNCORE_INDEX_EFF_LAT_CTRL_LOW_THRESHOLD: > + *val = FIELD_GET(UNCORE_EFF_LAT_CTRL_LOW_THRESHOLD_MASK, ctrl); > + *val *= 100; > + *val = DIV_ROUND_UP(*val, FIELD_MAX(UNCORE_EFF_LAT_CTRL_LOW_THRESHOLD_MASK)); > + break; > + > + case UNCORE_INDEX_EFF_LAT_CTRL_HIGH_THRESHOLD: > + *val = 
FIELD_GET(UNCORE_EFF_LAT_CTRL_HIGH_THRESHOLD_MASK, ctrl); > + *val *= 100; > + *val = DIV_ROUND_UP(*val, FIELD_MAX(UNCORE_EFF_LAT_CTRL_HIGH_THRESHOLD_MASK)); I wonder if DIV_ROUND_CLOSEST() would be more appropriate in these two cases? Rounding up isn't well justified: I think the intent here is to round the value back to the original number, to deal with the minor divergences caused by precision loss during the conversions. Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@xxxxxxxxxxxxxxx> -- i. > + break; > + > + case UNCORE_INDEX_EFF_LAT_CTRL_HIGH_THRESHOLD_ENABLE: > + *val = FIELD_GET(UNCORE_EFF_LAT_CTRL_HIGH_THRESHOLD_ENABLE, ctrl); > + break; > + case UNCORE_INDEX_EFF_LAT_CTRL_FREQ: > + *val = FIELD_GET(UNCORE_EFF_LAT_CTRL_RATIO_MASK, ctrl) * UNCORE_FREQ_KHZ_MULTIPLIER; > + break; > + > + default: > + return -EOPNOTSUPP; > + } > + > + return 0; > +} > + > #define UNCORE_MAX_RATIO FIELD_MAX(UNCORE_MAX_RATIO_MASK) > > /* Helper for sysfs read for max/min frequencies. Called under mutex locks */ > @@ -137,6 +185,82 @@ static int uncore_read_control_freq(struct uncore_data *data, unsigned int *valu > return 0; > } > > +/* Helper function for writing efficiency latency control values over MMIO */ > +static int write_eff_lat_ctrl(struct uncore_data *data, unsigned int val, enum uncore_index index) > +{ > + struct tpmi_uncore_cluster_info *cluster_info; > + u64 control; > + > + cluster_info = container_of(data, struct tpmi_uncore_cluster_info, uncore_data); > + > + if (cluster_info->root_domain) > + return -ENODATA; > + > + if (!cluster_info->elc_supported) > + return -EOPNOTSUPP; > + > + switch (index) { > + case UNCORE_INDEX_EFF_LAT_CTRL_LOW_THRESHOLD: > + if (val > 100) > + return -EINVAL; > + break; > + > + case UNCORE_INDEX_EFF_LAT_CTRL_HIGH_THRESHOLD: > + if (val > 100) > + return -EINVAL; > + break; > + > + case UNCORE_INDEX_EFF_LAT_CTRL_HIGH_THRESHOLD_ENABLE: > + if (val > 1) > + return -EINVAL; > + break; > + > + case UNCORE_INDEX_EFF_LAT_CTRL_FREQ: > + val /= 
UNCORE_FREQ_KHZ_MULTIPLIER; > + if (val > FIELD_MAX(UNCORE_EFF_LAT_CTRL_RATIO_MASK)) > + return -EINVAL; > + break; > + > + default: > + return -EOPNOTSUPP; > + } > + > + control = readq(cluster_info->cluster_base + UNCORE_CONTROL_INDEX); > + > + switch (index) { > + case UNCORE_INDEX_EFF_LAT_CTRL_LOW_THRESHOLD: > + val *= FIELD_MAX(UNCORE_EFF_LAT_CTRL_LOW_THRESHOLD_MASK); > + val /= 100; > + control &= ~UNCORE_EFF_LAT_CTRL_LOW_THRESHOLD_MASK; > + control |= FIELD_PREP(UNCORE_EFF_LAT_CTRL_LOW_THRESHOLD_MASK, val); > + break; > + > + case UNCORE_INDEX_EFF_LAT_CTRL_HIGH_THRESHOLD: > + val *= FIELD_MAX(UNCORE_EFF_LAT_CTRL_HIGH_THRESHOLD_MASK); > + val /= 100; > + control &= ~UNCORE_EFF_LAT_CTRL_HIGH_THRESHOLD_MASK; > + control |= FIELD_PREP(UNCORE_EFF_LAT_CTRL_HIGH_THRESHOLD_MASK, val); > + break; > + > + case UNCORE_INDEX_EFF_LAT_CTRL_HIGH_THRESHOLD_ENABLE: > + control &= ~UNCORE_EFF_LAT_CTRL_HIGH_THRESHOLD_ENABLE; > + control |= FIELD_PREP(UNCORE_EFF_LAT_CTRL_HIGH_THRESHOLD_ENABLE, val); > + break; > + > + case UNCORE_INDEX_EFF_LAT_CTRL_FREQ: > + control &= ~UNCORE_EFF_LAT_CTRL_RATIO_MASK; > + control |= FIELD_PREP(UNCORE_EFF_LAT_CTRL_RATIO_MASK, val); > + break; > + > + default: > + break; > + } > + > + writeq(control, cluster_info->cluster_base + UNCORE_CONTROL_INDEX); > + > + return 0; > +} > + > /* Helper function to write MMIO offset for max/min control frequency */ > static void write_control_freq(struct tpmi_uncore_cluster_info *cluster_info, unsigned int input, > unsigned int index) > @@ -156,7 +280,7 @@ static void write_control_freq(struct tpmi_uncore_cluster_info *cluster_info, un > writeq(control, (cluster_info->cluster_base + UNCORE_CONTROL_INDEX)); > } > > -/* Callback for sysfs write for max/min frequencies. Called under mutex locks */ > +/* Helper for sysfs write for max/min frequencies. 
Called under mutex locks */ > static int uncore_write_control_freq(struct uncore_data *data, unsigned int input, > enum uncore_index index) > { > @@ -234,6 +358,33 @@ static int uncore_read(struct uncore_data *data, unsigned int *value, enum uncor > case UNCORE_INDEX_CURRENT_FREQ: > return uncore_read_freq(data, value); > > + case UNCORE_INDEX_EFF_LAT_CTRL_LOW_THRESHOLD: > + case UNCORE_INDEX_EFF_LAT_CTRL_HIGH_THRESHOLD: > + case UNCORE_INDEX_EFF_LAT_CTRL_HIGH_THRESHOLD_ENABLE: > + case UNCORE_INDEX_EFF_LAT_CTRL_FREQ: > + return read_eff_lat_ctrl(data, value, index); > + > + default: > + break; > + } > + > + return -EOPNOTSUPP; > +} > + > +/* Callback for sysfs write for TPMI uncore data. Called under mutex locks. */ > +static int uncore_write(struct uncore_data *data, unsigned int value, enum uncore_index index) > +{ > + switch (index) { > + case UNCORE_INDEX_EFF_LAT_CTRL_LOW_THRESHOLD: > + case UNCORE_INDEX_EFF_LAT_CTRL_HIGH_THRESHOLD: > + case UNCORE_INDEX_EFF_LAT_CTRL_HIGH_THRESHOLD_ENABLE: > + case UNCORE_INDEX_EFF_LAT_CTRL_FREQ: > + return write_eff_lat_ctrl(data, value, index); > + > + case UNCORE_INDEX_MIN_FREQ: > + case UNCORE_INDEX_MAX_FREQ: > + return uncore_write_control_freq(data, value, index); > + > default: > break; > } > @@ -291,7 +442,7 @@ static int uncore_probe(struct auxiliary_device *auxdev, const struct auxiliary_ > return -EINVAL; > > /* Register callbacks to uncore core */ > - ret = uncore_freq_common_init(uncore_read, uncore_write_control_freq); > + ret = uncore_freq_common_init(uncore_read, uncore_write); > if (ret) > return ret; > > @@ -409,6 +560,9 @@ static int uncore_probe(struct auxiliary_device *auxdev, const struct auxiliary_ > > cluster_info->uncore_root = tpmi_uncore; > > + if (TPMI_MINOR_VERSION(pd_info->ufs_header_ver) >= UNCORE_ELC_SUPPORTED_VERSION) > + cluster_info->elc_supported = true; > + > ret = uncore_freq_add_entry(&cluster_info->uncore_data, 0); > if (ret) { > cluster_info->cluster_base = NULL; >