On Thu, Apr 18, 2019 at 5:03 AM Evan Quan <evan.quan@xxxxxxx> wrote: > > Two new hwmon interfaces(temp2_input and temp3_input) are added. > They are supported on SOC15 dGPUs only. > > Change-Id: I935c512bd38e080fb8b6e3164c5e5294baff4e91 > Signed-off-by: Evan Quan <evan.quan@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 45 +++++++++++++++---- > .../gpu/drm/amd/include/kgd_pp_interface.h | 2 + > .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 12 +++++ > .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 19 ++++++++ > .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 18 ++++++++ > 5 files changed, 88 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > index be33144e2dca..1007307845d8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c > @@ -1434,6 +1434,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, > { > struct amdgpu_device *adev = dev_get_drvdata(dev); > struct drm_device *ddev = adev->ddev; > + int channel = to_sensor_dev_attr(attr)->index; > int r, temp, size = sizeof(temp); > > /* Can't get temperature when the card is off */ > @@ -1441,11 +1442,32 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, > (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) > return -EINVAL; > > - /* get the temperature */ > - r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, > - (void *)&temp, &size); > - if (r) > - return r; > + if (channel >= PP_TEMP_MAX) > + return -EINVAL; > + > + switch (channel) { > + case PP_TEMP_JUNCTION: > + /* get current junction temperature */ > + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, > + (void *)&temp, &size); > + if (r) > + return r; > + break; > + case PP_TEMP_EDGE: > + /* get current edge temperature */ > + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP, > + (void *)&temp, &size); > + if (r) > + return r; > + break; > + case PP_TEMP_MEM: > + /* get current 
memory temperature */ > + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP, > + (void *)&temp, &size); > + if (r) > + return r; > + break; > + } > > return snprintf(buf, PAGE_SIZE, "%d\n", temp); > } > @@ -2109,7 +2131,8 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, > * - temp[1-3]_label: temperature channel label > * - temp2_label and temp3_label are supported on SOC15 dGPUs only > * > - * - temp1_input: the on die GPU temperature in millidegrees Celsius > + * - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius > + * - temp2_input and temp3_input are supported on SOC15 dGPUs only > * > * - temp[1-3]_crit: temperature critical max value in millidegrees Celsius > * - temp2_crit and temp3_crit are supported on SOC15 dGPUs only > @@ -2166,13 +2189,15 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev, > * > */ > > -static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); > +static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_JUNCTION); > static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); > static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); > static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION); > +static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_EDGE); > static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_edge_temp_thresh, NULL, 0); > static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_edge_temp_thresh, NULL, 1); > static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE); > +static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_MEM); > static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0); > static 
SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1); > static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM); > @@ -2205,8 +2230,10 @@ static struct attribute *hwmon_attributes[] = { > &sensor_dev_attr_temp1_input.dev_attr.attr, > &sensor_dev_attr_temp1_crit.dev_attr.attr, > &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr, > + &sensor_dev_attr_temp2_input.dev_attr.attr, > &sensor_dev_attr_temp2_crit.dev_attr.attr, > &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr, > + &sensor_dev_attr_temp3_input.dev_attr.attr, > &sensor_dev_attr_temp3_crit.dev_attr.attr, > &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr, > &sensor_dev_attr_temp1_label.dev_attr.attr, > @@ -2348,7 +2375,9 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, > attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr || > attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr || > attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr || > - attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr)) > + attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr || > + attr == &sensor_dev_attr_temp2_input.dev_attr.attr || > + attr == &sensor_dev_attr_temp3_input.dev_attr.attr)) > return 0; > > > diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h > index 17324c0d503e..19713ffdb03e 100644 > --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h > +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h > @@ -111,6 +111,8 @@ enum amd_pp_sensors { > AMDGPU_PP_SENSOR_GPU_LOAD, > AMDGPU_PP_SENSOR_GFX_MCLK, > AMDGPU_PP_SENSOR_GPU_TEMP, Add: AMDGPU_PP_SENSOR_GPU_JUNCTION_TEMP = AMDGPU_PP_SENSOR_GPU_TEMP, and use that for clarity. That said, existing ASICs use AMDGPU_PP_SENSOR_GPU_TEMP for the edge temperature, so I'd suggest making AMDGPU_PP_SENSOR_GPU_EDGE_TEMP = AMDGPU_PP_SENSOR_GPU_TEMP, and then adding a new entry for JUNCTION. 
> + AMDGPU_PP_SENSOR_EDGE_TEMP, > + AMDGPU_PP_SENSOR_MEM_TEMP, > AMDGPU_PP_SENSOR_VCE_POWER, > AMDGPU_PP_SENSOR_UVD_POWER, > AMDGPU_PP_SENSOR_GPU_POWER, > diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c > index 1d78a5ee9523..f4ecbbe854ee 100644 > --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c > +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c > @@ -3785,6 +3785,18 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx, > *((uint32_t *)value) = vega10_thermal_get_temperature(hwmgr); I think vega10_thermal_get_temperature() returns the edge temperature on vega10. Maybe it would be better to switch to PPSMC_MSG_GetTemperatureHotspot for AMDGPU_PP_SENSOR_GPU_JUNCTION_TEMP or use vega10_thermal_get_temperature() for EDGE. > *size = 4; > break; > + case AMDGPU_PP_SENSOR_EDGE_TEMP: > + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureEdge); > + *((uint32_t *)value) = smum_get_argument(hwmgr) * > + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; > + *size = 4; > + break; > + case AMDGPU_PP_SENSOR_MEM_TEMP: > + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHBM); > + *((uint32_t *)value) = smum_get_argument(hwmgr) * > + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; > + *size = 4; > + break; > case AMDGPU_PP_SENSOR_UVD_POWER: > *((uint32_t *)value) = data->uvd_power_gated ? 
0 : 1; > *size = 4; > diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c > index 695ac2875540..86c48cb56f6c 100644 > --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c > +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c > @@ -1338,6 +1338,7 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx, > void *value, int *size) > { > struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend); > + SmuMetrics_t metrics_table; > int ret = 0; > > switch (idx) { > @@ -1360,6 +1361,24 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx, > *((uint32_t *)value) = vega12_thermal_get_temperature(hwmgr); > *size = 4; > break; > + case AMDGPU_PP_SENSOR_EDGE_TEMP: > + ret = vega12_get_metrics_table(hwmgr, &metrics_table); > + if (ret) > + return ret; > + > + *((uint32_t *)value) = metrics_table.TemperatureEdge * > + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; > + *size = 4; > + break; > + case AMDGPU_PP_SENSOR_MEM_TEMP: > + ret = vega12_get_metrics_table(hwmgr, &metrics_table); > + if (ret) > + return ret; > + > + *((uint32_t *)value) = metrics_table.TemperatureHBM * > + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; > + *size = 4; > + break; > case AMDGPU_PP_SENSOR_UVD_POWER: > *((uint32_t *)value) = data->uvd_power_gated ? 
0 : 1; > *size = 4; > diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c > index 0c0714862eb8..72a71a002f0b 100644 > --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c > +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c > @@ -2142,6 +2142,24 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx, > *((uint32_t *)value) = vega20_thermal_get_temperature(hwmgr); > *size = 4; > break; > + case AMDGPU_PP_SENSOR_EDGE_TEMP: > + ret = vega20_get_metrics_table(hwmgr, &metrics_table); > + if (ret) > + return ret; > + > + *((uint32_t *)value) = metrics_table.TemperatureEdge * > + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; > + *size = 4; > + break; > + case AMDGPU_PP_SENSOR_MEM_TEMP: > + ret = vega20_get_metrics_table(hwmgr, &metrics_table); > + if (ret) > + return ret; > + > + *((uint32_t *)value) = metrics_table.TemperatureHBM * > + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; > + *size = 4; > + break; > case AMDGPU_PP_SENSOR_UVD_POWER: > *((uint32_t *)value) = data->uvd_power_gated ? 0 : 1; > *size = 4; > -- > 2.21.0 > > _______________________________________________ > amd-gfx mailing list > amd-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx