Re: [PATCH v3 2/6] drm/xe/hwmon: Expose power attributes

"Nilawar, Badal" <badal.nilawar@xxxxxxxxx> · Fri, 4 Aug 2023 19:51:33 +0530

On 03-08-2023 04:53, Andi Shyti wrote:
Hi Badal,

On Wed, Aug 02, 2023 at 07:22:37PM +0530, Badal Nilawar wrote:
Expose power_max (pl1) and power_rated_max (tdp) attributes.

can you please write a few words more here to explain the
interface being exposed and what these powers are?

+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_MCHBAR_REGS_H__
+#define _XE_MCHBAR_REGS_H_

there is an extra '_' in the ifndef
Sure I will fix this.

+

[...]

  #include <linux/hwmon.h>
  
  #include <drm/drm_managed.h>
+#include "regs/xe_mchbar_regs.h"
  #include "regs/xe_gt_regs.h"
  #include "xe_device.h"
  #include "xe_hwmon.h"
+#include "xe_mmio.h"
+#include "xe_gt.h"

can we keep these in alphabetical order?
Sure

+enum hwmon_reg_name {
+	REG_PKG_RAPL_LIMIT,
+	REG_PKG_POWER_SKU,
+	REG_PKG_POWER_SKU_UNIT,
+};

Are these names or id's? With name I understand string.
Can't say ids. I will remove the _name suffix to avoid confusion.

+enum hwmon_reg_operation {
+	REG_READ,
+	REG_WRITE,
+	REG_RMW,
+};

I'm not checking on the prefixes here... I'll let someone more
experienced than me comment if there is anything wrong.

+/*
+ * SF_* - scale factors for particular quantities according to hwmon spec.
+ * - power  - microwatts
+ */

this comment looks a bit off to me, what does
" - power  - microwatts" stand for?
The unit of power is microwatts, as per the hwmon spec.

+#define SF_POWER	1000000
  
  struct xe_hwmon_data {
  	struct device *hwmon_dev;
@@ -18,13 +39,268 @@ struct xe_hwmon_data {
  
  struct xe_hwmon {
  	struct xe_hwmon_data ddat;
-	struct mutex hwmon_lock;
+	struct mutex hwmon_lock; /* rmw operations*/

please put this change in the previous patch.
Sure

+	bool reset_in_progress;
+	wait_queue_head_t waitq;
+	int scl_shift_power;
  };
  
+#define ddat_to_xe_hwmon(ddat)	({ container_of(ddat, struct xe_hwmon, ddat); })

Any particular reason for the ({ ... }) ?

+static int process_hwmon_reg(struct xe_hwmon_data *ddat, enum hwmon_reg_name reg_name,
+			     enum hwmon_reg_operation operation, u32 *value,
+			     u32 clr, u32 set)
+{
+	struct xe_reg reg;
+	int ret = 0;
+
+	reg.raw = hwmon_get_reg(ddat, reg_name);
+
+	if (!reg.raw)
+		return -EOPNOTSUPP;
+
+	switch (operation) {
+	case REG_READ:
+		*value = xe_mmio_read32(ddat->gt, reg);
+		break;
+	case REG_WRITE:
+		xe_mmio_write32(ddat->gt, reg, *value);
+		break;
+	case REG_RMW:
+		*value = xe_mmio_rmw32(ddat->gt, reg, clr, set);
+		break;
+	default:
+		XE_MISSING_CASE(operation);
+		ret = -EOPNOTSUPP;

you could just return 0 or return -EOPNOTSUPP everywhere and save
"ret" and a return (maybe not needed).

Just a personal preference, feel free to ignore it and do as you
like.
Sure I will fix this in next rev.

+		break;
+	}
+
+	return ret;
+}

[...]

+static int hwmon_power_max_read(struct xe_hwmon_data *ddat, long *value)
+{
+	struct xe_hwmon *hwmon = ddat_to_xe_hwmon(ddat);
+	u32 reg_val;
+	u64 r, min, max;
+
+	process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_READ, &reg_val, 0, 0);
+	/* Check if PL1 limit is disabled */
+	if (!(reg_val & PKG_PWR_LIM_1_EN)) {
+		*value = PL1_DISABLE;
+		return 0;
+	}
+
+	reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val);
+	*value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power);
+
+	process_hwmon_reg_read64(ddat, REG_PKG_POWER_SKU, &r);
+	min = REG_FIELD_GET(PKG_MIN_PWR, r);
+	min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power);
+	max = REG_FIELD_GET(PKG_MAX_PWR, r);
+	max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power);
+
+	if (min && max)
+		*value = clamp_t(u64, *value, min, max);
+
+	return 0;

you are returning '0' in any case, can we make this void?
The top-layer function expects a return value, so a return was added here.

+}
+
+static inline bool check_reset_in_progress(struct xe_hwmon *hwmon)
+{
+	mutex_lock(&hwmon->hwmon_lock);
+	if (!hwmon->reset_in_progress)
+		return true;
+	mutex_unlock(&hwmon->hwmon_lock);
+		return false;

This is a bit scary (apart from the indentation) and without a
strong explanation I can't let this go.

I'm pretty sure that we don't need this... can you explain?
When a GuC load is not in progress (!reset_in_progress), the mutex must not
be unlocked here; it gets unlocked once the rmw operations are over.

The other way could be to take mutex_lock after checking !reset_in_progress, but that
will add a race.
	wait_event(hwmon->waitq, reset_in_progress);
	At this point there is a possibility that reset_in_progress gets set, so
this becomes racy.
	mutex_lock(&hwmon->hwmon_lock);

Any better idea to implement this?

+}
+
+static int hwmon_power_max_write(struct xe_hwmon_data *ddat, long value)
+{
+	struct xe_hwmon *hwmon = ddat_to_xe_hwmon(ddat);
+	DEFINE_WAIT(wait);
+	int ret = 0;
+	u32 nval;
+
+	/* hwmon->hwmon_lock remain held till rmw operation is over */
+	wait_event(hwmon->waitq, check_reset_in_progress(hwmon));
+
+	/* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */
+	if (value == PL1_DISABLE) {
+		process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_RMW, &nval,
+				  PKG_PWR_LIM_1_EN, 0);
+		process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_READ, &nval,
+				  PKG_PWR_LIM_1_EN, 0);
+
+		if (nval & PKG_PWR_LIM_1_EN)
+			ret = -ENODEV;
+		goto unlock;
+	}
+
+	/* Computation in 64-bits to avoid overflow. Round to nearest. */
+	nval = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER);
+	nval = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, nval);
+
+	process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_RMW, &nval,
+			  PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, nval);
+unlock:
+	mutex_unlock(&hwmon->hwmon_lock);

Where is this lock taken? Are you relying on the fact that this
lock might not be taken? In any case it is not allowed to unlock
a mutex without previously locking it.
Lock is taken in check_reset_in_progress();

It's very error prone when you lock in a function and unlock in
another function and in the rare cases when this is done it has
to be written in the function name.
Sure I will add comment here.

+	return 0;
+}
+
+static int hwmon_power_rated_max_read(struct xe_hwmon_data *ddat, long *value)
+{
+	struct xe_hwmon *hwmon = ddat_to_xe_hwmon(ddat);
+	u32 reg_val;
+
+	process_hwmon_reg(ddat, REG_PKG_POWER_SKU, REG_READ, &reg_val, 0, 0);
+	reg_val = REG_FIELD_GET(PKG_PKG_TDP, reg_val);
+	*value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power);
+
+	return 0;

Can this function be void?
The top-level function expects a return value.

+}

[...]

+void xe_hwmon_power_max_disable(struct xe_device *xe, bool *old)
+{
+	struct xe_hwmon *hwmon = xe->hwmon;
+	struct xe_hwmon_data *ddat = &hwmon->ddat;
+	u32 r;
+
+	if (!(hwmon && hwmon_get_reg(ddat, REG_PKG_RAPL_LIMIT)))
+		return;
+
+	xe_device_assert_mem_access(gt_to_xe(ddat->gt));
+
+	mutex_lock(&hwmon->hwmon_lock);
+
+	hwmon->reset_in_progress = true;
+
+	process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_RMW, &r,
+			  PKG_PWR_LIM_1_EN, 0);
+	*old = !!(r & PKG_PWR_LIM_1_EN);

do we need to place these last two lines under lock?
Yes, want to guard this rmw operation.

+	mutex_unlock(&hwmon->hwmon_lock);
+}
+
+void xe_hwmon_power_max_restore(struct xe_device *xe, bool old)
+{
+	struct xe_hwmon *hwmon = xe->hwmon;
+	struct xe_hwmon_data *ddat = &hwmon->ddat;
+	u32 r;
+
+	if (!(hwmon && hwmon_get_reg(ddat, REG_PKG_RAPL_LIMIT)))
+		return;
+
+	xe_device_assert_mem_access(gt_to_xe(ddat->gt));
+
+	mutex_lock(&hwmon->hwmon_lock);
+
+	process_hwmon_reg(ddat, REG_PKG_RAPL_LIMIT, REG_RMW, &r,
+			  PKG_PWR_LIM_1_EN, old ? PKG_PWR_LIM_1_EN : 0);
+
+	hwmon->reset_in_progress = false;
+	wake_up_all(&hwmon->waitq);

does the wake up need to be under lock?
wake up can be added after unlock.

Now... does it ever happen that "check_reset_in_progress()"
returns false and therefore unlocks the mutex?
I didn't get this. check_reset_in_progress() will keep waiting for the mutex
until it is released by this function.

+
+	mutex_unlock(&hwmon->hwmon_lock);
+}

[...]

  void xe_hwmon_register(struct xe_device *xe)
@@ -128,13 +425,16 @@ void xe_hwmon_register(struct xe_device *xe)
  
  	hwmon_get_preregistration_info(xe);
  
+	init_waitqueue_head(&hwmon->waitq);
+
  	drm_dbg(&xe->drm, "Register xe hwmon interface\n");
  
-	/*  hwmon_dev points to device hwmon<i> */
+	/* hwmon_dev points to device hwmon<i> */

Please, this change needs to go in the previous patch.
What is <i>?

  	hwmon_dev = devm_hwmon_device_register_with_info(dev, ddat->name,
  							 ddat,
  							 &hwmon_chip_info,
  							 NULL);
+

This change belongs in the previous patch.

  	if (IS_ERR(hwmon_dev)) {
  		drm_warn(&xe->drm, "Fail to register xe hwmon, Err:%ld\n", PTR_ERR(hwmon_dev));
  		xe->hwmon = NULL;

diff --git a/drivers/gpu/drm/xe/xe_hwmon.h b/drivers/gpu/drm/xe/xe_hwmon.h
index a078eeb0a68b..a5dc693569c5 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.h
+++ b/drivers/gpu/drm/xe/xe_hwmon.h
@@ -14,9 +14,13 @@ struct xe_device;
  #if IS_REACHABLE(CONFIG_HWMON)
  void xe_hwmon_register(struct xe_device *xe);
  void xe_hwmon_unregister(struct xe_device *xe);
+void xe_hwmon_power_max_disable(struct xe_device *xe, bool *old);
+void xe_hwmon_power_max_restore(struct xe_device *xe, bool old);
  #else
  static inline void xe_hwmon_register(struct xe_device *xe) { };
  static inline void xe_hwmon_unregister(struct xe_device *xe) { };
+static inline void xe_hwmon_power_max_disable(struct xe_device *xe, bool *old) { };
+static inline void xe_hwmon_power_max_restore(struct xe_device *xe, bool old) { };
  #endif
  
  #endif /* __XE_HWMON_H__ */
diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h
index daf56c846d03..030296f8f863 100644
--- a/drivers/gpu/drm/xe/xe_macros.h
+++ b/drivers/gpu/drm/xe/xe_macros.h
@@ -15,4 +15,7 @@
  			    "Ioctl argument check failed at %s:%d: %s", \
  			    __FILE__, __LINE__, #cond), 1))
  
+#define XE_MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \
+			     __stringify(x), (long)(x))
+

Should this have its own patch?
Sure, I will create separate patch for this.

Andi

  #endif