On Tue, 28 Mar 2023 02:14:42 -0700, Tvrtko Ursulin wrote:
>

Hi Tvrtko,

> On 27/03/2023 18:47, Rodrigo Vivi wrote:
> >
> > +Daniel
> >
> > On Mon, Mar 27, 2023 at 09:58:52AM -0700, Dixit, Ashutosh wrote:
> >> On Sun, 26 Mar 2023 04:52:59 -0700, Rodrigo Vivi wrote:
> >>>
> >>
> >> Hi Rodrigo,
> >>
> >>> On Fri, Mar 24, 2023 at 04:31:22PM -0700, Dixit, Ashutosh wrote:
> >>>> On Fri, 24 Mar 2023 11:15:02 -0700, Belgaumkar, Vinay wrote:
> >>>>>
> >>>>
> >>>> Hi Vinay,
> >>>>
> >>>> Thanks for the review. Comments inline below.
> >>>>
> >>>>> On 3/15/2023 8:59 PM, Ashutosh Dixit wrote:
> >>>>>> On dGfx, the PL1 power limit being enabled and set to a low value results
> >>>>>> in a low GPU operating freq. It also negates the freq raise operation which
> >>>>>> is done before GuC firmware load. As a result GuC firmware load can time
> >>>>>> out. Such timeouts were seen in the GL #8062 bug below (where the PL1 power
> >>>>>> limit was enabled and set to a low value). Therefore disable the PL1 power
> >>>>>> limit when allowed by HW when loading GuC firmware.
> >>>>> v3 label missing in subject.
> >>>>>>
> >>>>>> v2:
> >>>>>>  - Take mutex (to disallow writes to power1_max) across GuC reset/fw load
> >>>>>>  - Add hwm_power_max_restore to error return code path
> >>>>>>
> >>>>>> v3 (Jani N):
> >>>>>>  - Add/remove explanatory comments
> >>>>>>  - Function renames
> >>>>>>  - Type corrections
> >>>>>>  - Locking annotation
> >>>>>>
> >>>>>> Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062
> >>>>>> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@xxxxxxxxx>
> >>>>>> ---
> >>>>>>  drivers/gpu/drm/i915/gt/uc/intel_uc.c |  9 +++++++
> >>>>>>  drivers/gpu/drm/i915/i915_hwmon.c     | 39 +++++++++++++++++++++++++++
> >>>>>>  drivers/gpu/drm/i915/i915_hwmon.h     |  7 +++++
> >>>>>>  3 files changed, 55 insertions(+)
> >>>>>>
> >>>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> >>>>>> index 4ccb4be4c9cba..aa8e35a5636a0 100644
> >>>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> >>>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> >>>>>> @@ -18,6 +18,7 @@
> >>>>>>  #include "intel_uc.h"
> >>>>>>  #include "i915_drv.h"
> >>>>>> +#include "i915_hwmon.h"
> >>>>>>  static const struct intel_uc_ops uc_ops_off;
> >>>>>>  static const struct intel_uc_ops uc_ops_on;
> >>>>>> @@ -461,6 +462,7 @@ static int __uc_init_hw(struct intel_uc *uc)
> >>>>>>  	struct intel_guc *guc = &uc->guc;
> >>>>>>  	struct intel_huc *huc = &uc->huc;
> >>>>>>  	int ret, attempts;
> >>>>>> +	bool pl1en;
> >>>>>
> >>>>> Init to 'false' here
> >>>>
> >>>> See next comment.
> >>>>
> >>>>>
> >>>>>
> >>>>>>  	GEM_BUG_ON(!intel_uc_supports_guc(uc));
> >>>>>>  	GEM_BUG_ON(!intel_uc_wants_guc(uc));
> >>>>>> @@ -491,6 +493,9 @@ static int __uc_init_hw(struct intel_uc *uc)
> >>>>>>  	else
> >>>>>>  		attempts = 1;
> >>>>>> +	/* Disable a potentially low PL1 power limit to allow freq to be
> >>>>>> raised */
> >>>>>> +	i915_hwmon_power_max_disable(gt->i915, &pl1en);
> >>>>>> +
> >>>>>>  	intel_rps_raise_unslice(&uc_to_gt(uc)->rps);
> >>>>>>  	while (attempts--) {
> >>>>>> @@ -547,6 +552,8 @@ static int __uc_init_hw(struct intel_uc *uc)
> >>>>>>  		intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
> >>>>>>  	}
> >>>>>> +	i915_hwmon_power_max_restore(gt->i915, pl1en);
> >>>>>> +
> >>>>>>  	guc_info(guc, "submission %s\n", str_enabled_disabled(intel_uc_uses_guc_submission(uc)));
> >>>>>>  	guc_info(guc, "SLPC %s\n", str_enabled_disabled(intel_uc_uses_guc_slpc(uc)));
> >>>>>> @@ -563,6 +570,8 @@ static int __uc_init_hw(struct intel_uc *uc)
> >>>>>>  	/* Return GT back to RPn */
> >>>>>>  	intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
> >>>>>> +	i915_hwmon_power_max_restore(gt->i915, pl1en);
> >>>>>
> >>>>> if (pl1en)
> >>>>>
> >>>>> 	i915_hwmon_power_max_enable().
> >>>>
> >>>> IMO it's better not to have checks in the main __uc_init_hw() function (if
> >>>> we do this we'll need to add 2 checks in __uc_init_hw()). If you really
> >>>> want we could do something like this inside
> >>>> i915_hwmon_power_max_disable/i915_hwmon_power_max_restore. But for now I
> >>>> am not making any changes.
> >>>>
> >>>> (I can send a patch with the changes if you want to take a look but IMO it
> >>>> will add more logic/code but without real benefits (it will save a rmw if
> >>>> the limit was already disabled, but IMO this code is called so infrequently
> >>>> (only during GuC resets) as to not have any significant impact)).
> >>>>
> >>>>>
> >>>>>> +
> >>>>>>  	__uc_sanitize(uc);
> >>>>>>  	if (!ret) {
> >>>>>> diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
> >>>>>> index ee63a8fd88fc1..769b5bda4d53f 100644
> >>>>>> --- a/drivers/gpu/drm/i915/i915_hwmon.c
> >>>>>> +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> >>>>>> @@ -444,6 +444,45 @@ hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val)
> >>>>>>  	}
> >>>>>>  }
> >>>>>> +void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool
> >>>>>> *old)
> >>>>> Shouldn't we call this i915_hwmon_package_pl1_disable()?
> >>>>
> >>>> I did think of using "pl1" in the function name but then decided to retain
> >>>> "power_max" because other hwmon functions for PL1 limit also use
> >>>> "power_max" (hwm_power_max_read/hwm_power_max_write) and currently
> >>>> "hwmon_power_max" is mapped to the PL1 limit. So "power_max" is used to
> >>>> show that all these functions deal with the PL1 power limit.
> >>>>
> >>>> There is a comment in __uc_init_hw() explaining "power_max" means the PL1
> >>>> power limit.
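As a purely illustrative sketch (not part of the posted patch), the "do the
check inside the helpers" alternative discussed above -- skipping the rmw in
the restore path when the limit was already disabled -- could look roughly
like the following. It borrows names from the patch but is otherwise an
assumption about how such a variant might be written:

void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old)
	__releases(i915->hwmon->hwmon_lock)
{
	struct i915_hwmon *hwmon = i915->hwmon;
	intel_wakeref_t wakeref;

	if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
		return;

	/* Hypothetical variant: only touch the register if PL1 was enabled */
	if (old)
		with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref)
			intel_uncore_rmw(hwmon->ddat.uncore,
					 hwmon->rg.pkg_rapl_limit,
					 PKG_PWR_LIM_1_EN, PKG_PWR_LIM_1_EN);

	mutex_unlock(&hwmon->hwmon_lock);
}

The trade-off is as described above: one saved rmw when the limit was already
disabled, at the cost of a little extra logic in the helpers.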
> >>>>
> >>>>>> +	__acquires(i915->hwmon->hwmon_lock)
> >>>>>> +{
> >>>>>> +	struct i915_hwmon *hwmon = i915->hwmon;
> >>>>>> +	intel_wakeref_t wakeref;
> >>>>>> +	u32 r;
> >>>>>> +
> >>>>>> +	if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
> >>>>>> +		return;
> >>>>>> +
> >>>>>> +	/* Take mutex to prevent concurrent hwm_power_max_write */
> >>>>>> +	mutex_lock(&hwmon->hwmon_lock);
> >>>>>> +
> >>>>>> +	with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref)
> >>>>>> +		r = intel_uncore_rmw(hwmon->ddat.uncore,
> >>>>>> +				     hwmon->rg.pkg_rapl_limit,
> >>>>>> +				     PKG_PWR_LIM_1_EN, 0);
> >>>>> Most of this code (lock and rmw parts) is already inside static void
> >>>>> hwm_locked_with_pm_intel_uncore_rmw(), can we reuse that here?
> >>>>
> >>>> This was the case in v1 of the patch:
> >>>>
> >>>> https://patchwork.freedesktop.org/patch/526393/?series=115003&rev=1
> >>>>
> >>>> But now this cannot be done because if you notice we acquire the mutex in
> >>>> i915_hwmon_power_max_disable() and release the mutex in
> >>>> i915_hwmon_power_max_restore().
> >>>>
> >>>> I explained the reason why the mutex is handled this way in my reply
> >>>> to Jani Nikula here:
> >>>>
> >>>> https://patchwork.freedesktop.org/patch/526598/?series=115003&rev=2
> >>>>
> >>>> Quoting below:
> >>>>
> >>>> ```
> >>>>>> +	/* hwmon_lock mutex is unlocked in hwm_power_max_restore */
> >>>>>
> >>>>> Not too happy about that... any better ideas?
> >>>>
> >>>> Afais, taking the mutex is the only fully correct solution (when we disable
> >>>> the power limit, userspace can go re-enable it). Examples of partly
> >>>> incorrect solutions (which don't take the mutex) include:
> >>>>
> >>>> a. Don't take the mutex, don't do anything, ignore any changes to the value
> >>>>    if it has changed during GuC reset/fw load (just overwrite the changed
> >>>>    value). Con: changed value is lost.
> >>>>
> >>>> b. Detect if the value has changed (the limit has been re-enabled) after we
> >>>>    have disabled the limit and in that case skip restoring the value. But
> >>>>    then someone can say why do we allow enabling the PL1 limit since we
> >>>>    want to disable it.
> >>>>
> >>>> Both these are very unlikely scenarios so they might work. But I would
> >>>> first like to explore if holding a mutex across GuC reset is problematic
> >>>> since that is /the/ correct solution. But if anyone comes up with a reason
> >>>> why that cannot be done we can look at these other not completely correct
> >>>> options.
> >>>
> >>> I see what you are doing and it indeed looks like a very safe approach to ensure
> >>> the pl1 won't be toggled by other paths while we need some guaranteed state
> >>> here, or hw init fails badly.
> >>>
> >>> But in the end you are making your lock protect the code from another path
> >>> and not protecting the data itself. The data was already protected in the
> >>> first version with the lock in the rmw.
> >>
> >> Sorry I am not really following. Daniel had mentioned this "protecting code
> >> vs protecting data" but I am wondering how it is applicable in this
> >> case. IMO here the data we are protecting is the register which we don't
> >> want written to by userland while GuC load is in progress. To do that we
> >> need to block the code path writing to the register. So what we have here seems
> >> to me to be the simplest and cleanest approach for solving this issue.
> >
> > I believe your case here is exactly what Daniel had mentioned as protecting
> > code and not data.
> > Well, in the end we are of course protecting data to be modified, but in
> > your case you use that mutex to also protect the code path and avoid other
> > calls while you are in this guc_init_path...
> >
> > Please Daniel, correct me here if I got it wrong.
> >
> > What I don't like here is that we lock from one function and keep that for a
> > while and unlock from the other function. To protect the data itself in general
> > we just need it for a very minimal time while we are modifying the data itself.
> >
> >>
> >>> maybe we need to have some kind of a state check with a separate state-lock and
> >>> then, if we are in this forced state for the init path, the request from the
> >>> normal path ignores it and moves on,
> >>
> >> I don't see how this will *not* be racy...
> >
> > maybe something like this?:
> >
> > at power_max_disable:
> > mutex_lock(data_lock);
> >
> > mutex_lock(state_lock);
> > state = in_use;
> > mutex_unlock(state_lock);
> >
> > mmio_rmw();
> > mutex_unlock(data_lock);
> >
> >
> > at power_max_restoration:
> > mutex_lock(data_lock);
> >
> > mutex_lock(state_lock);
> > state = available;
> > mutex_unlock(state_lock);
> >
> > mmio_rmw();
> > mutex_unlock(data_lock);
> >
> > at sysfs fn:
> >
> > mutex_lock(data_lock);
> > mutex_lock(state_lock);
> > if (state == in_use) {
> > 	mutex_unlock(state_lock);
> > 	ret = -EAGAIN;
> > 	goto out;
> > }
> > mutex_unlock(state_lock);
> >
> > ....
> >
> > out:
> >
> > mutex_unlock(data_lock);
> I agree holding the mutex across functions to cover the GuC init path is
> not the nicest pattern. The above looks like a plausible improvement,

Agree, the above turned out rather nice.

> although I don't know if EAGAIN is correct for hwmon, or if blocking is.

Yes, blocking is the correct uapi behavior. Patch 3/3 in the latest version
does this: https://patchwork.freedesktop.org/series/116172/

> Is something expected to be configuring those fields during boot and can
> it even handle EAGAIN?
>
> One advantage of the solution from this patch I can see though is that I
> think it eliminates data races (restoring the stale value) with fw reload
> triggered by a potential full GPU reset happening in parallel to sysfs
> writes.
>
> Another thing to check would be if the inversions between
> hwmon_lock->rpm_get and rpm_get->hwmon_lock are okay.
>
> In fact, I am not sure rpm_get in this patch is needed? Seems to be running
> under paths which guarantee holding it already, if I am not missing
> something. If not needed then there is obviously no inversion in any way.

Yes, I checked and believe you are right. So I have eliminated rpm_get in the
disable/restore functions in the latest version.

Thanks.
--
Ashutosh

> P.S.
> Do some of the existing mutex_lock calls actually need to be
> mutex_lock_interruptible so sysfs reads/writes can Ctrl-C, in theory at least.
>
>
> >>> or maybe we queue some request...
> >>
> >> Queuing a request will not be enough (even if this is possible), the
> >> request will need to wait to complete till GuC load completes. So we'll
> >> have to complete the request when GuC load completes, similar to releasing
> >> the mutex in the current patch. Looks like a much more complicated way of
> >> doing what the mutex does very simply.
> >
> > The wq would sleep/delay while state == in_use, then process the next request...
> >
> >>
> >> So:
> >>
> >> a. What is the real problem with the current implementation?
> >
> > probably the big lock used to protect the state machinery...
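For concreteness, a rough C rendering of the two-lock scheme sketched above
might look as follows. This is hypothetical (the struct and function names are
made up, and the restore side would simply mirror disable with state set back
to available); it is only meant to show the locking shape, including dropping
state_lock before the -EAGAIN exit:

#include <linux/errno.h>
#include <linux/mutex.h>

enum hwm_pl1_state { HWM_PL1_AVAILABLE, HWM_PL1_IN_USE };

struct hwm_pl1_ctl {
	struct mutex data_lock;		/* serializes the register rmw */
	struct mutex state_lock;	/* guards only @state */
	enum hwm_pl1_state state;
};

static void pl1_disable_for_guc_load(struct hwm_pl1_ctl *ctl)
{
	mutex_lock(&ctl->data_lock);

	mutex_lock(&ctl->state_lock);
	ctl->state = HWM_PL1_IN_USE;
	mutex_unlock(&ctl->state_lock);

	/* rmw clearing PKG_PWR_LIM_1_EN would go here */

	mutex_unlock(&ctl->data_lock);
}

static int pl1_sysfs_write(struct hwm_pl1_ctl *ctl, long val)
{
	int ret = 0;

	mutex_lock(&ctl->data_lock);

	mutex_lock(&ctl->state_lock);
	if (ctl->state == HWM_PL1_IN_USE) {
		mutex_unlock(&ctl->state_lock);
		ret = -EAGAIN;
		goto out;
	}
	mutex_unlock(&ctl->state_lock);

	/* rmw writing the new PL1 limit from @val would go here */
out:
	mutex_unlock(&ctl->data_lock);
	return ret;
}

Here data_lock is held only around each individual rmw, while the in_use state
is what keeps the sysfs path away for the duration of the GuC load.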
> >
> > but if other folks believe that we don't have an actual problem here
> > and this big lock is acceptable as long as it has the annotation for
> > the static analyzers, I'm okay to just let it go...
> >
> >
> >>
> >> b. What would be the correct solution for it? That is, how, specifically,
> >> should we implement it?
> >
> > state handling with a lock separate from the data itself is my suggestion.
> >
> >>
> >> Some more guidance will be helpful if you think this patch has issues.
> >
> > I hope Daniel and/or other i915 maintainers can jump in here. Especially if
> > I'm being too paranoid and the current patch is enough...
> >
> >>
> >> Thanks.
> >> --
> >> Ashutosh
> >>
> >>>> ```
> >>>>
> >>>>>> +
> >>>>>> +	*old = !!(r & PKG_PWR_LIM_1_EN);
> >>>>>> +}
> >>>>>> +
> >>>>>> +void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old)
> >>>>>> +	__releases(i915->hwmon->hwmon_lock)
> >>>>> We can just call this i915_hwmon_power_max_enable() and call it whenever the
> >>>>> old value was actually enabled. That way, we have proper mirror functions.
> >>>>
> >>>> As I explained that would mean adding two checks in the main __uc_init_hw()
> >>>> function which I am trying to avoid. So we have a disable/restore pair.
> >>>>
> >>>>>> +{
> >>>>>> +	struct i915_hwmon *hwmon = i915->hwmon;
> >>>>>> +	intel_wakeref_t wakeref;
> >>>>>> +
> >>>>>> +	if (!hwmon || !i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
> >>>>>> +		return;
> >>>>>> +
> >>>>>> +	with_intel_runtime_pm(hwmon->ddat.uncore->rpm, wakeref)
> >>>>>> +		intel_uncore_rmw(hwmon->ddat.uncore,
> >>>>>> +				 hwmon->rg.pkg_rapl_limit,
> >>>>>> +				 PKG_PWR_LIM_1_EN,
> >>>>>> +				 old ? PKG_PWR_LIM_1_EN : 0);
> >>>>>
> >>>>> 3rd param should be 0 here, else we will end up clearing other bits.
> >>>>
> >>>> No, see intel_uncore_rmw(): it will only clear the PKG_PWR_LIM_1_EN bit, so
> >>>> the code here is correct. intel_uncore_rmw() does:
> >>>>
> >>>> val = (old & ~clear) | set;
> >>>>
> >>>> So for now I am not making any changes; if you feel strongly about
> >>>> something one way or another let me know. Anyway these comments should help
> >>>> you understand the patch better, so take a look and we can go from there.
> >>>>
> >>>> Thanks.
> >>>> --
> >>>> Ashutosh
> >>>>
> >>>>>> +
> >>>>>> +	mutex_unlock(&hwmon->hwmon_lock);
> >>>>>> +}
> >>>>>> +
> >>>>>>  static umode_t
> >>>>>>  hwm_energy_is_visible(const struct hwm_drvdata *ddat, u32 attr)
> >>>>>>  {
> >>>>>> diff --git a/drivers/gpu/drm/i915/i915_hwmon.h b/drivers/gpu/drm/i915/i915_hwmon.h
> >>>>>> index 7ca9cf2c34c96..0fcb7de844061 100644
> >>>>>> --- a/drivers/gpu/drm/i915/i915_hwmon.h
> >>>>>> +++ b/drivers/gpu/drm/i915/i915_hwmon.h
> >>>>>> @@ -7,14 +7,21 @@
> >>>>>>  #ifndef __I915_HWMON_H__
> >>>>>>  #define __I915_HWMON_H__
> >>>>>> +#include <linux/types.h>
> >>>>>> +
> >>>>>>  struct drm_i915_private;
> >>>>>> +struct intel_gt;
> >>>>>>  #if IS_REACHABLE(CONFIG_HWMON)
> >>>>>>  void i915_hwmon_register(struct drm_i915_private *i915);
> >>>>>>  void i915_hwmon_unregister(struct drm_i915_private *i915);
> >>>>>> +void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old);
> >>>>>> +void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old);
> >>>>>>  #else
> >>>>>>  static inline void i915_hwmon_register(struct drm_i915_private *i915) { };
> >>>>>>  static inline void i915_hwmon_unregister(struct drm_i915_private *i915) { };
> >>>>>> +static inline void i915_hwmon_power_max_disable(struct drm_i915_private *i915, bool *old) { };
> >>>>>> +static inline void i915_hwmon_power_max_restore(struct drm_i915_private *i915, bool old) { };
> >>>>>>  #endif
> >>>>>>  #endif /* __I915_HWMON_H__ */
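P.S. To make the intel_uncore_rmw() point above concrete, here is a small
standalone example (plain userspace C; the bit position of PKG_PWR_LIM_1_EN is
assumed for illustration) showing that with clear = PKG_PWR_LIM_1_EN and
set = PKG_PWR_LIM_1_EN or 0, only that one bit ever changes:

#include <stdio.h>

#define PKG_PWR_LIM_1_EN (1u << 15)	/* bit position assumed for illustration */

static unsigned int rmw(unsigned int old, unsigned int clear, unsigned int set)
{
	return (old & ~clear) | set;	/* same computation as intel_uncore_rmw() */
}

int main(void)
{
	unsigned int reg = 0x2280u | PKG_PWR_LIM_1_EN;	/* other bits + EN set */
	unsigned int disabled, restored;

	/* disable: clear only PKG_PWR_LIM_1_EN, leave the other bits alone */
	disabled = rmw(reg, PKG_PWR_LIM_1_EN, 0);

	/* restore: set PKG_PWR_LIM_1_EN again, still leaving the rest alone */
	restored = rmw(disabled, PKG_PWR_LIM_1_EN, PKG_PWR_LIM_1_EN);

	printf("orig=%#x disabled=%#x restored=%#x\n", reg, disabled, restored);
	return 0;	/* prints orig=0xa280 disabled=0x2280 restored=0xa280 */
}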