Hi Gustavo, Thanks for the comments. Some inline responses are below! On Fri, 2023-05-12 at 17:43 -0300, Gustavo Sousa wrote: > Quoting Govindapillai, Vinod (2023-05-11 20:24:55) > > Hello > > > > Thanks for the comments. Pls see some inline replies.. > > > > On Thu, 2023-04-27 at 17:24 -0300, Gustavo Sousa wrote: > > > Quoting Vinod Govindapillai (2023-04-27 12:00:15) > > > > From: Mika Kahola <mika.kahola@xxxxxxxxx> > > > > > > > > Display14 introduces a new way to instruct the PUnit with > > > > power and bandwidth requirements of DE. Add the functionality > > > > to program the registers and handle waits using interrupts. > > > > The current wait time for timeouts is programmed for 10 msecs to > > > > factor in the worst case scenarios. Changes made to use REG_BIT > > > > for a register that we touched(GEN8_DE_MISC_IER _MMIO). > > > > > > > > Wa_14016740474 is added which applies to Xe_LPD+ display > > > > > > > > v2: checkpatch warning fixes, simplify program pmdemand part > > > > > > > > v3: update to dbufs and pipes values to pmdemand register(stan) > > > > Removed the macro usage in update_pmdemand_values() > > > > > > > > Bspec: 66451, 64636, 64602, 64603 > > > > Cc: Matt Atwood <matthew.s.atwood@xxxxxxxxx> > > > > Cc: Matt Roper <matthew.d.roper@xxxxxxxxx> > > > > Cc: Lucas De Marchi <lucas.demarchi@xxxxxxxxx> > > > > Cc: Gustavo Sousa <gustavo.sousa@xxxxxxxxx> > > > > Signed-off-by: José Roberto de Souza <jose.souza@xxxxxxxxx> > > > > Signed-off-by: Radhakrishna Sripada <radhakrishna.sripada@xxxxxxxxx> > > > > Signed-off-by: Gustavo Sousa <gustavo.sousa@xxxxxxxxx> > > > > Signed-off-by: Mika Kahola <mika.kahola@xxxxxxxxx> > > > > Signed-off-by: Vinod Govindapillai <vinod.govindapillai@xxxxxxxxx> > > > > --- > > > > drivers/gpu/drm/i915/Makefile | 3 +- > > > > drivers/gpu/drm/i915/display/intel_display.c | 7 + > > > > .../gpu/drm/i915/display/intel_display_core.h | 6 + > > > > .../drm/i915/display/intel_display_driver.c | 7 + > > > > .../drm/i915/display/intel_display_power.c | 8 + > > > > drivers/gpu/drm/i915/display/intel_pmdemand.c | 455 ++++++++++++++++++ > > > > drivers/gpu/drm/i915/display/intel_pmdemand.h | 24 + > > > > drivers/gpu/drm/i915/i915_irq.c | 21 +- > > > > drivers/gpu/drm/i915/i915_reg.h | 36 +- > > > > 9 files changed, 562 insertions(+), 5 deletions(-) > > > > create mode 100644 drivers/gpu/drm/i915/display/intel_pmdemand.c > > > > create mode 100644 drivers/gpu/drm/i915/display/intel_pmdemand.h > > > > > > > > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile > > > > index 9af76e376ca9..eb899fa86e51 100644 > > > > --- a/drivers/gpu/drm/i915/Makefile > > > > +++ b/drivers/gpu/drm/i915/Makefile > > > > @@ -281,7 +281,8 @@ i915-y += \ > > > > display/i9xx_wm.o \ > > > > display/skl_scaler.o \ > > > > display/skl_universal_plane.o \ > > > > - display/skl_watermark.o > > > > + display/skl_watermark.o \ > > > > + display/intel_pmdemand.o > > > > i915-$(CONFIG_ACPI) += \ > > > > display/intel_acpi.o \ > > > > display/intel_opregion.o > > > > diff --git a/drivers/gpu/drm/i915/display/intel_display.c > > > > b/drivers/gpu/drm/i915/display/intel_display.c > > > > index bf391a6cd8d6..f98e235fadc6 100644 > > > > --- a/drivers/gpu/drm/i915/display/intel_display.c > > > > +++ b/drivers/gpu/drm/i915/display/intel_display.c > > > > @@ -99,6 +99,7 @@ > > > > #include "intel_pcode.h" > > > > #include "intel_pipe_crc.h" > > > > #include "intel_plane_initial.h" > > > > +#include "intel_pmdemand.h" > > > > #include "intel_pps.h" > > > > #include "intel_psr.h" > > > > #include "intel_sdvo.h" > > > > @@ -6306,6 +6307,10 @@ int intel_atomic_check(struct drm_device *dev, > > > > return ret; > > > > } > > > > > > > > + ret = intel_pmdemand_atomic_check(state); > > > > + if (ret) > > > > + goto fail; > > > > + > > > > ret = intel_atomic_check_crtcs(state); > > > > if (ret) > > > > goto fail; > > > > @@ -6960,6 +6965,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) > > > > } > > > > > > > > intel_sagv_pre_plane_update(state); > > > > + intel_pmdemand_pre_plane_update(state); > > > > > > > > /* Complete the events for pipes that have now been disabled */ > > > > for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { > > > > @@ -7070,6 +7076,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) > > > > intel_verify_planes(state); > > > > > > > > intel_sagv_post_plane_update(state); > > > > + intel_pmdemand_post_plane_update(state); > > > > > > > > drm_atomic_helper_commit_hw_done(&state->base); > > > > > > > > diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h > > > > b/drivers/gpu/drm/i915/display/intel_display_core.h > > > > index 9f66d734edf6..9471a052aa57 100644 > > > > --- a/drivers/gpu/drm/i915/display/intel_display_core.h > > > > +++ b/drivers/gpu/drm/i915/display/intel_display_core.h > > > > @@ -345,6 +345,12 @@ struct intel_display { > > > > struct intel_global_obj obj; > > > > } dbuf; > > > > > > > > + struct { > > > > + wait_queue_head_t waitqueue; > > > > + struct mutex lock; > > > > + struct intel_global_obj obj; > > > > + } pmdemand; > > > > + > > > > struct { > > > > /* > > > > * dkl.phy_lock protects against concurrent access of the > > > > diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c > > > > b/drivers/gpu/drm/i915/display/intel_display_driver.c > > > > index 60ce10fc7205..79853d8c3240 100644 > > > > --- a/drivers/gpu/drm/i915/display/intel_display_driver.c > > > > +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c > > > > @@ -47,6 +47,7 @@ > > > > #include "intel_opregion.h" > > > > #include "intel_overlay.h" > > > > #include "intel_plane_initial.h" > > > > +#include "intel_pmdemand.h" > > > > #include "intel_pps.h" > > > > #include "intel_quirks.h" > > > > #include "intel_vga.h" > > > > @@ -211,6 +212,8 @@ int intel_display_driver_probe_noirq(struct drm_i915_private *i915) > > > > if (ret < 0) > > > > goto cleanup_vga; > > > > > > > > + intel_pmdemand_init(i915); > > > > + > > > > intel_power_domains_init_hw(i915, false); > > > > > > > > if (!HAS_DISPLAY(i915)) > > > > @@ -240,6 +243,10 @@ int intel_display_driver_probe_noirq(struct drm_i915_private *i915) > > > > if (ret) > > > > goto cleanup_vga_client_pw_domain_dmc; > > > > > > > > + ret = intel_pmdemand_state_init(i915); > > > > + if (ret) > > > > + goto cleanup_vga_client_pw_domain_dmc; > > > > + > > > > init_llist_head(&i915->display.atomic_helper.free_list); > > > > INIT_WORK(&i915->display.atomic_helper.free_work, > > > > intel_atomic_helper_free_state_worker); > > > > diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c > > > > b/drivers/gpu/drm/i915/display/intel_display_power.c > > > > index 5150069f3f82..f5c5a486efbc 100644 > > > > --- a/drivers/gpu/drm/i915/display/intel_display_power.c > > > > +++ b/drivers/gpu/drm/i915/display/intel_display_power.c > > > > @@ -20,6 +20,7 @@ > > > > #include "intel_mchbar_regs.h" > > > > #include "intel_pch_refclk.h" > > > > #include "intel_pcode.h" > > > > +#include "intel_pmdemand.h" > > > > #include "intel_pps_regs.h" > > > > #include "intel_snps_phy.h" > > > > #include "skl_watermark.h" > > > > @@ -1085,6 +1086,10 @@ static void gen9_dbuf_enable(struct drm_i915_private *dev_priv) > > > > dev_priv->display.dbuf.enabled_slices = > > > > intel_enabled_dbuf_slices_mask(dev_priv); > > > > > > > > + if (DISPLAY_VER(dev_priv) >= 14) > > > > + intel_program_dbuf_pmdemand(dev_priv, BIT(DBUF_S1) | > > > > + dev_priv->display.dbuf.enabled_slices); > > > > + > > > > /* > > > > * Just power up at least 1 slice, we will > > > > * figure out later which slices we have and what we need. > > > > @@ -1096,6 +1101,9 @@ static void gen9_dbuf_enable(struct drm_i915_private *dev_priv) > > > > static void gen9_dbuf_disable(struct drm_i915_private *dev_priv) > > > > { > > > > gen9_dbuf_slices_update(dev_priv, 0); > > > > + > > > > + if (DISPLAY_VER(dev_priv) >= 14) > > > > + intel_program_dbuf_pmdemand(dev_priv, 0); > > > > } > > > > > > > > static void gen12_dbuf_slices_config(struct drm_i915_private *dev_priv) > > > > diff --git a/drivers/gpu/drm/i915/display/intel_pmdemand.c > > > > b/drivers/gpu/drm/i915/display/intel_pmdemand.c > > > > new file mode 100644 > > > > index 000000000000..df6429e7059d > > > > --- /dev/null > > > > +++ b/drivers/gpu/drm/i915/display/intel_pmdemand.c > > > > @@ -0,0 +1,455 @@ > > > > +// SPDX-License-Identifier: MIT > > > > +/* > > > > + * Copyright © 2024 Intel Corporation > > > > + */ > > > > + > > > > +#include <linux/bitops.h> > > > > + > > > > +#include "i915_drv.h" > > > > +#include "i915_reg.h" > > > > +#include "intel_bw.h" > > > > +#include "intel_cdclk.h" > > > > +#include "intel_cx0_phy.h" > > > > +#include "intel_de.h" > > > > +#include "intel_display.h" > > > > +#include "intel_display_trace.h" > > > > +#include "intel_pmdemand.h" > > > > +#include "skl_watermark.h" > > > > + > > > > +struct intel_pmdemand_state { > > > > + struct intel_global_state base; > > > > + > > > > + u16 qclk_gv_bw; > > > > + u8 voltage_index; > > > > + u8 qclk_gv_index; > > > > + u8 active_pipes; > > > > + u8 dbufs; > > > > + u8 active_phys_plls_mask; > > > > > > Is u8 enough for the mask? The enum phy shows 9 possible PHY_* members. > > > Also, I think having BUILD_BUG_ON() somewhere in this file to make sure > > > we have enough bits would be nice. > > Thanks. updated. > > > > > > > + u16 cdclk_freq_mhz; > > > > + u16 ddiclk_freq_mhz; > > > > + u8 scalers; > > > > +}; > > > > + > > > > +#define to_intel_pmdemand_state(x) container_of((x), struct intel_pmdemand_state, base) > > > > + > > > > +static struct intel_global_state * > > > > +intel_pmdemand_duplicate_state(struct intel_global_obj *obj) > > > > +{ > > > > + struct intel_pmdemand_state *pmdmnd_state; > > > > + > > > > + pmdmnd_state = kmemdup(obj->state, sizeof(*pmdmnd_state), GFP_KERNEL); > > > > + if (!pmdmnd_state) > > > > + return NULL; > > > > + > > > > + return &pmdmnd_state->base; > > > > +} > > > > + > > > > +static void intel_pmdemand_destroy_state(struct intel_global_obj *obj, > > > > + struct intel_global_state *state) > > > > +{ > > > > + kfree(state); > > > > +} > > > > + > > > > +static const struct intel_global_state_funcs intel_pmdemand_funcs = { > > > > + .atomic_duplicate_state = intel_pmdemand_duplicate_state, > > > > + .atomic_destroy_state = intel_pmdemand_destroy_state, > > > > +}; > > > > + > > > > +static struct intel_pmdemand_state * > > > > +intel_atomic_get_pmdemand_state(struct intel_atomic_state *state) > > > > +{ > > > > + struct drm_i915_private *i915 = to_i915(state->base.dev); > > > > + struct intel_global_state *pmdemand_state; > > > > + > > > > + pmdemand_state = > > > > + intel_atomic_get_global_obj_state(state, > > > > + &i915->display.pmdemand.obj); > > > > + if (IS_ERR(pmdemand_state)) > > > > + return ERR_CAST(pmdemand_state); > > > > + > > > > + return to_intel_pmdemand_state(pmdemand_state); > > > > +} > > > > + > > > > +static struct intel_pmdemand_state * > > > > +intel_atomic_get_old_pmdemand_state(struct intel_atomic_state *state) > > > > +{ > > > > + struct drm_i915_private *i915 = to_i915(state->base.dev); > > > > + struct intel_global_state *pmdemand_state; > > > > + > > > > + pmdemand_state = intel_atomic_get_old_global_obj_state(state, &i915- > > > > >display.pmdemand.obj); > > > > > > Wouldn't it be safer if we returned early here when pmdemand_state is > > > NULL? > > > > > > I think to_intel_pmdemand_state(NULL) pmdemand_state just happens to > > > work (i.e. still returns NULL) because the "base" member is at the > > > beginning of the struct. However, we shouldn't rely on that IMO. > > > > Well. It is a valid point. But the base pointer is the first member for exaclty the reason you > > pointed out. So to prevent someone from accidentally move that "base" from that position, I > > added a > > BUILD_BUG_ON() check. There are few other state objects which lack such a check. I will address > > that > > as a separate patch. > > Hm... Not sure I am totally convinced here. If we are enforcing (with > BUILD_BUG_ON()) the base to be the first member, then what is the point in using > container_of() instead of just a simple cast? > > I believe one of the points of using container_of() is that it alows we *not* to > enforce a strict layout of the containing struct for things to work. > > Now, if for some (yet unkown) reason we need to move the "base" member in the > future, it would be difficult to find all places where the pointer could be NULL > but we relied one the assumption that "base" would always be at offset 0. I agree! But for example, bw_state, cdclk_state etc. use similar approach. So assumed the base is the first member for that reason! Anyway I can check for NULL explicitly in this case! > > > > > > > > > > + > > > > + return to_intel_pmdemand_state(pmdemand_state); > > > > +} > > > > + > > > > +static struct intel_pmdemand_state * > > > > +intel_atomic_get_new_pmdemand_state(struct intel_atomic_state *state) > > > > +{ > > > > + struct drm_i915_private *i915 = to_i915(state->base.dev); > > > > + struct intel_global_state *pmdemand_state; > > > > + > > > > + pmdemand_state = intel_atomic_get_new_global_obj_state(state, &i915- > > > > >display.pmdemand.obj); > > > > > > Just as with intel_atomic_get_old_pmdemand_state(), shouldn't we return > > > early if pmdemand_state is NULL here? > > > > > > > + > > > > + return to_intel_pmdemand_state(pmdemand_state); > > > > +} > > > > + > > > > +int intel_pmdemand_state_init(struct drm_i915_private *i915) > > > > +{ > > > > + struct intel_pmdemand_state *pmdemand_state; > > > > + > > > > + pmdemand_state = kzalloc(sizeof(*pmdemand_state), GFP_KERNEL); > > > > + if (!pmdemand_state) > > > > + return -ENOMEM; > > > > + > > > > + intel_atomic_global_obj_init(i915, &i915->display.pmdemand.obj, > > > > + &pmdemand_state->base, > > > > + &intel_pmdemand_funcs); > > > > + > > > > + > > > > + if (IS_MTL_DISPLAY_STEP(i915, STEP_A0, STEP_C0)) > > > > + /* Wa_14016740474 */ > > > > + intel_de_rmw(i915, XELPD_CHICKEN_DCPR_3, 0, DMD_RSP_TIMEOUT_DISABLE); > > > > + > > > > + return 0; > > > > +} > > > > + > > > > +void intel_pmdemand_init(struct drm_i915_private *i915) > > > > +{ > > > > + mutex_init(&i915->display.pmdemand.lock); > > > > + init_waitqueue_head(&i915->display.pmdemand.waitqueue); > > > > +} > > > > > > The functions intel_pmdemand_state_init() and intel_pmdemand_init() are > > > both called from the same place. Furthermore, > > > intel_pmdemand_state_init() isn't only initializing the state, as the > > > Wa_14016740474 workaround is programmed there. Could we have only the > > > function intel_pmdemand_init() and incorporate what > > > intel_pmdemand_state_init() does in it? > > > > I tried that earlier and wasn't possible. Because the intel_power_domains_init_hw() will call > > the > > pmdemand debuf config. And I can't recall the other issue. But I will confirm this again and > > update > > as I am not able to try this locally. > > Well, regarding intel_power_domains_init_hw(), wouldn't the solution be to call > the single init function for PM Demand before that one is called? Am I missing > something? Yeah.. the problem with that is, INIT_LIST_HEAD(&i915->display.global.obj_list); happens in 228,2: intel_mode_config_init(i915); which is called bit later! [ 17.238639] DR3: 0000000000000000 DR6: 00000000ffff07f0 DR7: 0000000000000400 [ 17.238640] PKRU: 55555554 [ 17.238642] Call Trace: [ 17.238643] <TASK> [ 17.238644] intel_atomic_global_obj_init+0x49/0x70 [i915] [ 17.238769] intel_pmdemand_init+0x3f/0xf0 [i915] [ 17.238887] intel_display_driver_probe_noirq+0x94/0x2d0 [i915] [ 17.239005] i915_driver_probe+0x4ce/0xce0 [i915] [ 17.239095] i915_pci_probe+0xc6/0x1f0 [i915] [ 17.239181] pci_device_probe+0x9e/0x160 > > > > > > > > + > > > > +static bool pmdemand_needs_update(struct intel_atomic_state *state) > > > > +{ > > > > + bool changed = false; > > > > + struct intel_crtc *crtc; > > > > + int i; > > > > + const struct intel_bw_state *new_bw_state, *old_bw_state; > > > > + const struct intel_cdclk_state *new_cdclk_state; > > > > + const struct intel_crtc_state *new_crtc_state, *old_crtc_state; > > > > + const struct intel_dbuf_state *new_dbuf_state, *old_dbuf_state; > > > > + > > > > + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, > > > > + new_crtc_state, i) { > > > > + new_bw_state = intel_atomic_get_new_bw_state(state); > > > > + old_bw_state = intel_atomic_get_old_bw_state(state); > > > > + > > > > + new_dbuf_state = intel_atomic_get_new_dbuf_state(state); > > > > + old_dbuf_state = intel_atomic_get_old_dbuf_state(state); > > > > + > > > > + new_cdclk_state = intel_atomic_get_new_cdclk_state(state); > > > > + > > > > + if ((new_bw_state && new_bw_state->qgv_point_peakbw != > > > > + old_bw_state->qgv_point_peakbw) || > > > > + (new_dbuf_state && new_dbuf_state->active_pipes != > > > > + old_dbuf_state->active_pipes) || new_cdclk_state) > > > > + changed = true; > > > > + > > > > + /* > > > > + * break needs to be removed, if some crtc_state dependent > > > > + * parameters are added here > > > > + */ > > > > + break; > > > > + } > > > > + > > > > + return changed; > > > > +} > > > > + > > > > +int intel_pmdemand_atomic_check(struct intel_atomic_state *state) > > > > +{ > > > > + struct drm_i915_private *i915 = to_i915(state->base.dev); > > > > + int port_clock = 0; > > > > + struct intel_crtc *crtc; > > > > + struct intel_encoder *encoder; > > > > + const struct intel_bw_state *new_bw_state; > > > > + const struct intel_cdclk_state *new_cdclk_state; > > > > + const struct intel_crtc_state *new_crtc_state, *old_crtc_state; > > > > + const struct intel_dbuf_state *new_dbuf_state; > > > > + struct intel_pmdemand_state *new_pmdemand_state; > > > > + enum phy phy; > > > > + int i, ret; > > > > + > > > > + if (DISPLAY_VER(i915) < 14) > > > > + return 0; > > > > + > > > > + if (!pmdemand_needs_update(state)) > > > > + return 0; > > > > + > > > > + new_pmdemand_state = intel_atomic_get_pmdemand_state(state); > > > > + if (IS_ERR(new_pmdemand_state)) > > > > + return PTR_ERR(new_pmdemand_state); > > > > + > > > > + ret = intel_atomic_lock_global_state(&new_pmdemand_state->base); > > > > + if (ret) > > > > + return ret; > > > > + > > > > + /* Punit figures out the voltage index based on bandwidth*/ > > > > + new_bw_state = intel_atomic_get_bw_state(state); > > > > + if (IS_ERR(new_bw_state)) > > > > + return PTR_ERR(new_bw_state); > > > > + > > > > + /* firmware will calculate the qclck_gc_index, requirement is set to 0 */ > > > > + new_pmdemand_state->qclk_gv_index = 0; > > > > + new_pmdemand_state->qclk_gv_bw = > > > > + min_t(u16, new_bw_state->qgv_point_peakbw, 0xffff); > > > > + > > > > + new_dbuf_state = intel_atomic_get_dbuf_state(state); > > > > + if (IS_ERR(new_dbuf_state)) > > > > + return PTR_ERR(new_dbuf_state); > > > > + > > > > + i = hweight8(new_dbuf_state->active_pipes); > > > > + new_pmdemand_state->active_pipes = min(i, 3); > > > > + > > > > + new_cdclk_state = intel_atomic_get_cdclk_state(state); > > > > + if (IS_ERR(new_cdclk_state)) > > > > + return PTR_ERR(new_cdclk_state); > > > > + > > > > + new_pmdemand_state->voltage_index = > > > > + new_cdclk_state->logical.voltage_level; > > > > + /* KHz to MHz */ > > > > + new_pmdemand_state->cdclk_freq_mhz = > > > > + DIV_ROUND_UP(new_cdclk_state->logical.cdclk, 1000); > > > > + > > > > + new_pmdemand_state->active_phys_plls_mask = 0; > > > > + > > > > + for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, > > > > + new_crtc_state, i) { > > > > + if (!new_crtc_state->hw.active) > > > > + continue; > > > > + > > > > + encoder = intel_get_crtc_new_encoder(state, new_crtc_state); > > > > + if (!encoder) > > > > + continue; > > > > + > > > > + phy = intel_port_to_phy(i915, encoder->port); > > > > + > > > > + if (intel_is_c10phy(i915, phy)) > > > > + new_pmdemand_state->active_phys_plls_mask |= BIT(phy); > > > > + > > > > + port_clock = max(port_clock, new_crtc_state->port_clock); > > > > + } > > > > > > As previously noted in https://patchwork.freedesktop.org/patch/530495 ;, > > > I'm under the impression that this loop would not let us account for al > > > active crtcs, only those currently being touched by this atomic > > > transaction. Am I wrong to assume that > > > for_each_oldnew_intel_crtc_in_state() would only iterate over crtcs > > > touched by the atomic update? > > > > I checked the intel_bw_check_data_rate() which should be doing something similar to find out > > about > > the datarate from eachi pipe and figure out the total data rate. So I thought this should be > > sufficient. > > I took a quick look at intel_bw_check_data_rate()'s implementation and I think > it works there because struct intel_bw_state has arrays containing values for > each pipe, meaning that those not included in the atomic update would already > contain the current values from previous transactions. > > For the number of active phy PLLs, I believe we can handle it similarly to how > it is done by intel_calc_active_pipes(). > > Now, for the maximum port_clock, maybe we could use the array approach like done > with intel_bw_check_data_rate(). So we can always have all port_clock values > here. Based on my current understanding, the iteration to calculate the max ddiclk seems okay! for_each_ondnew_intel_crtc_state() iterate through all the crtcs and I think we can use max(max, ddiclk[i]) here! Tried some of the tests with multiple displays and seems to find the max ddiclk fine. About the active phys, i changed the logic! Now i iterate through the connector_state and finds the active connectors and update corresponding phys. As we just need the number of active phys, I got rid of the mask for active phys. I will confirm this approach with some experts here as well! Thanks Vinod > > > > > > > > > > + > > > > + /* To MHz */ > > > > + new_pmdemand_state->ddiclk_freq_mhz = DIV_ROUND_UP(port_clock, 1000); > > > > + > > > > + /* > > > > + * Setting scalers to max as it can not be calculated during flips and > > > > + * fastsets without taking global states locks. > > > > + */ > > > > + new_pmdemand_state->scalers = 7; > > > > + > > > > + ret = intel_atomic_serialize_global_state(&new_pmdemand_state->base); > > > > + if (ret) > > > > + return ret; > > > > + > > > > + return 0; > > > > +} > > > > + > > > > +static bool intel_pmdemand_check_prev_transaction(struct drm_i915_private *i915) > > > > +{ > > > > + return !((intel_de_read(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(1)) & > > > > + XELPDP_PMDEMAND_REQ_ENABLE) || > > > > + (intel_de_read(i915, GEN12_DCPR_STATUS_1) & > > > > + XELPDP_PMDEMAND_INFLIGHT_STATUS)); > > > > +} > > > > + > > > > +static bool intel_pmdemand_req_complete(struct drm_i915_private *i915) > > > > +{ > > > > + return !(intel_de_read(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(1)) & > > > > + XELPDP_PMDEMAND_REQ_ENABLE); > > > > +} > > > > + > > > > +static int intel_pmdemand_wait(struct drm_i915_private *i915) > > > > +{ > > > > + DEFINE_WAIT(wait); > > > > + int ret; > > > > + const unsigned int timeout_ms = 10; > > > > + > > > > + ret = wait_event_timeout(i915->display.pmdemand.waitqueue, > > > > + intel_pmdemand_req_complete(i915), > > > > + msecs_to_jiffies_timeout(timeout_ms)); > > > > + if (ret == 0) > > > > + drm_err(&i915->drm, > > > > + "timed out waiting for Punit PM Demand Response\n"); > > > > + > > > > + return ret; > > > > +} > > > > + > > > > +/* Required to be programmed during Display Init Sequences. */ > > > > +void intel_program_dbuf_pmdemand(struct drm_i915_private *i915, > > > > + u8 dbuf_slices) > > > > +{ > > > > + u32 dbufs = min_t(u32, hweight8(dbuf_slices), 3); > > > > + > > > > + mutex_lock(&i915->display.pmdemand.lock); > > > > + if (drm_WARN_ON(&i915->drm, > > > > + !intel_pmdemand_check_prev_transaction(i915))) > > > > + goto unlock; > > > > + > > > > + intel_de_rmw(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(0), > > > > + XELPDP_PMDEMAND_DBUFS_MASK, XELPDP_PMDEMAND_DBUFS(dbufs)); > > > > + intel_de_rmw(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(1), 0, > > > > + XELPDP_PMDEMAND_REQ_ENABLE); > > > > + > > > > + intel_pmdemand_wait(i915); > > > > + > > > > +unlock: > > > > + mutex_unlock(&i915->display.pmdemand.lock); > > > > +} > > > > + > > > > +static void update_pmdemand_values(const struct intel_pmdemand_state *new, > > > > + const struct intel_pmdemand_state *old, > > > > + u32 *reg1, u32 *reg2) > > > > +{ > > > > + u32 plls, tmp; > > > > + > > > > + /* > > > > + * The pmdemand parameter updates happens in two steps. Pre plane and > > > > + * post plane updates. During the pre plane, as DE might still be > > > > + * handling with some old operations, to avoid unwanted performance > > > > + * issues, program the pmdemand parameters with higher of old and new > > > > + * values. And then after once settled, use the new parameter values > > > > + * as part of the post plane update. > > > > + */ > > > > + > > > > + /* Set 1*/ > > > > + *reg1 &= ~XELPDP_PMDEMAND_QCLK_GV_BW_MASK; > > > > + tmp = old ? max(old->qclk_gv_bw, new->qclk_gv_bw) : new->qclk_gv_bw; > > > > + *reg1 |= XELPDP_PMDEMAND_QCLK_GV_BW(tmp); > > > > + > > > > + *reg1 &= ~XELPDP_PMDEMAND_VOLTAGE_INDEX_MASK; > > > > + tmp = old ? max(old->voltage_index, new->voltage_index) : > > > > + new->voltage_index; > > > > + *reg1 |= XELPDP_PMDEMAND_VOLTAGE_INDEX(tmp); > > > > + > > > > + *reg1 &= ~XELPDP_PMDEMAND_QCLK_GV_INDEX_MASK; > > > > + tmp = old ? max(old->qclk_gv_index, new->qclk_gv_index) : > > > > + new->qclk_gv_index; > > > > + *reg1 |= XELPDP_PMDEMAND_QCLK_GV_INDEX(tmp); > > > > + > > > > + *reg1 &= ~XELPDP_PMDEMAND_PIPES_MASK; > > > > + tmp = old ? max(old->active_pipes, new->active_pipes) : > > > > + new->active_pipes; > > > > + *reg1 |= XELPDP_PMDEMAND_PIPES(tmp); > > > > + > > > > + *reg1 &= ~XELPDP_PMDEMAND_PHYS_MASK; > > > > + plls = hweight32(new->active_phys_plls_mask); > > > > + if (old) > > > > + plls = max(plls, hweight32(old->active_phys_plls_mask)); > > > > + *reg1 |= XELPDP_PMDEMAND_PHYS(plls); > > > > > > If plls > 7, we would be potentially programming this wrong (e.g. for > > > plls=8, we would setting the field to 0). > > > > Thanks for pointing that out. Fixed. > > > > > > > > > + > > > > + /* Set 2*/ > > > > + *reg2 &= ~XELPDP_PMDEMAND_CDCLK_FREQ_MASK; > > > > + tmp = old ? max(old->cdclk_freq_mhz, new->cdclk_freq_mhz) : > > > > + new->cdclk_freq_mhz; > > > > + *reg2 |= XELPDP_PMDEMAND_CDCLK_FREQ(tmp); > > > > + > > > > + *reg2 &= ~XELPDP_PMDEMAND_DDICLK_FREQ_MASK; > > > > + tmp = old ? max(old->ddiclk_freq_mhz, new->ddiclk_freq_mhz) : > > > > + new->ddiclk_freq_mhz; > > > > + *reg2 |= XELPDP_PMDEMAND_DDICLK_FREQ(tmp); > > > > + > > > > + /* Hard code scalers to 7*/ > > > > > > I think this comment can be dropped: the hardcoding happens in > > > intel_pmdemand_atomic_check(). > > > > Done > > > > > > > + *reg2 &= ~XELPDP_PMDEMAND_SCALERS_MASK; > > > > + tmp = old ? max(old->scalers, new->scalers) : new->scalers; > > > > + *reg2 |= XELPDP_PMDEMAND_SCALERS(tmp); > > > > + > > > > + /* > > > > + * Active_PLLs starts with 1 because of CDCLK PLL. > > > > + * TODO: Missing to account genlock filter when it gets used. > > > > + */ > > > > + *reg2 &= ~XELPDP_PMDEMAND_PLLS_MASK; > > > > > > I think we are missing the ternary operator here to select the maximum > > > value for the pre-plane case. > > As pet the above comments, it is just the plls from step earlier + 1 > > Ah, got it. Thanks. > > > > > > > > > > + *reg2 |= XELPDP_PMDEMAND_PLLS(plls + 1); > > > > +} > > > > + > > > > +static void intel_program_pmdemand(struct drm_i915_private *i915, > > > > + const struct intel_pmdemand_state *new, > > > > + const struct intel_pmdemand_state *old) > > > > +{ > > > > + bool changed = false; > > > > + u32 reg1, mod_reg1; > > > > + u32 reg2, mod_reg2; > > > > + > > > > + mutex_lock(&i915->display.pmdemand.lock); > > > > + if (drm_WARN_ON(&i915->drm, > > > > + !intel_pmdemand_check_prev_transaction(i915))) > > > > + goto unlock; > > > > > > According to the spec, we should wait and timeout after 10ms here. > > > > > > > + > > > > + reg1 = intel_de_read(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(0)); > > > > + mod_reg1 = reg1; > > > > + > > > > + reg2 = intel_de_read(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(1)); > > > > + mod_reg2 = reg2; > > > > + > > > > + update_pmdemand_values(new, old, &mod_reg1, &mod_reg2); > > > > + > > > > + if (reg1 != mod_reg1) { > > > > + intel_de_write(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(0), > > > > + mod_reg1); > > > > + changed = true; > > > > + } > > > > + > > > > + if (reg2 != mod_reg2) { > > > > + intel_de_write(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(1), > > > > + mod_reg2); > > > > + changed = true; > > > > + } > > > > + > > > > + /* Initiate pm demand request only if register values are changed */ > > > > + if (changed) { > > > > > > Nitpick: we could have > > > > > > if (!changed) > > > goto unlock; > > > > > > and dedent the block below. > > > > Done. > > > > > > > + drm_dbg_kms(&i915->drm, > > > > + "initate pmdemand request values: (0x%x 0x%x)\n", > > > > + mod_reg1, mod_reg2); > > > > + > > > > + intel_de_rmw(i915, XELPDP_INITIATE_PMDEMAND_REQUEST(1), 0, > > > > + XELPDP_PMDEMAND_REQ_ENABLE); > > > > + > > > > + intel_pmdemand_wait(i915); > > > > + } > > > > + > > > > +unlock: > > > > + mutex_unlock(&i915->display.pmdemand.lock); > > > > +} > > > > + > > > > +static bool > > > > +intel_pmdemand_state_changed(const struct intel_pmdemand_state *new, > > > > + const struct intel_pmdemand_state *old) > > > > +{ > > > > + return memcmp(&new->qclk_gv_bw, &old->qclk_gv_bw, > > > > + sizeof(*new) - offsetof(typeof(*new), qclk_gv_bw)) != 0; > > > > +} > > > > + > > > > +void intel_pmdemand_pre_plane_update(struct intel_atomic_state *state) > > > > +{ > > > > + struct drm_i915_private *i915 = to_i915(state->base.dev); > > > > + const struct intel_pmdemand_state *new_pmdmnd_state = > > > > + intel_atomic_get_new_pmdemand_state(state); > > > > + const struct intel_pmdemand_state *old_pmdmnd_state = > > > > + intel_atomic_get_old_pmdemand_state(state); > > > > + > > > > + if (DISPLAY_VER(i915) < 14) > > > > + return; > > > > + > > > > + if (!new_pmdmnd_state || > > > > + !intel_pmdemand_state_changed(new_pmdmnd_state, old_pmdmnd_state)) > > > > + return; > > > > + > > > > + intel_program_pmdemand(i915, new_pmdmnd_state, old_pmdmnd_state); > > > > +} > > > > + > > > > +void intel_pmdemand_post_plane_update(struct intel_atomic_state *state) > > > > +{ > > > > + struct drm_i915_private *i915 = to_i915(state->base.dev); > > > > + const struct intel_pmdemand_state *new_pmdmnd_state = > > > > + intel_atomic_get_new_pmdemand_state(state); > > > > + const struct intel_pmdemand_state *old_pmdmnd_state = > > > > + intel_atomic_get_old_pmdemand_state(state); > > > > + > > > > + if (DISPLAY_VER(i915) < 14) > > > > + return; > > > > + > > > > + if (!new_pmdmnd_state || > > > > + !intel_pmdemand_state_changed(new_pmdmnd_state, old_pmdmnd_state)) > > > > + return; > > > > + > > > > + intel_program_pmdemand(i915, new_pmdmnd_state, NULL); > > > > +} > > > > diff --git a/drivers/gpu/drm/i915/display/intel_pmdemand.h > > > > b/drivers/gpu/drm/i915/display/intel_pmdemand.h > > > > new file mode 100644 > > > > index 000000000000..0114f4e0225a > > > > --- /dev/null > > > > +++ b/drivers/gpu/drm/i915/display/intel_pmdemand.h > > > > @@ -0,0 +1,24 @@ > > > > +/* SPDX-License-Identifier: MIT */ > > > > +/* > > > > + * Copyright © 2023 Intel Corporation > > > > + */ > > > > + > > > > +#ifndef __INTEL_PMDEMAND_H__ > > > > +#define __INTEL_PMDEMAND_H__ > > > > + > > > > +#include <linux/types.h> > > > > + > > > > +struct drm_i915_private; > > > > +struct intel_atomic_state; > > > > +struct intel_crtc_state; > > > > +struct intel_plane_state; > > > > + > > > > +void intel_pmdemand_init(struct drm_i915_private *i915); > > > > +int intel_pmdemand_state_init(struct drm_i915_private *i915); > > > > +void intel_program_dbuf_pmdemand(struct drm_i915_private *i915, > > > > + u8 dbuf_slices); > > > > +void intel_pmdemand_pre_plane_update(struct intel_atomic_state *state); > > > > +void intel_pmdemand_post_plane_update(struct intel_atomic_state *state); > > > > +int intel_pmdemand_atomic_check(struct intel_atomic_state *state); > > > > + > > > > +#endif /* __INTEL_PMDEMAND_H__ */ > > > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > > > > index 2b94b8ca8ec9..907fa3aee179 100644 > > > > --- a/drivers/gpu/drm/i915/i915_irq.c > > > > +++ b/drivers/gpu/drm/i915/i915_irq.c > > > > @@ -41,6 +41,7 @@ > > > > #include "display/intel_fifo_underrun.h" > > > > #include "display/intel_hotplug.h" > > > > #include "display/intel_lpe_audio.h" > > > > +#include "display/intel_pmdemand.h" > > > > #include "display/intel_psr.h" > > > > #include "display/intel_psr_regs.h" > > > > > > > > @@ -1986,12 +1987,25 @@ static u32 gen8_de_pipe_fault_mask(struct drm_i915_private > > > > *dev_priv) > > > > return GEN8_DE_PIPE_IRQ_FAULT_ERRORS; > > > > } > > > > > > > > +static void intel_pmdemand_irq_handler(struct drm_i915_private *dev_priv) > > > > +{ > > > > + wake_up_all(&dev_priv->display.pmdemand.waitqueue); > > > > +} > > > > + > > > > static void > > > > gen8_de_misc_irq_handler(struct drm_i915_private *dev_priv, u32 iir) > > > > { > > > > bool found = false; > > > > > > > > - if (iir & GEN8_DE_MISC_GSE) { > > > > + if (DISPLAY_VER(dev_priv) >= 14 && > > > > + (iir & (XELPDP_PMDEMAND_RSP | XELPDP_PMDEMAND_RSPTOUT_ERR))) { > > > > + if (iir & XELPDP_PMDEMAND_RSPTOUT_ERR) > > > > > > I think we should have the (iir & (XELPDP_PMDEMAND_RSP | > > > XELPDP_PMDEMAND_RSPTOUT_ERR)) part as nested if statement here. > > > Otherwise, when the interrupt did not happen, we could endup checking > > > for the GEN8_DE_MISC_GSE even when DISPLAY_VER(dev_priv) >= 14. > > > > > > Even though we know that iir & GEN8_DE_MISC_GSE would be false in this > > > situation (because both XELPDP_PMDEMAND_RSPTOUT_ERR and GEN8_DE_MISC_GSE > > > map to the same bit), I think having that one checked only for previous > > > display engines would sound more correct semantically speaking. > > > > Thanks. updated. > > > > > > > > > + drm_dbg(&dev_priv->drm, > > > > + "Error waiting for Punit PM Demand Response\n"); > > > > + > > > > + intel_pmdemand_irq_handler(dev_priv); > > > > + found = true; > > > > + } else if (iir & GEN8_DE_MISC_GSE) { > > > > intel_opregion_asle_intr(dev_priv); > > > > found = true; > > > > } > > > > @@ -3742,7 +3756,10 @@ static void gen8_de_irq_postinstall(struct drm_i915_private > > > > *dev_priv) > > > > if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) > > > > de_port_masked |= BXT_DE_PORT_GMBUS; > > > > > > > > - if (DISPLAY_VER(dev_priv) >= 11) { > > > > + if (DISPLAY_VER(dev_priv) >= 14) > > > > + de_misc_masked |= XELPDP_PMDEMAND_RSPTOUT_ERR | > > > > + XELPDP_PMDEMAND_RSP; > > > > + else if (DISPLAY_VER(dev_priv) >= 11) { > > > > enum port port; > > > > > > > > if (intel_bios_is_dsi_present(dev_priv, &port)) > > > > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > > > > index dde6e91055bd..60c007aea1ce 100644 > > > > --- a/drivers/gpu/drm/i915/i915_reg.h > > > > +++ b/drivers/gpu/drm/i915/i915_reg.h > > > > @@ -4426,8 +4426,10 @@ > > > > #define GEN8_DE_MISC_IMR _MMIO(0x44464) > > > > #define GEN8_DE_MISC_IIR _MMIO(0x44468) > > > > #define GEN8_DE_MISC_IER _MMIO(0x4446c) > > > > -#define GEN8_DE_MISC_GSE (1 << 27) > > > > -#define GEN8_DE_EDP_PSR (1 << 19) > > > > +#define XELPDP_PMDEMAND_RSPTOUT_ERR REG_BIT(27) > > > > +#define GEN8_DE_MISC_GSE REG_BIT(27) > > > > +#define GEN8_DE_EDP_PSR REG_BIT(19) > > > > +#define XELPDP_PMDEMAND_RSP REG_BIT(3) > > > > > > > > #define GEN8_PCU_ISR _MMIO(0x444e0) > > > > #define GEN8_PCU_IMR _MMIO(0x444e4) > > > > @@ -4512,6 +4514,33 @@ > > > > #define XELPDP_DP_ALT_HPD_LONG_DETECT REG_BIT(1) > > > > #define XELPDP_DP_ALT_HPD_SHORT_DETECT REG_BIT(0) > > > > > > > > +#define XELPDP_INITIATE_PMDEMAND_REQUEST(dword) _MMIO(0x45230 + 4 * (dword)) > > > > +#define XELPDP_PMDEMAND_QCLK_GV_BW_MASK REG_GENMASK(31, 16) > > > > +#define XELPDP_PMDEMAND_QCLK_GV_BW(x) > > > > REG_FIELD_PREP(XELPDP_PMDEMAND_QCLK_GV_BW_MASK, x) > > > > +#define XELPDP_PMDEMAND_VOLTAGE_INDEX_MASK REG_GENMASK(14, 12) > > > > +#define XELPDP_PMDEMAND_VOLTAGE_INDEX(x) > > > > REG_FIELD_PREP(XELPDP_PMDEMAND_VOLTAGE_INDEX_MASK, x) > > > > +#define XELPDP_PMDEMAND_QCLK_GV_INDEX_MASK REG_GENMASK(11, 8) > > > > +#define XELPDP_PMDEMAND_QCLK_GV_INDEX(x) > > > > REG_FIELD_PREP(XELPDP_PMDEMAND_QCLK_GV_INDEX_MASK, x) > > > > +#define XELPDP_PMDEMAND_PIPES_MASK REG_GENMASK(7, 6) > > > > +#define XELPDP_PMDEMAND_PIPES(x) > > > > REG_FIELD_PREP(XELPDP_PMDEMAND_PIPES_MASK, x) > > > > +#define XELPDP_PMDEMAND_DBUFS_MASK REG_GENMASK(5, 4) > > > > +#define XELPDP_PMDEMAND_DBUFS(x) > > > > REG_FIELD_PREP(XELPDP_PMDEMAND_DBUFS_MASK, x) > > > > +#define XELPDP_PMDEMAND_PHYS_MASK REG_GENMASK(2, 0) > > > > +#define XELPDP_PMDEMAND_PHYS(x) REG_FIELD_PREP(XELPDP_PMDEMAND_PHYS_MASK, > > > > x) > > > > + > > > > +#define XELPDP_PMDEMAND_REQ_ENABLE REG_BIT(31) > > > > +#define XELPDP_PMDEMAND_CDCLK_FREQ_MASK REG_GENMASK(30, 20) > > > > +#define XELPDP_PMDEMAND_CDCLK_FREQ(x) > > > > REG_FIELD_PREP(XELPDP_PMDEMAND_CDCLK_FREQ_MASK, x) > > > > +#define XELPDP_PMDEMAND_DDICLK_FREQ_MASK REG_GENMASK(18, 8) > > > > +#define XELPDP_PMDEMAND_DDICLK_FREQ(x) > > > > REG_FIELD_PREP(XELPDP_PMDEMAND_DDICLK_FREQ_MASK, x) > > > > +#define XELPDP_PMDEMAND_SCALERS_MASK REG_GENMASK(6, 4) > > > > +#define XELPDP_PMDEMAND_SCALERS(x) > > > > REG_FIELD_PREP(XELPDP_PMDEMAND_SCALERS_MASK, x) > > > > +#define XELPDP_PMDEMAND_PLLS_MASK REG_GENMASK(2, 0) > > > > +#define XELPDP_PMDEMAND_PLLS(x) REG_FIELD_PREP(XELPDP_PMDEMAND_PLLS_MASK, > > > > x) > > > > + > > > > +#define GEN12_DCPR_STATUS_1 _MMIO(0x46440) > > > > +#define XELPDP_PMDEMAND_INFLIGHT_STATUS REG_BIT(26) > > > > + > > > > #define ILK_DISPLAY_CHICKEN2 _MMIO(0x42004) > > > > /* Required on all Ironlake and Sandybridge according to the B-Spec. */ > > > > #define ILK_ELPIN_409_SELECT REG_BIT(25) > > > > @@ -4671,6 +4700,9 @@ > > > > #define DCPR_SEND_RESP_IMM REG_BIT(25) > > > > #define DCPR_CLEAR_MEMSTAT_DIS REG_BIT(24) > > > > > > > > +#define XELPD_CHICKEN_DCPR_3 _MMIO(0x46438) > > > > +#define DMD_RSP_TIMEOUT_DISABLE REG_BIT(19) > > > > + > > > > #define SKL_DFSM _MMIO(0x51000) > > > > #define SKL_DFSM_DISPLAY_PM_DISABLE (1 << 27) > > > > #define SKL_DFSM_DISPLAY_HDCP_DISABLE (1 << 25) > > > > -- > > > > 2.34.1 > > > > > >