On Wed, 24 May 2023 04:38:18 -0700, Tvrtko Ursulin wrote: > Hi Tvrtko, > On 23/05/2023 16:19, Ashutosh Dixit wrote: > > No functional changes but we can remove some unsightly index computation > > and read/write functions if we convert the PMU sample array from a > > one-dimensional to a two-dimensional array. > > > > Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx> > > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@xxxxxxxxx> > > --- > > drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++----------------------- > > drivers/gpu/drm/i915/i915_pmu.h | 2 +- > > 2 files changed, 19 insertions(+), 43 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c > > index b47d890d4ada1..137e0df9573ee 100644 > > --- a/drivers/gpu/drm/i915/i915_pmu.c > > +++ b/drivers/gpu/drm/i915/i915_pmu.c > > @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt) > > return ktime_to_ns(ktime_sub(ktime_get_raw(), kt)); > > } > > -static unsigned int > > -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample) > > -{ > > - unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample; > > - > > - GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample)); > > - > > - return idx; > > -} > > - > > -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample) > > -{ > > - return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur; > > -} > > - > > -static void > > -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val) > > -{ > > - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val; > > -} > > - > > -static void > > -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul) > > -{ > > - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul); > > -} > > IMO read and store helpers could have stayed and just changed the > implementation. Like add_sample_mult which you just moved. I would have > been a smaller patch. So dunno.. a bit of a reluctant r-b. Are you referring just to add_sample_mult or to all the other functions too? add_sample_mult I moved it to where it was before bc4be0a38b63 ("drm/i915/pmu: Prepare for multi-tile non-engine counters"), could have left it here I guess. The other read and store helpers are not needed with the 2-d array at all since the compiler itself will do that, so I thought it was better to get rid of them completely. Let me know if you want any changes, otherwise I will leave as is. > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Thanks for the review. Thanks Andrzej too :) -- Ashutosh > > - > > static u64 get_rc6(struct intel_gt *gt) > > { > > struct drm_i915_private *i915 = gt->i915; > > @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt) > > spin_lock_irqsave(&pmu->lock, flags); > > if (awake) { > > - store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val); > > + pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val; > > } else { > > /* > > * We think we are runtime suspended. > > @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt) > > * counter value. > > */ > > val = ktime_since_raw(pmu->sleep_last[gt_id]); > > - val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6); > > + val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur; > > } > > - if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED)) > > - val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED); > > + if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur) > > + val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur; > > else > > - store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val); > > + pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val; > > spin_unlock_irqrestore(&pmu->lock, flags); > > @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu) > > with_intel_runtime_pm(gt->uncore->rpm, wakeref) { > > u64 val = __get_rc6(gt); > > - store_sample(pmu, i, __I915_SAMPLE_RC6, val); > > - store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED, > > - val); > > + pmu->sample[i][__I915_SAMPLE_RC6].cur = val; > > + pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val; > > pmu->sleep_last[i] = ktime_get_raw(); > > } > > } > > @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt) > > { > > struct i915_pmu *pmu = >->i915->pmu; > > - store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt)); > > + pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt); > > pmu->sleep_last[gt->info.id] = ktime_get_raw(); > > } > > @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int > > period_ns) > > } > > } > > +static void > > +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul) > > +{ > > + sample->cur += mul_u32_u32(val, mul); > > +} > > + > > static bool > > frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt) > > { > > @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) > > if (!val) > > val = intel_gpu_freq(rps, rps->cur_freq); > > - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT, > > + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT], > > val, period_ns / 1000); > > } > > if (pmu->enable & > > config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) { > > - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ, > > + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ], > > intel_rps_get_requested_frequency(rps), > > period_ns / 1000); > > } > > @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event) > > switch (config) { > > case I915_PMU_ACTUAL_FREQUENCY: > > val = > > - div_u64(read_sample(pmu, gt_id, > > - __I915_SAMPLE_FREQ_ACT), > > + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur, > > USEC_PER_SEC /* to MHz */); > > break; > > case I915_PMU_REQUESTED_FREQUENCY: > > val = > > - div_u64(read_sample(pmu, gt_id, > > - __I915_SAMPLE_FREQ_REQ), > > + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur, > > USEC_PER_SEC /* to MHz */); > > break; > > case I915_PMU_INTERRUPTS: > > diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h > > index 33d80fbaab8bc..d20592e7db999 100644 > > --- a/drivers/gpu/drm/i915/i915_pmu.h > > +++ b/drivers/gpu/drm/i915/i915_pmu.h > > @@ -127,7 +127,7 @@ struct i915_pmu { > > * Only global counters are held here, while the per-engine ones are in > > * struct intel_engine_cs. > > */ > > - struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS]; > > + struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS]; > > /** > > * @sleep_last: Last time GT parked for RC6 estimation. > > */