On Wed, 24 May 2023 10:53:20 -0700, Tvrtko Ursulin wrote: > Hi Tvrtko, > On 24/05/2023 18:38, Dixit, Ashutosh wrote: > > On Wed, 24 May 2023 04:38:18 -0700, Tvrtko Ursulin wrote: > >> On 23/05/2023 16:19, Ashutosh Dixit wrote: > >>> No functional changes but we can remove some unsightly index computation > >>> and read/write functions if we convert the PMU sample array from a > >>> one-dimensional to a two-dimensional array. > >>> > >>> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx> > >>> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@xxxxxxxxx> > >>> --- > >>> drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++----------------------- > >>> drivers/gpu/drm/i915/i915_pmu.h | 2 +- > >>> 2 files changed, 19 insertions(+), 43 deletions(-) > >>> > >>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c > >>> index b47d890d4ada1..137e0df9573ee 100644 > >>> --- a/drivers/gpu/drm/i915/i915_pmu.c > >>> +++ b/drivers/gpu/drm/i915/i915_pmu.c > >>> @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt) > >>> return ktime_to_ns(ktime_sub(ktime_get_raw(), kt)); > >>> } > >>> -static unsigned int > >>> -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample) > >>> -{ > >>> - unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample; > >>> - > >>> - GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample)); > >>> - > >>> - return idx; > >>> -} > >>> - > >>> -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample) > >>> -{ > >>> - return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur; > >>> -} > >>> - > >>> -static void > >>> -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val) > >>> -{ > >>> - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val; > >>> -} > >>> - > >>> -static void > >>> -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul) > >>> -{ > >>> - pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul); > >>> -} > >> > >> IMO read and store helpers could have stayed and just changed the > >> implementation. Like add_sample_mult which you just moved. I would have > >> been a smaller patch. So dunno.. a bit of a reluctant r-b. > > > > Are you referring just to add_sample_mult or to all the other functions > > too? add_sample_mult I moved it to where it was before bc4be0a38b63 > > Read and store helpers. > > > ("drm/i915/pmu: Prepare for multi-tile non-engine counters"), could have > > left it here I guess. > > > > The other read and store helpers are not needed with the 2-d array at all > > since the compiler itself will do that, so I thought it was better to get > > rid of them completely. > > Yes I get it, just that I didn't see the benefit of removing them. > > For example: > > - store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val); > + pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val; > > It's a meh for me. Either flavour looks fine to me so I would have erred on > the side of keeping the patch small. If anything I probably slightly prefer > that the struct pmu_sample implementation was able to be changed with less > churn before. For example. But a very minor argument really. OK, I finally understood and have made this change in Patch v2. Please take a look. > > Or maybe next step is get rid of the struct i915_pmu_sample. It is a struct > because originally previous value was tracked too. Then I removed that and > it was easier to keep the struct. I guess it can go now and then the > removal of helpers here will look somewhat nicer without the trailing .cur > on every affected line. I have left this as is for now in case the i915_pmu_sample need to be expanded again. Should be ok with the read/store helpers. > > > Let me know if you want any changes, otherwise I will leave as is. > > You can leave it as is, I dont' mind much. I went ahead and changed it anyway since you seemed to want it. Thanks. -- Ashutosh > > >> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > > > > Thanks for the review. Thanks Andrzej too :) > > -- > > Ashutosh > > > >>> - > >>> static u64 get_rc6(struct intel_gt *gt) > >>> { > >>> struct drm_i915_private *i915 = gt->i915; > >>> @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt) > >>> spin_lock_irqsave(&pmu->lock, flags); > >>> if (awake) { > >>> - store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val); > >>> + pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val; > >>> } else { > >>> /* > >>> * We think we are runtime suspended. > >>> @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt) > >>> * counter value. > >>> */ > >>> val = ktime_since_raw(pmu->sleep_last[gt_id]); > >>> - val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6); > >>> + val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur; > >>> } > >>> - if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED)) > >>> - val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED); > >>> + if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur) > >>> + val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur; > >>> else > >>> - store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val); > >>> + pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val; > >>> spin_unlock_irqrestore(&pmu->lock, flags); > >>> @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu) > >>> with_intel_runtime_pm(gt->uncore->rpm, wakeref) { > >>> u64 val = __get_rc6(gt); > >>> - store_sample(pmu, i, __I915_SAMPLE_RC6, val); > >>> - store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED, > >>> - val); > >>> + pmu->sample[i][__I915_SAMPLE_RC6].cur = val; > >>> + pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val; > >>> pmu->sleep_last[i] = ktime_get_raw(); > >>> } > >>> } > >>> @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt) > >>> { > >>> struct i915_pmu *pmu = >->i915->pmu; > >>> - store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt)); > >>> + pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt); > >>> pmu->sleep_last[gt->info.id] = ktime_get_raw(); > >>> } > >>> @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int > >>> period_ns) > >>> } > >>> } > >>> +static void > >>> +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul) > >>> +{ > >>> + sample->cur += mul_u32_u32(val, mul); > >>> +} > >>> + > >>> static bool > >>> frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt) > >>> { > >>> @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) > >>> if (!val) > >>> val = intel_gpu_freq(rps, rps->cur_freq); > >>> - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT, > >>> + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT], > >>> val, period_ns / 1000); > >>> } > >>> if (pmu->enable & > >>> config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) { > >>> - add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ, > >>> + add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ], > >>> intel_rps_get_requested_frequency(rps), > >>> period_ns / 1000); > >>> } > >>> @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event) > >>> switch (config) { > >>> case I915_PMU_ACTUAL_FREQUENCY: > >>> val = > >>> - div_u64(read_sample(pmu, gt_id, > >>> - __I915_SAMPLE_FREQ_ACT), > >>> + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur, > >>> USEC_PER_SEC /* to MHz */); > >>> break; > >>> case I915_PMU_REQUESTED_FREQUENCY: > >>> val = > >>> - div_u64(read_sample(pmu, gt_id, > >>> - __I915_SAMPLE_FREQ_REQ), > >>> + div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur, > >>> USEC_PER_SEC /* to MHz */); > >>> break; > >>> case I915_PMU_INTERRUPTS: > >>> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h > >>> index 33d80fbaab8bc..d20592e7db999 100644 > >>> --- a/drivers/gpu/drm/i915/i915_pmu.h > >>> +++ b/drivers/gpu/drm/i915/i915_pmu.h > >>> @@ -127,7 +127,7 @@ struct i915_pmu { > >>> * Only global counters are held here, while the per-engine ones are in > >>> * struct intel_engine_cs. > >>> */ > >>> - struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS]; > >>> + struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS]; > >>> /** > >>> * @sleep_last: Last time GT parked for RC6 estimation. > >>> */