El mar, 09-07-2024 a las 17:34 +0100, Tvrtko Ursulin escribió: > From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx> > > Removing the intermediate buffer removes the last use of the > V3D_MAX_COUNTERS define, which will enable further driver cleanup. > > While at it pull the 32 vs 64 bit copying decision outside the loop > in > order to reduce the number of conditional instructions. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx> > --- > drivers/gpu/drm/v3d/v3d_sched.c | 60 ++++++++++++++++++++----------- > -- > 1 file changed, 37 insertions(+), 23 deletions(-) > > diff --git a/drivers/gpu/drm/v3d/v3d_sched.c > b/drivers/gpu/drm/v3d/v3d_sched.c > index fc8730264386..77f795e38fad 100644 > --- a/drivers/gpu/drm/v3d/v3d_sched.c > +++ b/drivers/gpu/drm/v3d/v3d_sched.c > @@ -421,18 +421,23 @@ v3d_reset_timestamp_queries(struct v3d_cpu_job > *job) > v3d_put_bo_vaddr(bo); > } > > +static void write_to_buffer_32(u32 *dst, unsigned int idx, u32 > value) > +{ > + dst[idx] = value; > +} > + > +static void write_to_buffer_64(u64 *dst, unsigned int idx, u64 > value) > +{ > + dst[idx] = value; > +} > + > static void > -write_to_buffer(void *dst, u32 idx, bool do_64bit, u64 value) > +write_to_buffer(void *dst, unsigned int idx, bool do_64bit, u64 > value) > { > - if (do_64bit) { > - u64 *dst64 = (u64 *)dst; > - > - dst64[idx] = value; > - } else { > - u32 *dst32 = (u32 *)dst; > - > - dst32[idx] = (u32)value; > - } > + if (do_64bit) > + write_to_buffer_64(dst, idx, value); > + else > + write_to_buffer_32(dst, idx, value); > } > > static void > @@ -505,18 +510,23 @@ v3d_reset_performance_queries(struct > v3d_cpu_job *job) > } > > static void > -v3d_write_performance_query_result(struct v3d_cpu_job *job, void > *data, u32 query) > +v3d_write_performance_query_result(struct v3d_cpu_job *job, void > *data, > + unsigned int query) > { > - struct v3d_performance_query_info *performance_query = &job- > >performance_query; > - struct v3d_copy_query_results_info *copy = &job->copy; > + struct v3d_performance_query_info *performance_query = > + &job- > >performance_query; > struct v3d_file_priv *v3d_priv = job->base.file- > >driver_priv; > struct v3d_dev *v3d = job->base.v3d; > - struct v3d_perfmon *perfmon; > - u64 counter_values[V3D_MAX_COUNTERS]; > + unsigned int i, j, offset; > > - for (int i = 0; i < performance_query->nperfmons; i++) { > - perfmon = v3d_perfmon_find(v3d_priv, > - performance_query- > >queries[query].kperfmon_ids[i]); > + for (i = 0, offset = 0; > + i < performance_query->nperfmons; > + i++, offset += DRM_V3D_MAX_PERF_COUNTERS) { > + struct v3d_performance_query *q = > + &performance_query->queries[query]; Looks like we could move this before the loop. Otherwise this patch is: Reviewed-by: Iago Toral Quiroga <itoral@xxxxxxxxxx> > + struct v3d_perfmon *perfmon; > + > + perfmon = v3d_perfmon_find(v3d_priv, q- > >kperfmon_ids[i]); > if (!perfmon) { > DRM_DEBUG("Failed to find perfmon."); > continue; > @@ -524,14 +534,18 @@ v3d_write_performance_query_result(struct > v3d_cpu_job *job, void *data, u32 quer > > v3d_perfmon_stop(v3d, perfmon, true); > > - memcpy(&counter_values[i * > DRM_V3D_MAX_PERF_COUNTERS], perfmon->values, > - perfmon->ncounters * sizeof(u64)); > + if (job->copy.do_64bit) { > + for (j = 0; j < perfmon->ncounters; j++) > + write_to_buffer_64(data, offset + j, > + perfmon- > >values[j]); > + } else { > + for (j = 0; j < perfmon->ncounters; j++) > + write_to_buffer_32(data, offset + j, > + perfmon- > >values[j]); > + } > > v3d_perfmon_put(perfmon); > } > - > - for (int i = 0; i < performance_query->ncounters; i++) > - write_to_buffer(data, i, copy->do_64bit, > counter_values[i]); > } > > static void