On Tue, May 26, 2020 at 02:08:41PM +1000, Stephen Rothwell wrote: > Hi all, > > On Tue, 19 May 2020 15:09:55 +1000 Stephen Rothwell <sfr@xxxxxxxxxxxxxxxx> wrote: > > > > Hi all, > > > > After merging the drm-msm tree, today's linux-next build (arm > > multi_v7_defconfig) failed like this: > > > > ERROR: modpost: "__aeabi_ldivmod" [drivers/gpu/drm/msm/msm.ko] undefined! > > ERROR: modpost: "__aeabi_uldivmod" [drivers/gpu/drm/msm/msm.ko] undefined! > > > > Caused by commit > > > > 04d9044f6c57 ("drm/msm/dpu: add support for clk and bw scaling for display") > > > > I applied the following patch for today (this is mechanical, there may > > be a better way): > > > > From: Stephen Rothwell <sfr@xxxxxxxxxxxxxxxx> > > Date: Tue, 19 May 2020 14:12:39 +1000 > > Subject: [PATCH] drm/msm/dpu: fix up u64/u32 division for 32 bit architectures > > > > Signed-off-by: Stephen Rothwell <sfr@xxxxxxxxxxxxxxxx> > > --- > > drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c | 23 ++++++++++++++----- > > drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 15 ++++++++---- > > 2 files changed, 28 insertions(+), 10 deletions(-) > > > > diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c > > index 9697abcbec3f..85c2a4190840 100644 > > --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c > > +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c > > @@ -10,6 +10,7 @@ > > #include <linux/sort.h> > > #include <linux/clk.h> > > #include <linux/bitmap.h> > > +#include <asm/div64.h> > > > > #include "dpu_kms.h" > > #include "dpu_trace.h" > > @@ -53,8 +54,11 @@ static u64 _dpu_core_perf_calc_bw(struct dpu_kms *kms, > > } > > > > bw_factor = kms->catalog->perf.bw_inefficiency_factor; > > - if (bw_factor) > > - crtc_plane_bw = mult_frac(crtc_plane_bw, bw_factor, 100); > > + if (bw_factor) { > > + u64 quot = crtc_plane_bw; > > + u32 rem = do_div(quot, 100); > > + crtc_plane_bw = (quot * bw_factor) + ((rem * bw_factor) / 100); > > + } > > > > return crtc_plane_bw; > 
> } > > @@ -89,8 +93,11 @@ static u64 _dpu_core_perf_calc_clk(struct dpu_kms *kms, > > } > > > > clk_factor = kms->catalog->perf.clk_inefficiency_factor; > > - if (clk_factor) > > - crtc_clk = mult_frac(crtc_clk, clk_factor, 100); > > + if (clk_factor) { > > + u64 quot = crtc_clk; > > + u32 rem = do_div(quot, 100); > > + crtc_clk = (quot * clk_factor) + ((rem * clk_factor) / 100); > > + } > > > > return crtc_clk; > > } > > @@ -234,8 +241,12 @@ static int _dpu_core_perf_crtc_update_bus(struct dpu_kms *kms, > > } > > } > > > > - avg_bw = kms->num_paths ? > > - perf.bw_ctl / kms->num_paths : 0; > > + if (kms->num_paths) { > > + avg_bw = perf.bw_ctl; > > + do_div(avg_bw, kms->num_paths); > > + } else { > > + avg_bw = 0; > > + } > > > > for (i = 0; i < kms->num_paths; i++) > > icc_set_bw(kms->path[i], > > diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c > > index c2a6e3dacd68..ad95f32eac13 100644 > > --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c > > +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c > > @@ -9,6 +9,7 @@ > > > > #include <linux/debugfs.h> > > #include <linux/dma-buf.h> > > +#include <asm/div64.h> > > > > #include <drm/drm_atomic_uapi.h> > > #include <drm/drm_damage_helper.h> > > @@ -174,7 +175,11 @@ static void _dpu_plane_calc_bw(struct drm_plane *plane, > > plane_prefill_bw = > > src_width * hw_latency_lines * fps * fmt->bpp * scale_factor; > > > > - plane_prefill_bw = mult_frac(plane_prefill_bw, mode->vtotal, (vbp+vpw)); > > + { > > + u64 quot = plane_prefill_bw; > > + u32 rem = do_div(plane_prefill_bw, vbp + vpw); > > + plane_prefill_bw = quot * mode->vtotal + rem * mode->vtotal / (vbp + vpw); > > + } > > > > pstate->plane_fetch_bw = max(plane_bw, plane_prefill_bw); > > } > > @@ -204,9 +209,11 @@ static void _dpu_plane_calc_clk(struct drm_plane *plane) > > pstate->plane_clk = > > dst_width * mode->vtotal * fps; > > > > - if (src_height > dst_height) > > - pstate->plane_clk = mult_frac(pstate->plane_clk, 
> > - src_height, dst_height); > > + if (src_height > dst_height) { > > + u64 quot = pstate->plane_clk; > > + u32 rem = do_div(quot, dst_height); > > + pstate->plane_clk = quot * src_height + rem * src_height / dst_height; > > + } > > } > > > > /** > > -- > > 2.26.2 > > I am still applying the above ... > > -- > Cheers, > Stephen Rothwell Additionally, I see a failure with clang due to the use of Bps_to_icc, which performs a plain division of its argument by the integer literal 1000; here the dividend is avg_bw, which is a u64. Below is the "hack" in my tree. Cheers, Nathan diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c index 85c2a4190840..5ea725d8da6c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c @@ -250,7 +250,7 @@ static int _dpu_core_perf_crtc_update_bus(struct dpu_kms *kms, for (i = 0; i < kms->num_paths; i++) icc_set_bw(kms->path[i], - Bps_to_icc(avg_bw), (perf.max_per_pipe_ib)); + div_u64(avg_bw, 1000), (perf.max_per_pipe_ib)); return ret; }