On Thu, Apr 10, 2014 at 9:08 AM, Lauri Kasanen <cand@xxxxxxx> wrote:
> This was originally un-inlined by Andi Kleen in 2011 citing size concerns.
> Indeed, inlining it grows radeon.ko by 7%.
>
> However, 2% of cpu is spent in this function. Inlining it gives 1% more fps
> in Urban Terror.
>
> Signed-off-by: Lauri Kasanen <cand@xxxxxxx>
> ---
>  drivers/gpu/drm/radeon/r100.c   | 18 ------------------
>  drivers/gpu/drm/radeon/radeon.h | 20 ++++++++++++++++++--
>  2 files changed, 18 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
> index b6c3264..8169e82 100644
> --- a/drivers/gpu/drm/radeon/r100.c
> +++ b/drivers/gpu/drm/radeon/r100.c
> @@ -4086,24 +4086,6 @@ int r100_init(struct radeon_device *rdev)
>  	return 0;
>  }
>
> -uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
> -		      bool always_indirect)
> -{
> -	if (reg < rdev->rmmio_size && !always_indirect)
> -		return readl(((void __iomem *)rdev->rmmio) + reg);
> -	else {
> -		unsigned long flags;
> -		uint32_t ret;
> -
> -		spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
> -		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
> -		ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
> -		spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
> -
> -		return ret;
> -	}
> -}
> -
>  void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
>  		  bool always_indirect)
>  {
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 5cf10a7..9231100 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -2330,8 +2330,24 @@ int radeon_device_init(struct radeon_device *rdev,
>  void radeon_device_fini(struct radeon_device *rdev);
>  int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
>
> -uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
> -		      bool always_indirect);
> +static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
> +				    bool always_indirect)
> +{
> +	if (reg < rdev->rmmio_size && !always_indirect)
> +		return readl(((void __iomem *)rdev->rmmio) + reg);

Quick thought from someone entirely unfamiliar with the hardware:
perhaps you can get the performance benefit without the size increase
by moving the else portion into a non-inline function? I'm guessing
that most accesses happen in the "if" branch.

> +	else {
> +		unsigned long flags;
> +		uint32_t ret;
> +
> +		spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
> +		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
> +		ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
> +		spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
> +
> +		return ret;
> +	}
> +}
> +
>  void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
>  		  bool always_indirect);
>  u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
> --
> 1.8.3.1
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@xxxxxxxxxxxxxxxxxxxxx
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/dri-devel