This was originally un-inlined by Andi Kleen in 2011 citing size concerns. Indeed, a first attempt at inlining it grew radeon.ko by 7%. However, 2% of cpu is spent in this function. Simply inlining it gave 1% more fps in Urban Terror. v2: We know the minimum MMIO size. Adding it to the if allows the compiler to optimize the branch out, improving both performance and size. The v2 patch decreases radeon.ko size by 2%. I didn't re-benchmark, but common sense says perf is now more than 1% better. Signed-off-by: Lauri Kasanen <cand@xxxxxxx> --- drivers/gpu/drm/radeon/r100.c | 18 ------------------ drivers/gpu/drm/radeon/radeon.h | 21 +++++++++++++++++++-- 2 files changed, 19 insertions(+), 20 deletions(-) The _wreg function could be given similar treatment, but as it's nowhere as hot (0.009% vs 2%), didn't bother. diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index b6c3264..8169e82 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -4086,24 +4086,6 @@ int r100_init(struct radeon_device *rdev) return 0; } -uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg, - bool always_indirect) -{ - if (reg < rdev->rmmio_size && !always_indirect) - return readl(((void __iomem *)rdev->rmmio) + reg); - else { - unsigned long flags; - uint32_t ret; - - spin_lock_irqsave(&rdev->mmio_idx_lock, flags); - writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); - ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); - spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); - - return ret; - } -} - void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v, bool always_indirect) { diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index f21db7a..883276a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2328,8 +2328,25 @@ int radeon_device_init(struct radeon_device *rdev, void radeon_device_fini(struct radeon_device *rdev); int radeon_gpu_wait_for_idle(struct radeon_device *rdev); -uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg, - bool always_indirect); +static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg, + bool always_indirect) +{ + /* The mmio size is 64kb at minimum. Allows the if to be optimized out. */ + if ((reg < rdev->rmmio_size || reg < 0x10000) && !always_indirect) + return readl(((void __iomem *)rdev->rmmio) + reg); + else { + unsigned long flags; + uint32_t ret; + + spin_lock_irqsave(&rdev->mmio_idx_lock, flags); + writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); + ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); + spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); + + return ret; + } +} + void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v, bool always_indirect); u32 r100_io_rreg(struct radeon_device *rdev, u32 reg); -- 1.8.3.1 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel