On Wed, Mar 11, 2015 at 2:21 PM, Christian König <deathsimple@xxxxxxxxxxx> wrote: > On 11.03.2015 16:44, Alex Deucher wrote: >> >> radeon_bo_create() calls radeon_ttm_placement_from_domain() >> before ttm_bo_init() is called. radeon_ttm_placement_from_domain() >> uses the ttm bo size to determine when to select top down >> allocation but since the ttm bo is not initialized yet the >> check is always false. >> >> Noticed-by: Oded Gabbay <oded.gabbay@xxxxxxx> >> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> >> Cc: stable@xxxxxxxxxxxxxxx > > > And I was already wondering why the heck the BOs always made this ping/pong > in memory after creation. > > Patch is Reviewed-by: Christian König <christian.koenig@xxxxxxx> And fixing that promptly broke VCE due to vram location requirements. Updated patch attached. Thoughts? Alex > > Regards, > Christian. > > >> --- >> drivers/gpu/drm/radeon/radeon.h | 3 ++- >> drivers/gpu/drm/radeon/radeon_gem.c | 2 +- >> drivers/gpu/drm/radeon/radeon_mn.c | 2 +- >> drivers/gpu/drm/radeon/radeon_object.c | 17 ++++++++++------- >> drivers/gpu/drm/radeon/radeon_ttm.c | 12 ++++++++---- >> 5 files changed, 22 insertions(+), 14 deletions(-) >> >> diff --git a/drivers/gpu/drm/radeon/radeon.h >> b/drivers/gpu/drm/radeon/radeon.h >> index 5587603..726e89f 100644 >> --- a/drivers/gpu/drm/radeon/radeon.h >> +++ b/drivers/gpu/drm/radeon/radeon.h >> @@ -2970,7 +2970,8 @@ extern void radeon_surface_init(struct radeon_device >> *rdev); >> extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void >> *data); >> extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, >> int enable); >> extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int >> enable); >> -extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 >> domain); >> +extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 >> domain, >> + u64 size); >> extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo); 
>> extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, >> uint32_t flags); >> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c >> b/drivers/gpu/drm/radeon/radeon_gem.c >> index ac3c131..d613d0c 100644 >> --- a/drivers/gpu/drm/radeon/radeon_gem.c >> +++ b/drivers/gpu/drm/radeon/radeon_gem.c >> @@ -337,7 +337,7 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, >> void *data, >> goto release_object; >> } >> - radeon_ttm_placement_from_domain(bo, >> RADEON_GEM_DOMAIN_GTT); >> + radeon_ttm_placement_from_domain(bo, >> RADEON_GEM_DOMAIN_GTT, bo->tbo.mem.size); >> r = ttm_bo_validate(&bo->tbo, &bo->placement, true, >> false); >> radeon_bo_unreserve(bo); >> up_read(&current->mm->mmap_sem); >> diff --git a/drivers/gpu/drm/radeon/radeon_mn.c >> b/drivers/gpu/drm/radeon/radeon_mn.c >> index a69bd44..e51f09b 100644 >> --- a/drivers/gpu/drm/radeon/radeon_mn.c >> +++ b/drivers/gpu/drm/radeon/radeon_mn.c >> @@ -141,7 +141,7 @@ static void radeon_mn_invalidate_range_start(struct >> mmu_notifier *mn, >> DRM_ERROR("(%d) failed to wait for user >> bo\n", r); >> } >> - radeon_ttm_placement_from_domain(bo, >> RADEON_GEM_DOMAIN_CPU); >> + radeon_ttm_placement_from_domain(bo, >> RADEON_GEM_DOMAIN_CPU, bo->tbo.mem.size); >> r = ttm_bo_validate(&bo->tbo, &bo->placement, false, >> false); >> if (r) >> DRM_ERROR("(%d) failed to validate user bo\n", r); >> diff --git a/drivers/gpu/drm/radeon/radeon_object.c >> b/drivers/gpu/drm/radeon/radeon_object.c >> index 43e0994..07f8fd5 100644 >> --- a/drivers/gpu/drm/radeon/radeon_object.c >> +++ b/drivers/gpu/drm/radeon/radeon_object.c >> @@ -93,7 +93,8 @@ bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object >> *bo) >> return false; >> } >> -void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 >> domain) >> +void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain, >> + u64 size) >> { >> u32 c = 0, i; >> @@ -179,7 +180,7 @@ void radeon_ttm_placement_from_domain(struct >> radeon_bo *rbo, u32 domain) >> 
* improve fragmentation quality. >> * 512kb was measured as the most optimal number. >> */ >> - if (rbo->tbo.mem.size > 512 * 1024) { >> + if (size > 512 * 1024) { >> for (i = 0; i < c; i++) { >> rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN; >> } >> @@ -252,7 +253,7 @@ int radeon_bo_create(struct radeon_device *rdev, >> bo->flags &= ~RADEON_GEM_GTT_WC; >> #endif >> - radeon_ttm_placement_from_domain(bo, domain); >> + radeon_ttm_placement_from_domain(bo, domain, size); >> /* Kernel allocation are uninterruptible */ >> down_read(&rdev->pm.mclk_lock); >> r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type, >> @@ -350,7 +351,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 >> domain, u64 max_offset, >> return 0; >> } >> - radeon_ttm_placement_from_domain(bo, domain); >> + radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size); >> for (i = 0; i < bo->placement.num_placement; i++) { >> /* force to pin into visible video ram */ >> if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) && >> @@ -557,7 +558,7 @@ int radeon_bo_list_validate(struct radeon_device >> *rdev, >> } >> retry: >> - radeon_ttm_placement_from_domain(bo, domain); >> + radeon_ttm_placement_from_domain(bo, domain, >> bo->tbo.mem.size); >> if (ring == R600_RING_TYPE_UVD_INDEX) >> radeon_uvd_force_into_uvd_segment(bo, >> allowed); >> @@ -800,7 +801,8 @@ int radeon_bo_fault_reserve_notify(struct >> ttm_buffer_object *bo) >> return 0; >> /* hurrah the memory is not visible ! 
*/ >> - radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM); >> + radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM, >> + rbo->tbo.mem.size); >> lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT; >> for (i = 0; i < rbo->placement.num_placement; i++) { >> /* Force into visible VRAM */ >> @@ -810,7 +812,8 @@ int radeon_bo_fault_reserve_notify(struct >> ttm_buffer_object *bo) >> } >> r = ttm_bo_validate(bo, &rbo->placement, false, false); >> if (unlikely(r == -ENOMEM)) { >> - radeon_ttm_placement_from_domain(rbo, >> RADEON_GEM_DOMAIN_GTT); >> + radeon_ttm_placement_from_domain(rbo, >> RADEON_GEM_DOMAIN_GTT, >> + rbo->tbo.mem.size); >> return ttm_bo_validate(bo, &rbo->placement, false, false); >> } else if (unlikely(r != 0)) { >> return r; >> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c >> b/drivers/gpu/drm/radeon/radeon_ttm.c >> index d02aa1d..ce8ed2d 100644 >> --- a/drivers/gpu/drm/radeon/radeon_ttm.c >> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c >> @@ -197,7 +197,8 @@ static void radeon_evict_flags(struct >> ttm_buffer_object *bo, >> switch (bo->mem.mem_type) { >> case TTM_PL_VRAM: >> if >> (rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready == false) >> - radeon_ttm_placement_from_domain(rbo, >> RADEON_GEM_DOMAIN_CPU); >> + radeon_ttm_placement_from_domain(rbo, >> RADEON_GEM_DOMAIN_CPU, >> + >> rbo->tbo.mem.size); >> else if (rbo->rdev->mc.visible_vram_size < >> rbo->rdev->mc.real_vram_size && >> bo->mem.start < (rbo->rdev->mc.visible_vram_size >> >> PAGE_SHIFT)) { >> unsigned fpfn = rbo->rdev->mc.visible_vram_size >> >> PAGE_SHIFT; >> @@ -209,7 +210,8 @@ static void radeon_evict_flags(struct >> ttm_buffer_object *bo, >> * BOs to be evicted from VRAM >> */ >> radeon_ttm_placement_from_domain(rbo, >> RADEON_GEM_DOMAIN_VRAM | >> - >> RADEON_GEM_DOMAIN_GTT); >> + >> RADEON_GEM_DOMAIN_GTT, >> + >> rbo->tbo.mem.size); >> rbo->placement.num_busy_placement = 0; >> for (i = 0; i < rbo->placement.num_placement; i++) >> { >> if 
(rbo->placements[i].flags & >> TTM_PL_FLAG_VRAM) { >> @@ -222,11 +224,13 @@ static void radeon_evict_flags(struct >> ttm_buffer_object *bo, >> } >> } >> } else >> - radeon_ttm_placement_from_domain(rbo, >> RADEON_GEM_DOMAIN_GTT); >> + radeon_ttm_placement_from_domain(rbo, >> RADEON_GEM_DOMAIN_GTT, >> + >> rbo->tbo.mem.size); >> break; >> case TTM_PL_TT: >> default: >> - radeon_ttm_placement_from_domain(rbo, >> RADEON_GEM_DOMAIN_CPU); >> + radeon_ttm_placement_from_domain(rbo, >> RADEON_GEM_DOMAIN_CPU, >> + rbo->tbo.mem.size); >> } >> *placement = rbo->placement; >> } > >
From aa93fb79095c76182952773836c1e6ed3af971fd Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deucher@xxxxxxx> Date: Wed, 11 Mar 2015 11:27:26 -0400 Subject: [PATCH] drm/radeon: fix TOPDOWN handling for bo_create (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit radeon_bo_create() calls radeon_ttm_placement_from_domain() before ttm_bo_init() is called. radeon_ttm_placement_from_domain() uses the ttm bo size to determine when to select top down allocation but since the ttm bo is not initialized yet the check is always false. v2: only use topdown for vram if the user has not requested CPU access explicitly. Fixes VCE. Noticed-by: Oded Gabbay <oded.gabbay@xxxxxxx> Reviewed-by: Christian König <christian.koenig@xxxxxxx> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> Cc: stable@xxxxxxxxxxxxxxx --- drivers/gpu/drm/radeon/radeon.h | 3 ++- drivers/gpu/drm/radeon/radeon_gem.c | 2 +- drivers/gpu/drm/radeon/radeon_mn.c | 2 +- drivers/gpu/drm/radeon/radeon_object.c | 30 +++++++++++++++++++++--------- drivers/gpu/drm/radeon/radeon_ttm.c | 12 ++++++++---- drivers/gpu/drm/radeon/radeon_uvd.c | 2 +- drivers/gpu/drm/radeon/radeon_vce.c | 2 +- 7 files changed, 35 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 5587603..726e89f 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2970,7 +2970,8 @@ extern void radeon_surface_init(struct radeon_device *rdev); extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data); extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enable); extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable); -extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain); +extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain, + u64 size); extern bool radeon_ttm_bo_is_radeon_bo(struct 
ttm_buffer_object *bo); extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, uint32_t flags); diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index ac3c131..d613d0c 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -337,7 +337,7 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, goto release_object; } - radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT); + radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT, bo->tbo.mem.size); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); radeon_bo_unreserve(bo); up_read(&current->mm->mmap_sem); diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c index a69bd44..e51f09b 100644 --- a/drivers/gpu/drm/radeon/radeon_mn.c +++ b/drivers/gpu/drm/radeon/radeon_mn.c @@ -141,7 +141,7 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn, DRM_ERROR("(%d) failed to wait for user bo\n", r); } - radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU); + radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU, bo->tbo.mem.size); r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); if (r) DRM_ERROR("(%d) failed to validate user bo\n", r); diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 43e0994..eee1f9f 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -93,7 +93,8 @@ bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo) return false; } -void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) +void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain, + u64 size) { u32 c = 0, i; @@ -179,9 +180,18 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) * improve fragmentation quality. * 512kb was measured as the most optimal number. 
*/ - if (rbo->tbo.mem.size > 512 * 1024) { - for (i = 0; i < c; i++) { - rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN; + if (size > 512 * 1024) { + if (domain & RADEON_GEM_DOMAIN_VRAM) { + if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) || + !(rbo->flags & RADEON_GEM_CPU_ACCESS)) { + for (i = 0; i < c; i++) { + rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN; + } + } + } else { + for (i = 0; i < c; i++) { + rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN; + } } } } @@ -252,7 +262,7 @@ int radeon_bo_create(struct radeon_device *rdev, bo->flags &= ~RADEON_GEM_GTT_WC; #endif - radeon_ttm_placement_from_domain(bo, domain); + radeon_ttm_placement_from_domain(bo, domain, size); /* Kernel allocation are uninterruptible */ down_read(&rdev->pm.mclk_lock); r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type, @@ -350,7 +360,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset, return 0; } - radeon_ttm_placement_from_domain(bo, domain); + radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size); for (i = 0; i < bo->placement.num_placement; i++) { /* force to pin into visible video ram */ if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) && @@ -557,7 +567,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev, } retry: - radeon_ttm_placement_from_domain(bo, domain); + radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size); if (ring == R600_RING_TYPE_UVD_INDEX) radeon_uvd_force_into_uvd_segment(bo, allowed); @@ -800,7 +810,8 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) return 0; /* hurrah the memory is not visible ! 
*/ - radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM); + radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM, + rbo->tbo.mem.size); lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT; for (i = 0; i < rbo->placement.num_placement; i++) { /* Force into visible VRAM */ @@ -810,7 +821,8 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) } r = ttm_bo_validate(bo, &rbo->placement, false, false); if (unlikely(r == -ENOMEM)) { - radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT); + radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT, + rbo->tbo.mem.size); return ttm_bo_validate(bo, &rbo->placement, false, false); } else if (unlikely(r != 0)) { return r; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index d02aa1d..ce8ed2d 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -197,7 +197,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo, switch (bo->mem.mem_type) { case TTM_PL_VRAM: if (rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready == false) - radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU); + radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU, + rbo->tbo.mem.size); else if (rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size && bo->mem.start < (rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT)) { unsigned fpfn = rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT; @@ -209,7 +210,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo, * BOs to be evicted from VRAM */ radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM | - RADEON_GEM_DOMAIN_GTT); + RADEON_GEM_DOMAIN_GTT, + rbo->tbo.mem.size); rbo->placement.num_busy_placement = 0; for (i = 0; i < rbo->placement.num_placement; i++) { if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) { @@ -222,11 +224,13 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo, } } } else - radeon_ttm_placement_from_domain(rbo, 
RADEON_GEM_DOMAIN_GTT); + radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT, + rbo->tbo.mem.size); break; case TTM_PL_TT: default: - radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU); + radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU, + rbo->tbo.mem.size); } *placement = rbo->placement; } diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index c10b2ae..52b2682 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -141,7 +141,7 @@ int radeon_uvd_init(struct radeon_device *rdev) RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE + RADEON_GPU_PAGE_SIZE; r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, 0, NULL, + RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_CPU_ACCESS, NULL, NULL, &rdev->uvd.vcpu_bo); if (r) { dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r); diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index 976fe43..3d75502 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -126,7 +126,7 @@ int radeon_vce_init(struct radeon_device *rdev) size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) + RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE; r = radeon_bo_create(rdev, size, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL, + RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_CPU_ACCESS, NULL, NULL, &rdev->vce.vcpu_bo); if (r) { dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r); -- 1.8.3.1
_______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel