From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx>

The current code operates under the misconception that adjusting a
buffer's allowed and preferred placements can always control whether a
backing store migration will be attempted.

That is, however, not the case when userspace sets a buffer placement of
VRAM+GTT, which is what radv does since commit 862b6a9a ("radv: Improve
spilling on discrete GPUs."). With such a placement the preferred and
allowed domain sets are identical, so switching between them has no
effect and the migration budget ends up being ignored completely.

Fix this by validating against a local singleton placement set to the
current backing store location. This way, once the migration budget has
been depleted, ttm_bo_validate is prevented from seeing any placement
other than the current one.

For the case of the implicit GTT allowed domain added in
amdgpu_bo_create when userspace sets only VRAM, the behaviour should be
the same. On the first pass the re-validation will attempt to migrate
away from the fallback GTT domain, and if that does not succeed the
buffer will remain in the fallback placement.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx>
Cc: Christian König <christian.koenig@xxxxxxx>
Cc: Friedrich Vock <friedrich.vock@xxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 112 +++++++++++++++++++------
 1 file changed, 85 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ec888fc6ead8..08e7631f3a2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -32,6 +32,7 @@
 
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_syncobj.h>
+#include <drm/drm_print.h>
 #include <drm/ttm/ttm_tt.h>
 
 #include "amdgpu_cs.h"
@@ -775,6 +776,56 @@ void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
 	spin_unlock(&adev->mm_stats.lock);
 }
 
+static bool
+amdgpu_cs_bo_move_under_budget(struct amdgpu_cs_parser *p,
+			       struct amdgpu_bo *abo)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+
+	/*
+	 * Don't move this buffer if we have depleted our allowance
+	 * to move it. Don't move anything if the threshold is zero.
+	 */
+	if (p->bytes_moved >= p->bytes_moved_threshold)
+		return false;
+
+	if ((!abo->tbo.base.dma_buf ||
+	     list_empty(&abo->tbo.base.dma_buf->attachments)) &&
+	    (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+	     (abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) &&
+	    p->bytes_moved_vis >= p->bytes_moved_vis_threshold) {
+		/*
+		 * And don't move a CPU_ACCESS_REQUIRED BO to limited
+		 * visible VRAM if we've depleted our allowance to do
+		 * that.
+		 */
+		return false;
+	}
+
+	return true;
+}
+
+static bool
+amdgpu_bo_fill_current_placement(struct amdgpu_bo *abo,
+				 struct ttm_placement *placement,
+				 struct ttm_place *place)
+{
+	struct ttm_placement *bo_placement = &abo->placement;
+	int i;
+
+	for (i = 0; i < bo_placement->num_placement; i++) {
+		if (bo_placement->placement[i].mem_type ==
+		    abo->tbo.resource->mem_type) {
+			*place = bo_placement->placement[i];
+			placement->num_placement = 1;
+			placement->placement = place;
+			return true;
+		}
+	}
+
+	return false;
+}
+
 static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
@@ -784,46 +835,53 @@ static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
 		.no_wait_gpu = false,
 		.resv = bo->tbo.base.resv
 	};
-	uint32_t domain;
+	bool allow_move;
 	int r;
 
 	if (bo->tbo.pin_count)
 		return 0;
 
-	/* Don't move this buffer if we have depleted our allowance
-	 * to move it. Don't move anything if the threshold is zero.
-	 */
-	if (p->bytes_moved < p->bytes_moved_threshold &&
-	    (!bo->tbo.base.dma_buf ||
-	     list_empty(&bo->tbo.base.dma_buf->attachments))) {
-		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
-		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
-			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
-			 * visible VRAM if we've depleted our allowance to do
-			 * that.
-			 */
-			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
-				domain = bo->preferred_domains;
-			else
-				domain = bo->allowed_domains;
-		} else {
-			domain = bo->preferred_domains;
-		}
-	} else {
-		domain = bo->allowed_domains;
-	}
+	if (!bo->tbo.resource || amdgpu_cs_bo_move_under_budget(p, bo))
+		allow_move = true;
+	else
+		allow_move = false;
 
+	amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
 retry:
-	amdgpu_bo_placement_from_domain(bo, domain);
-	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (allow_move) {
+		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	} else {
+		struct ttm_placement same_placement = { };
+		struct ttm_place same_place;
+		bool placement_found;
+
+		placement_found =
+			amdgpu_bo_fill_current_placement(bo,
+							 &same_placement,
+							 &same_place);
+
+		if (drm_WARN_ON_ONCE(&adev->ddev, !placement_found)) {
+			/*
+			 * QQQ
+			 * Current placement not in the bo->allowed_domains set.
+			 * Can this happen?
+			 * QQQ
+			 */
+			allow_move = true;
+			goto retry;
+		}
+
+		r = ttm_bo_validate(&bo->tbo, &same_placement, &ctx);
+	}
 
 	p->bytes_moved += ctx.bytes_moved;
 	if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
 	    amdgpu_res_cpu_visible(adev, bo->tbo.resource))
 		p->bytes_moved_vis += ctx.bytes_moved;
 
-	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
-		domain = bo->allowed_domains;
+	if (unlikely(r == -ENOMEM) && !allow_move) {
+		ctx.bytes_moved = 0;
+		allow_move = true;
 		goto retry;
 	}
 
-- 
2.44.0
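
P.S. For reviewers less familiar with TTM placements, below is a minimal
userspace sketch of the idea the patch relies on: collapsing the candidate
placement list to a singleton that matches the current backing store, so
that validation has nowhere to migrate to. All names and types in it
(fake_bo, fake_place, fill_current_placement, ...) are invented for
illustration and only loosely mirror the kernel structures; it is not
kernel code.

/*
 * Illustrative only: userspace model of the singleton-placement trick.
 */
#include <stdbool.h>
#include <stdio.h>

enum mem_type { MEM_VRAM, MEM_GTT };

struct fake_place {
	enum mem_type mem_type;
};

struct fake_placement {
	unsigned int num_placement;
	const struct fake_place *placement;
};

struct fake_bo {
	enum mem_type current_mem;	/* where the backing store lives now */
	struct fake_placement allowed;	/* models bo->allowed_domains */
};

/*
 * Counterpart of amdgpu_bo_fill_current_placement() in the patch: find
 * the allowed entry matching the current backing store and expose it as
 * a one-element placement list.
 */
static bool fill_current_placement(const struct fake_bo *bo,
				   struct fake_placement *placement,
				   struct fake_place *place)
{
	unsigned int i;

	for (i = 0; i < bo->allowed.num_placement; i++) {
		if (bo->allowed.placement[i].mem_type == bo->current_mem) {
			*place = bo->allowed.placement[i];
			placement->num_placement = 1;
			placement->placement = place;
			return true;
		}
	}

	return false;
}

int main(void)
{
	const struct fake_place vram_gtt[] = { { MEM_VRAM }, { MEM_GTT } };
	struct fake_bo bo = {
		.current_mem = MEM_GTT,		/* spilled to GTT earlier */
		.allowed = { 2, vram_gtt },
	};
	struct fake_placement same_placement = { 0 };
	struct fake_place same_place;

	/*
	 * Budget depleted: validation sees only the current placement,
	 * so it can never pick a placement that implies a migration.
	 */
	if (fill_current_placement(&bo, &same_placement, &same_place))
		printf("budget depleted: %u candidate placement(s)\n",
		       same_placement.num_placement);

	/*
	 * Budget available: the full VRAM+GTT list stays visible and a
	 * validator is free to migrate the buffer back to VRAM.
	 */
	printf("budget available: %u candidate placement(s)\n",
	       bo.allowed.num_placement);

	return 0;
}

The depleted case reports exactly one candidate placement, which is the
whole point: no matter what the allowed set says, a budget-exhausted
buffer can only be validated where it already is.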