TTM now takes care of moving buffers to the best possible domain, so
the byte-budget throttling that amdgpu applied at command submission
time is no longer needed. Remove amdgpu_cs_get_threshold_for_moves()
and amdgpu_cs_report_moved_bytes() together with the bytes_moved
bookkeeping in struct amdgpu_cs_parser, and simply validate BOs
against their allowed domains.

Signed-off-by: Friedrich Vock <friedrich.vock@xxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |   2 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 191 +--------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h     |   4 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |   7 -
 4 files changed, 3 insertions(+), 201 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index cac0ca64367b3..3004adc6fa679 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1404,8 +1404,6 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev);
 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev);
 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
-				  u64 num_vis_bytes);
 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev);
 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
 					     const u32 *registers,
 					     const u32 array_size);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index e9168677ef0a6..92a0cffc1adc3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -638,196 +638,19 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
 	return 0;
 }
 
-/* Convert microseconds to bytes. */
-static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
-{
-	if (us <= 0 || !adev->mm_stats.log2_max_MBps)
-		return 0;
-
-	/* Since accum_us is incremented by a million per second, just
-	 * multiply it by the number of MB/s to get the number of bytes.
-	 */
-	return us << adev->mm_stats.log2_max_MBps;
-}
-
-static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
-{
-	if (!adev->mm_stats.log2_max_MBps)
-		return 0;
-
-	return bytes >> adev->mm_stats.log2_max_MBps;
-}
-
-/* Returns how many bytes TTM can move right now. If no bytes can be moved,
- * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
- * which means it can go over the threshold once. If that happens, the driver
- * will be in debt and no other buffer migrations can be done until that debt
- * is repaid.
- *
- * This approach allows moving a buffer of any size (it's important to allow
- * that).
- *
- * The currency is simply time in microseconds and it increases as the clock
- * ticks. The accumulated microseconds (us) are converted to bytes and
- * returned.
- */
-static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
-					      u64 *max_bytes,
-					      u64 *max_vis_bytes)
-{
-	s64 time_us, increment_us;
-	u64 free_vram, total_vram, used_vram;
-	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
-	 * throttling.
-	 *
-	 * It means that in order to get full max MBps, at least 5 IBs per
-	 * second must be submitted and not more than 200ms apart from each
-	 * other.
-	 */
-	const s64 us_upper_bound = 200000;
-
-	if (!adev->mm_stats.log2_max_MBps) {
-		*max_bytes = 0;
-		*max_vis_bytes = 0;
-		return;
-	}
-
-	total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
-	used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
-	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
-
-	spin_lock(&adev->mm_stats.lock);
-
-	/* Increase the amount of accumulated us.
-	 */
-	time_us = ktime_to_us(ktime_get());
-	increment_us = time_us - adev->mm_stats.last_update_us;
-	adev->mm_stats.last_update_us = time_us;
-	adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
-				      us_upper_bound);
-
-	/* This prevents the short period of low performance when the VRAM
-	 * usage is low and the driver is in debt or doesn't have enough
-	 * accumulated us to fill VRAM quickly.
-	 *
-	 * The situation can occur in these cases:
-	 * - a lot of VRAM is freed by userspace
-	 * - the presence of a big buffer causes a lot of evictions
-	 *   (solution: split buffers into smaller ones)
-	 *
-	 * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
-	 * accum_us to a positive number.
-	 */
-	if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
-		s64 min_us;
-
-		/* Be more aggressive on dGPUs. Try to fill a portion of free
-		 * VRAM now.
-		 */
-		if (!(adev->flags & AMD_IS_APU))
-			min_us = bytes_to_us(adev, free_vram / 4);
-		else
-			min_us = 0; /* Reset accum_us on APUs. */
-
-		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
-	}
-
-	/* This is set to 0 if the driver is in debt to disallow (optional)
-	 * buffer moves.
-	 */
-	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
-
-	/* Do the same for visible VRAM if half of it is free */
-	if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
-		u64 total_vis_vram = adev->gmc.visible_vram_size;
-		u64 used_vis_vram =
-			amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
-
-		if (used_vis_vram < total_vis_vram) {
-			u64 free_vis_vram = total_vis_vram - used_vis_vram;
-
-			adev->mm_stats.accum_us_vis =
-				min(adev->mm_stats.accum_us_vis + increment_us,
-				    us_upper_bound);
-
-			if (free_vis_vram >= total_vis_vram / 2)
-				adev->mm_stats.accum_us_vis =
-					max(bytes_to_us(adev, free_vis_vram / 2),
-					    adev->mm_stats.accum_us_vis);
-		}
-
-		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
-	} else {
-		*max_vis_bytes = 0;
-	}
-
-	spin_unlock(&adev->mm_stats.lock);
-}
-
-/* Report how many bytes have really been moved for the last command
- * submission. This can result in a debt that can stop buffer migrations
- * temporarily.
- */
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
-				  u64 num_vis_bytes)
-{
-	spin_lock(&adev->mm_stats.lock);
-	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
-	adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
-	spin_unlock(&adev->mm_stats.lock);
-}
-
 static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
 {
-	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	struct amdgpu_cs_parser *p = param;
 	struct ttm_operation_ctx ctx = {
 		.interruptible = true,
 		.no_wait_gpu = false,
 		.resv = bo->tbo.base.resv
 	};
-	uint32_t domain;
-	int r;
 
 	if (bo->tbo.pin_count)
 		return 0;
 
-	/* Don't move this buffer if we have depleted our allowance
-	 * to move it. Don't move anything if the threshold is zero.
-	 */
-	if (p->bytes_moved < p->bytes_moved_threshold &&
-	    (!bo->tbo.base.dma_buf ||
-	     list_empty(&bo->tbo.base.dma_buf->attachments))) {
-		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
-		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
-			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
-			 * visible VRAM if we've depleted our allowance to do
-			 * that.
-			 */
-			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
-				domain = bo->preferred_domains;
-			else
-				domain = bo->allowed_domains;
-		} else {
-			domain = bo->preferred_domains;
-		}
-	} else {
-		domain = bo->allowed_domains;
-	}
-
-retry:
-	amdgpu_bo_placement_from_domain(bo, domain);
-	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-
-	p->bytes_moved += ctx.bytes_moved;
-	if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
-	    amdgpu_res_cpu_visible(adev, bo->tbo.resource))
-		p->bytes_moved_vis += ctx.bytes_moved;
-
-	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
-		domain = bo->allowed_domains;
-		goto retry;
-	}
-
-	return r;
+	amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
+	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 }
 
 static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
@@ -947,13 +770,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		e->user_pages = NULL;
 	}
 
-	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
-					  &p->bytes_moved_vis_threshold);
-	p->bytes_moved = 0;
-	p->bytes_moved_vis = 0;
-
 	r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL,
-			       amdgpu_cs_bo_validate, p);
+			       amdgpu_cs_bo_validate, NULL);
 	if (r) {
 		DRM_ERROR("amdgpu_vm_validate() failed.\n");
 		goto out_free_user_pages;
@@ -973,9 +791,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(p->uf_bo);
 	}
 
-	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
-				     p->bytes_moved_vis);
-
 	for (i = 0; i < p->gang_size; ++i)
 		amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj,
 					 p->bo_list->gws_obj,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
index 39c33ad100cb7..e3d04ac4764be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
@@ -67,10 +67,6 @@ struct amdgpu_cs_parser {
 	struct amdgpu_bo_list	*bo_list;
 	struct amdgpu_mn	*mn;
 	struct dma_fence	*fence;
-	uint64_t		bytes_moved_threshold;
-	uint64_t		bytes_moved_vis_threshold;
-	uint64_t		bytes_moved;
-	uint64_t		bytes_moved_vis;
 
 	/* user fence */
 	struct amdgpu_bo	*uf_bo;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 331b9ed8062c7..5834a95d680d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -621,13 +621,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 	if (unlikely(r != 0))
 		return r;
 
-	if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
-	    amdgpu_res_cpu_visible(adev, bo->tbo.resource))
-		amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
-					     ctx.bytes_moved);
-	else
-		amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);
-
 	if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
 	    bo->tbo.resource->mem_type == TTM_PL_VRAM) {
 		struct dma_fence *fence;
-- 
2.44.0
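
P.S. for reviewers who want the deleted accounting in one place: the
standalone sketch below restates the removed us<->bytes conversion.
log2_max_mbps stands in for adev->mm_stats.log2_max_MBps, and the
bandwidth/budget numbers are purely illustrative, not taken from any
real device.

/* Sketch of the removed throttling arithmetic (illustrative only). */
#include <stdint.h>
#include <stdio.h>

/* accum_us grows by 1,000,000 per second, so shifting by log2 of the
 * maximum MB/s turns accumulated microseconds straight into a byte
 * budget: 1e6 us/s * N MB/s == N * 1e6 bytes per second.
 */
static uint64_t us_to_bytes(int64_t us, unsigned int log2_max_mbps)
{
	if (us <= 0 || !log2_max_mbps)
		return 0;
	return (uint64_t)us << log2_max_mbps;
}

/* Inverse conversion: a completed move of N bytes was charged back as
 * microseconds of "debt" against accum_us.
 */
static int64_t bytes_to_us(uint64_t bytes, unsigned int log2_max_mbps)
{
	if (!log2_max_mbps)
		return 0;
	return (int64_t)(bytes >> log2_max_mbps);
}

int main(void)
{
	unsigned int log2_max_mbps = 12;	/* assume 4096 MB/s */
	int64_t accum_us = 200000;		/* the 200 ms upper bound */

	/* ~819 MB budget at the cap; a 64 MiB move costs ~16.4 ms. */
	printf("budget at cap: %llu bytes\n",
	       (unsigned long long)us_to_bytes(accum_us, log2_max_mbps));
	printf("debt for a 64 MiB move: %lld us\n",
	       (long long)bytes_to_us(64ULL << 20, log2_max_mbps));
	return 0;
}

At the 200 ms cap and an assumed 4096 MB/s this works out to roughly
819 MB of budget per submission, i.e. the "moving a buffer of any
size" behavior the removed comment describes; with this patch, TTM
makes those placement decisions itself.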