Change-Id: Ifb0dc95db6a358cf7f76e2a99f94c58637ad6ee6 Signed-off-by: Chunming Zhou <david1.zhou at amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 17 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 170 ++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 6 + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1 + 7 files changed, 187 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4b66585a8638..f398a566f57b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -443,7 +443,8 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, int alignment, u32 initial_domain, u64 flags, enum ttm_bo_type type, struct reservation_object *resv, - struct drm_gem_object **obj); + struct drm_gem_object **obj, + struct ttm_process *process); int amdgpu_mode_dumb_create(struct drm_file *file_priv, struct drm_device *dev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index ff89e84b34ce..1cee2125f570 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -146,7 +146,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | AMDGPU_GEM_CREATE_VRAM_CLEARED, - true, NULL, &gobj); + true, NULL, &gobj, &adev->kernel_process); if (ret) { pr_err("failed to allocate framebuffer (%d)\n", aligned_size); return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 37b19ce97699..d5cbdc810aba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -45,7 +45,8 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, int alignment, u32 initial_domain, u64 flags, enum ttm_bo_type type, struct reservation_object *resv, - struct drm_gem_object **obj) + struct drm_gem_object **obj, + struct ttm_process *process) { struct amdgpu_bo *bo; int r; @@ -56,8 +57,8 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, alignment = PAGE_SIZE; } - r = amdgpu_bo_create(adev, size, alignment, initial_domain, - flags, type, resv, &bo); + r = amdgpu_bo_create1(adev, size, alignment, initial_domain, + flags, type, resv, &bo, process); if (r) { DRM_DEBUG("Failed to allocate GEM object (%ld, %d, %u, %d)\n", size, initial_domain, alignment, r); @@ -243,7 +244,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, r = amdgpu_gem_object_create(adev, size, args->in.alignment, (u32)(0xffffffff & args->in.domains), - flags, false, resv, &gobj); + flags, false, resv, &gobj, &vm->ttm_vm); if (!r) { abo = gem_to_amdgpu_bo(gobj); abo->tbo.process = &vm->ttm_vm; @@ -273,6 +274,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, { struct ttm_operation_ctx ctx = { true, false }; struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; struct drm_amdgpu_gem_userptr *args = data; struct drm_gem_object *gobj; struct amdgpu_bo *bo; @@ -297,7 +300,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, /* create a gem object to contain this object in */ r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU, - 0, 0, NULL, &gobj); + 0, 0, NULL, &gobj, &vm->ttm_vm); if (r) return r; @@ -735,6 +738,8 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, struct drm_mode_create_dumb *args) { struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = file_priv->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; struct drm_gem_object *gobj; uint32_t handle; int r; @@ -747,7 +752,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, - false, NULL, &gobj); + false, NULL, &gobj, &vm->ttm_vm); if (r) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e911db2d1945..7bb6ee777067 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -421,6 +421,11 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; #endif + if (type == ttm_bo_type_kernel) { + bo->tbo.priority = 1; + bo->tbo.process = &adev->kernel_process; + } + bo->tbo.bdev = &adev->mman.bdev; amdgpu_ttm_placement_from_domain(bo, domains); r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, @@ -447,11 +452,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, else amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0); - if (type == ttm_bo_type_kernel) { - bo->tbo.priority = 1; - bo->tbo.process = &adev->kernel_process; - } - if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { struct dma_fence *fence; @@ -538,6 +538,166 @@ int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, return r; } +static int amdgpu_bo_do_create1(struct amdgpu_device *adev, unsigned long size, + int byte_align, u32 domain, + u64 flags, enum ttm_bo_type type, + struct reservation_object *resv, + struct amdgpu_bo **bo_ptr, + struct ttm_process *process) +{ + struct ttm_operation_ctx ctx = { + .interruptible = (type != ttm_bo_type_kernel), + .no_wait_gpu = false, + .resv = resv, + .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT + }; + struct amdgpu_bo *bo; + unsigned long page_align; + size_t acc_size; + u32 domains, preferred_domains, allowed_domains; + int r; + + page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT; + size = ALIGN(size, PAGE_SIZE); + + if (!amdgpu_bo_validate_size(adev, size, domain)) + return -ENOMEM; + + *bo_ptr = NULL; + + acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, + sizeof(struct amdgpu_bo)); + + preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT | + AMDGPU_GEM_DOMAIN_CPU | + AMDGPU_GEM_DOMAIN_GDS | + AMDGPU_GEM_DOMAIN_GWS | + AMDGPU_GEM_DOMAIN_OA); + allowed_domains = preferred_domains; + if (type != ttm_bo_type_kernel && + allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) + allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; + domains = preferred_domains; +retry: + bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL); + if (bo == NULL) + return -ENOMEM; + drm_gem_private_object_init(adev->ddev, &bo->gem_base, size); + INIT_LIST_HEAD(&bo->shadow_list); + INIT_LIST_HEAD(&bo->va); + bo->preferred_domains = preferred_domains; + bo->allowed_domains = allowed_domains; + + bo->flags = flags; + +#ifdef CONFIG_X86_32 + /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit + * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 + */ + bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; +#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) + /* Don't try to enable write-combining when it can't work, or things + * may be slow + * See https://bugs.freedesktop.org/show_bug.cgi?id=88758 + */ + +#ifndef CONFIG_COMPILE_TEST +#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \ + thanks to write-combining +#endif + + if (bo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) + DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " + "better performance thanks to write-combining\n"); + bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; +#else + /* For architectures that don't support WC memory, + * mask out the WC flag from the BO + */ + if (!drm_arch_can_wc_memory()) + bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; +#endif + + if (type == ttm_bo_type_kernel) + bo->tbo.priority = 1; + bo->tbo.process = process; + + bo->tbo.bdev = &adev->mman.bdev; + amdgpu_ttm_placement_from_domain(bo, domains); + r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, + &bo->placement, page_align, &ctx, acc_size, + NULL, resv, &amdgpu_ttm_bo_destroy); + if (unlikely(r && r != -ERESTARTSYS) && type == ttm_bo_type_device && + !(flags & AMDGPU_GEM_CREATE_NO_FALLBACK)) { + if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { + flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + goto retry; + } else if (domains != allowed_domains) { + domains = allowed_domains; + goto retry; + } + } + if (unlikely(r)) + return r; + + if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && + bo->tbo.mem.mem_type == TTM_PL_VRAM && + bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT) + amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, + ctx.bytes_moved); + else + amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0); + + if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && + bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { + struct dma_fence *fence; + + r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence); + if (unlikely(r)) + goto fail_unreserve; + + amdgpu_bo_fence(bo, fence, false); + dma_fence_put(bo->tbo.moving); + bo->tbo.moving = dma_fence_get(fence); + dma_fence_put(fence); + } + if (!resv) + amdgpu_bo_unreserve(bo); + *bo_ptr = bo; + + trace_amdgpu_bo_create(bo); + + /* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */ + if (type == ttm_bo_type_device) + bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + + return 0; + +fail_unreserve: + if (!resv) + ww_mutex_unlock(&bo->tbo.resv->lock); + amdgpu_bo_unref(&bo); + return r; +} + + +int amdgpu_bo_create1(struct amdgpu_device *adev, unsigned long size, + int byte_align, u32 domain, + u64 flags, enum ttm_bo_type type, + struct reservation_object *resv, + struct amdgpu_bo **bo_ptr, struct ttm_process *process) +{ + uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW; + int r; + + r = amdgpu_bo_do_create1(adev, size, byte_align, domain, + parent_flags, type, resv, bo_ptr, process); + if (r) + return r; + return r; +} + int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev, struct amdgpu_ring *ring, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 546f77cb7882..d4191f0424d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -208,6 +208,12 @@ int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, u64 flags, enum ttm_bo_type type, struct reservation_object *resv, struct amdgpu_bo **bo_ptr); +int amdgpu_bo_create1(struct amdgpu_device *adev, unsigned long size, + int byte_align, u32 domain, + u64 flags, enum ttm_bo_type type, + struct reservation_object *resv, + struct amdgpu_bo **bo_ptr, struct ttm_process *process); + int amdgpu_bo_create_reserved(struct amdgpu_device *adev, unsigned long size, int align, u32 domain, struct amdgpu_bo **bo_ptr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 4c9e10505e2d..591db2df1685 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1535,6 +1535,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) return; amdgpu_ttm_debugfs_fini(adev); + ttm_process_fini(&adev->kernel_process, &adev->mman.bdev); amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); amdgpu_ttm_fw_reserve_vram_fini(adev); if (adev->mman.aper_base_kaddr) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6ef449ea8d07..3a54b236fd42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2612,6 +2612,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_free_mapping(adev, vm, mapping, NULL); } + ttm_process_fini(&vm->ttm_vm, &adev->mman.bdev); root = amdgpu_bo_ref(vm->root.base.bo); r = amdgpu_bo_reserve(root, true); if (r) { -- 2.14.1