From: Marek Olšák <marek.olsak@xxxxxxx> I've chosen to do it like this because it's easy and allows an arbitrary number of processes. Signed-off-by: Marek Olšák <marek.olsak at amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 10 -- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 3 - drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 20 ---- drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 19 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 24 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 6 - drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 7 -- drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 3 - drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 14 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 21 ---- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 6 - drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 - drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 61 ---------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 8 -- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 34 +----- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 125 +++++--------------- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 123 +++++-------------- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 124 ++++++------------- include/uapi/drm/amdgpu_drm.h | 15 +-- 19 files changed, 109 insertions(+), 519 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index b80243d3972e..7264a4930b88 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -71,23 +71,20 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, / sizeof(struct amdgpu_bo_list_entry)) return -EINVAL; size = sizeof(struct amdgpu_bo_list); size += num_entries * sizeof(struct amdgpu_bo_list_entry); list = kvmalloc(size, GFP_KERNEL); if (!list) return -ENOMEM; kref_init(&list->refcount); - list->gds_obj = adev->gds.gds_gfx_bo; - list->gws_obj = adev->gds.gws_gfx_bo; - list->oa_obj = adev->gds.oa_gfx_bo; array = amdgpu_bo_list_array_entry(list, 0); memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry)); for (i = 0; i < num_entries; ++i) { struct amdgpu_bo_list_entry *entry; struct drm_gem_object *gobj; struct amdgpu_bo *bo; struct mm_struct *usermm; @@ -111,27 +108,20 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, } else { entry = &array[last_entry++]; } entry->robj = bo; entry->priority = min(info[i].bo_priority, AMDGPU_BO_LIST_MAX_PRIORITY); entry->tv.bo = &entry->robj->tbo; entry->tv.shared = !entry->robj->prime_shared_count; - if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GDS) - list->gds_obj = entry->robj; - if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GWS) - list->gws_obj = entry->robj; - if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_OA) - list->oa_obj = entry->robj; - total_size += amdgpu_bo_size(entry->robj); trace_amdgpu_bo_list_set(list, entry->robj); } list->first_userptr = first_userptr; list->num_entries = num_entries; trace_amdgpu_cs_bo_status(list->num_entries, total_size); *result = list; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 61b089768e1c..30f12a60aa28 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -36,23 +36,20 @@ struct amdgpu_bo_list_entry { struct ttm_validate_buffer tv; struct amdgpu_bo_va *bo_va; uint32_t priority; struct page **user_pages; int user_invalidated; }; struct amdgpu_bo_list { struct rcu_head rhead; struct kref refcount; - struct amdgpu_bo *gds_obj; - struct amdgpu_bo 
*gws_obj; - struct amdgpu_bo *oa_obj; unsigned first_userptr; unsigned num_entries; }; int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, struct amdgpu_bo_list **result); void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, struct list_head *validated); void amdgpu_bo_list_put(struct amdgpu_bo_list *list); int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 1081fd00b059..88b58facf29e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -565,23 +565,20 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, return 0; } static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_list_entry *e; struct list_head duplicates; - struct amdgpu_bo *gds; - struct amdgpu_bo *gws; - struct amdgpu_bo *oa; unsigned tries = 10; int r; INIT_LIST_HEAD(&p->validated); /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */ if (cs->in.bo_list_handle) { if (p->bo_list) return -EINVAL; @@ -705,40 +702,23 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, r = amdgpu_cs_list_validate(p, &p->validated); if (r) { DRM_ERROR("amdgpu_cs_list_validate(validated) failed.\n"); goto error_validate; } amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, p->bytes_moved_vis); - gds = p->bo_list->gds_obj; - gws = p->bo_list->gws_obj; - oa = p->bo_list->oa_obj; - amdgpu_bo_list_for_each_entry(e, p->bo_list) e->bo_va = amdgpu_vm_bo_find(vm, e->robj); - if (gds) { - p->job->gds_base = amdgpu_bo_gpu_offset(gds); - p->job->gds_size = amdgpu_bo_size(gds); - } - if (gws) { - p->job->gws_base = amdgpu_bo_gpu_offset(gws); - p->job->gws_size = amdgpu_bo_size(gws); - } - if (oa) { - p->job->oa_base = amdgpu_bo_gpu_offset(oa); - p->job->oa_size = amdgpu_bo_size(oa); - } - if (!r && p->uf_entry.robj) { struct amdgpu_bo *uf = p->uf_entry.robj; r = amdgpu_ttm_alloc_gart(&uf->tbo); p->job->uf_addr += amdgpu_bo_gpu_offset(uf); } error_validate: if (r) ttm_eu_backoff_reservation(&p->ticket, &p->validated); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h index e73728d90388..69ba25c2e921 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h @@ -17,48 +17,33 @@ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * */ #ifndef __AMDGPU_GDS_H__ #define __AMDGPU_GDS_H__ -/* Because TTM request that alloacted buffer should be PAGE_SIZE aligned, - * we should report GDS/GWS/OA size as PAGE_SIZE aligned - * */ -#define AMDGPU_GDS_SHIFT 2 -#define AMDGPU_GWS_SHIFT PAGE_SHIFT -#define AMDGPU_OA_SHIFT PAGE_SHIFT - struct amdgpu_ring; struct amdgpu_bo; struct amdgpu_gds_asic_info { uint32_t total_size; - uint32_t gfx_partition_size; - uint32_t cs_partition_size; + uint32_t gfx_size_per_vmid; + uint32_t kfd_size_per_vmid; }; struct amdgpu_gds { struct amdgpu_gds_asic_info mem; struct amdgpu_gds_asic_info gws; struct amdgpu_gds_asic_info oa; - /* At present, GDS, GWS and OA resources for gfx (graphics) - * is always pre-allocated and available for graphics operation. 
- * Such resource is shared between all gfx clients. - * TODO: move this operation to user space - * */ - struct amdgpu_bo* gds_gfx_bo; - struct amdgpu_bo* gws_gfx_bo; - struct amdgpu_bo* oa_gfx_bo; }; struct amdgpu_gds_reg_offset { uint32_t mem_base; uint32_t mem_size; uint32_t gws; uint32_t oa; }; #endif /* __AMDGPU_GDS_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index d30a0838851b..c87ad4b4d0b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -223,43 +223,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_NO_CPU_ACCESS | AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_VRAM_CLEARED | AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | AMDGPU_GEM_CREATE_EXPLICIT_SYNC)) return -EINVAL; /* reject invalid gem domains */ - if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK) + if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU | + AMDGPU_GEM_DOMAIN_GTT | + AMDGPU_GEM_DOMAIN_VRAM)) return -EINVAL; - /* create a gem object to contain this object in */ - if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | - AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { - if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { - /* if gds bo is created from user space, it must be - * passed to bo list - */ - DRM_ERROR("GDS bo cannot be per-vm-bo\n"); - return -EINVAL; - } - flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; - if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS) - size = size << AMDGPU_GDS_SHIFT; - else if (args->in.domains == AMDGPU_GEM_DOMAIN_GWS) - size = size << AMDGPU_GWS_SHIFT; - else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA) - size = size << AMDGPU_OA_SHIFT; - else - return -EINVAL; - } size = roundup(size, PAGE_SIZE); if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { r = amdgpu_bo_reserve(vm->root.base.bo, false); if (r) return r; resv = vm->root.base.bo->tbo.resv; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 3a072a7a39f0..c2e6a1a11d7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -516,26 +516,20 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, * Reset saved GDW, GWS and OA to force switch on next flush. 
*/ void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub, unsigned vmid) { struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vmid *id = &id_mgr->ids[vmid]; mutex_lock(&id_mgr->lock); id->owner = 0; - id->gds_base = 0; - id->gds_size = 0; - id->gws_base = 0; - id->gws_size = 0; - id->oa_base = 0; - id->oa_size = 0; mutex_unlock(&id_mgr->lock); } /** * amdgpu_vmid_reset_all - reset VMID to zero * * @adev: amdgpu device structure * * Reset VMID to force flush on next use */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 7625419f0fc2..06078e665532 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -44,27 +44,20 @@ struct amdgpu_vmid { struct amdgpu_sync active; struct dma_fence *last_flush; uint64_t owner; uint64_t pd_gpu_addr; /* last flushed PD/PT update */ struct dma_fence *flushed_updates; uint32_t current_gpu_reset_count; - uint32_t gds_base; - uint32_t gds_size; - uint32_t gws_base; - uint32_t gws_size; - uint32_t oa_base; - uint32_t oa_size; - unsigned pasid; struct dma_fence *pasid_mapping; }; struct amdgpu_vmid_mgr { struct mutex lock; unsigned num_ids; struct list_head ids_lru; struct amdgpu_vmid ids[AMDGPU_NUM_VMID]; atomic_t reserved_vmid_num; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index 57cfe78a262b..3db553f6ad01 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -42,23 +42,20 @@ struct amdgpu_job { struct amdgpu_sync sched_sync; struct amdgpu_ib *ibs; struct dma_fence *fence; /* the hw fence */ uint32_t preamble_status; uint32_t num_ibs; void *owner; bool vm_needs_flush; uint64_t vm_pd_addr; unsigned vmid; unsigned pasid; - uint32_t gds_base, gds_size; - uint32_t gws_base, gws_size; - uint32_t oa_base, oa_size; uint32_t vram_lost_counter; /* user fence handling */ uint64_t uf_addr; uint64_t uf_sequence; }; int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, struct amdgpu_job **job, struct amdgpu_vm *vm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 29ac3873eeb0..209954290954 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -517,27 +517,27 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file case AMDGPU_INFO_VIS_VRAM_USAGE: ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_GTT_USAGE: ui64 = amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]); return copy_to_user(out, &ui64, min(size, 8u)) ? 
-EFAULT : 0; case AMDGPU_INFO_GDS_CONFIG: { struct drm_amdgpu_info_gds gds_info; memset(&gds_info, 0, sizeof(gds_info)); - gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size >> AMDGPU_GDS_SHIFT; - gds_info.compute_partition_size = adev->gds.mem.cs_partition_size >> AMDGPU_GDS_SHIFT; - gds_info.gds_total_size = adev->gds.mem.total_size >> AMDGPU_GDS_SHIFT; - gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size >> AMDGPU_GWS_SHIFT; - gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size >> AMDGPU_GWS_SHIFT; - gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size >> AMDGPU_OA_SHIFT; - gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size >> AMDGPU_OA_SHIFT; + gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_size_per_vmid; + gds_info.compute_partition_size = adev->gds.mem.kfd_size_per_vmid; + gds_info.gds_total_size = adev->gds.mem.total_size; + gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_size_per_vmid; + gds_info.gws_per_compute_partition = adev->gds.gws.kfd_size_per_vmid; + gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_size_per_vmid; + gds_info.oa_per_compute_partition = adev->gds.oa.kfd_size_per_vmid; return copy_to_user(out, &gds_info, min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0; } case AMDGPU_INFO_VRAM_GTT: { struct drm_amdgpu_info_vram_gtt vram_gtt; vram_gtt.vram_size = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size); vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size - atomic64_read(&adev->visible_pin_size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index de990bdcdd6c..76770a8c29a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -178,41 +178,20 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) places[c].lpfn = 0; places[c].flags = TTM_PL_FLAG_SYSTEM; if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) places[c].flags |= TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED; else places[c].flags |= TTM_PL_FLAG_CACHED; c++; } - if (domain & AMDGPU_GEM_DOMAIN_GDS) { - places[c].fpfn = 0; - places[c].lpfn = 0; - places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_GDS; - c++; - } - - if (domain & AMDGPU_GEM_DOMAIN_GWS) { - places[c].fpfn = 0; - places[c].lpfn = 0; - places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_GWS; - c++; - } - - if (domain & AMDGPU_GEM_DOMAIN_OA) { - places[c].fpfn = 0; - places[c].lpfn = 0; - places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_OA; - c++; - } - if (!c) { places[c].fpfn = 0; places[c].lpfn = 0; places[c].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; c++; } BUG_ON(c >= AMDGPU_BO_MAX_PLACEMENTS); placement->num_placement = c; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 907fdf46d895..e089964cbcb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -120,26 +120,20 @@ static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo) */ static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type) { switch (mem_type) { case TTM_PL_VRAM: return AMDGPU_GEM_DOMAIN_VRAM; case TTM_PL_TT: return AMDGPU_GEM_DOMAIN_GTT; case TTM_PL_SYSTEM: return AMDGPU_GEM_DOMAIN_CPU; - case AMDGPU_PL_GDS: - return AMDGPU_GEM_DOMAIN_GDS; - case AMDGPU_PL_GWS: - return AMDGPU_GEM_DOMAIN_GWS; - case AMDGPU_PL_OA: - return AMDGPU_GEM_DOMAIN_OA; default: break; } return 0; } /** * amdgpu_bo_reserve - 
reserve bo * @bo: bo structure * @no_intr: don't return -ERESTARTSYS on pending signal diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 9cc239968e40..f6ea9604e611 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -130,24 +130,20 @@ struct amdgpu_ring_funcs { /* command emit functions */ void (*emit_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib, unsigned vmid, bool ctx_switch); void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, uint64_t seq, unsigned flags); void (*emit_pipeline_sync)(struct amdgpu_ring *ring); void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); void (*emit_hdp_flush)(struct amdgpu_ring *ring); - void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid, - uint32_t gds_base, uint32_t gds_size, - uint32_t gws_base, uint32_t gws_size, - uint32_t oa_base, uint32_t oa_size); /* testing functions */ int (*test_ring)(struct amdgpu_ring *ring); int (*test_ib)(struct amdgpu_ring *ring, long timeout); /* insert NOP packets */ void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count); void (*insert_start)(struct amdgpu_ring *ring); void (*insert_end)(struct amdgpu_ring *ring); /* pad the indirect buffer to the necessary number of dw */ void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib); unsigned (*init_cond_exec)(struct amdgpu_ring *ring); @@ -226,21 +222,20 @@ struct amdgpu_ring { #define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib))) #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r)) #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t)) #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) #define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c)) #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r)) #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr)) #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) -#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as)) #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d)) #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 8a158ee922f7..2cc62b0e7ea8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -195,30 +195,20 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, break; case TTM_PL_VRAM: /* "On-card" video ram */ man->func = 
&amdgpu_vram_mgr_func; man->gpu_offset = adev->gmc.vram_start; man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE; man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC; man->default_caching = TTM_PL_FLAG_WC; break; - case AMDGPU_PL_GDS: - case AMDGPU_PL_GWS: - case AMDGPU_PL_OA: - /* On-chip GDS memory*/ - man->func = &ttm_bo_manager_func; - man->gpu_offset = 0; - man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_CMA; - man->available_caching = TTM_PL_FLAG_UNCACHED; - man->default_caching = TTM_PL_FLAG_UNCACHED; - break; default: DRM_ERROR("Unsupported memory type %u\n", (unsigned)type); return -EINVAL; } return 0; } /** * amdgpu_evict_flags - Compute placement flags * @@ -1039,25 +1029,20 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, if (r) { DRM_ERROR("failed to pin userptr\n"); return r; } } if (!ttm->num_pages) { WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n", ttm->num_pages, bo_mem, ttm); } - if (bo_mem->mem_type == AMDGPU_PL_GDS || - bo_mem->mem_type == AMDGPU_PL_GWS || - bo_mem->mem_type == AMDGPU_PL_OA) - return -EINVAL; - if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) { gtt->offset = AMDGPU_BO_INVALID_OFFSET; return 0; } /* compute PTE flags relevant to this BO memory */ flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem); /* bind pages into GART page tables */ gtt->offset = ((u64)bo_mem->start << PAGE_SHIFT) - adev->gmc.gart_start; @@ -1818,60 +1803,20 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* Initialize GTT memory pool */ r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT); if (r) { DRM_ERROR("Failed initializing GTT heap.\n"); return r; } DRM_INFO("amdgpu: %uM of GTT memory ready.\n", (unsigned)(gtt_size / (1024 * 1024))); - /* Initialize various on-chip memory pools */ - adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT; - adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT; - adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT; - adev->gds.gws.total_size = adev->gds.gws.total_size << AMDGPU_GWS_SHIFT; - adev->gds.gws.gfx_partition_size = adev->gds.gws.gfx_partition_size << AMDGPU_GWS_SHIFT; - adev->gds.gws.cs_partition_size = adev->gds.gws.cs_partition_size << AMDGPU_GWS_SHIFT; - adev->gds.oa.total_size = adev->gds.oa.total_size << AMDGPU_OA_SHIFT; - adev->gds.oa.gfx_partition_size = adev->gds.oa.gfx_partition_size << AMDGPU_OA_SHIFT; - adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size << AMDGPU_OA_SHIFT; - /* GDS Memory */ - if (adev->gds.mem.total_size) { - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS, - adev->gds.mem.total_size >> PAGE_SHIFT); - if (r) { - DRM_ERROR("Failed initializing GDS heap.\n"); - return r; - } - } - - /* GWS */ - if (adev->gds.gws.total_size) { - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS, - adev->gds.gws.total_size >> PAGE_SHIFT); - if (r) { - DRM_ERROR("Failed initializing gws heap.\n"); - return r; - } - } - - /* OA */ - if (adev->gds.oa.total_size) { - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA, - adev->gds.oa.total_size >> PAGE_SHIFT); - if (r) { - DRM_ERROR("Failed initializing oa heap.\n"); - return r; - } - } - /* Register debugfs entries for amdgpu_ttm */ r = amdgpu_ttm_debugfs_init(adev); if (r) { DRM_ERROR("Failed to init debugfs\n"); return r; } return 0; } /** @@ -1892,26 +1837,20 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) return; amdgpu_ttm_debugfs_fini(adev); amdgpu_ttm_fw_reserve_vram_fini(adev); if 
(adev->mman.aper_base_kaddr) iounmap(adev->mman.aper_base_kaddr); adev->mman.aper_base_kaddr = NULL; ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM); ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT); - if (adev->gds.mem.total_size) - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS); - if (adev->gds.gws.total_size) - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS); - if (adev->gds.oa.total_size) - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA); ttm_bo_device_release(&adev->mman.bdev); amdgpu_ttm_global_fini(adev); adev->mman.initialized = false; DRM_INFO("amdgpu: ttm finalized\n"); } /** * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions * * @adev: amdgpu_device pointer diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index fe8f276e9811..04557a382b19 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -20,28 +20,20 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ #ifndef __AMDGPU_TTM_H__ #define __AMDGPU_TTM_H__ #include "amdgpu.h" #include <drm/gpu_scheduler.h> -#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) -#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1) -#define AMDGPU_PL_OA (TTM_PL_PRIV + 2) - -#define AMDGPU_PL_FLAG_GDS (TTM_PL_FLAG_PRIV << 0) -#define AMDGPU_PL_FLAG_GWS (TTM_PL_FLAG_PRIV << 1) -#define AMDGPU_PL_FLAG_OA (TTM_PL_FLAG_PRIV << 2) - #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 struct amdgpu_mman { struct ttm_bo_global_ref bo_global_ref; struct drm_global_reference mem_global_ref; struct ttm_bo_device bdev; bool mem_global_referenced; bool initialized; void __iomem *aper_base_kaddr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index be1659fedf94..c66f1c6f0ba8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -803,86 +803,69 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev) * Returns: * True if sync is needed. */ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, struct amdgpu_job *job) { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vmid *id; - bool gds_switch_needed; bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug; if (job->vmid == 0) return false; id = &id_mgr->ids[job->vmid]; - gds_switch_needed = ring->funcs->emit_gds_switch && ( - id->gds_base != job->gds_base || - id->gds_size != job->gds_size || - id->gws_base != job->gws_base || - id->gws_size != job->gws_size || - id->oa_base != job->oa_base || - id->oa_size != job->oa_size); if (amdgpu_vmid_had_gpu_reset(adev, id)) return true; - return vm_flush_needed || gds_switch_needed; + return vm_flush_needed; } /** * amdgpu_vm_flush - hardware flush the vm * * @ring: ring to use for flush * @job: related job * @need_pipe_sync: is pipe sync needed * * Emit a VM flush when it is necessary. * * Returns: * 0 on success, errno otherwise. 
*/ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync) { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; - bool gds_switch_needed = ring->funcs->emit_gds_switch && ( - id->gds_base != job->gds_base || - id->gds_size != job->gds_size || - id->gws_base != job->gws_base || - id->gws_size != job->gws_size || - id->oa_base != job->oa_base || - id->oa_size != job->oa_size); bool vm_flush_needed = job->vm_needs_flush; bool pasid_mapping_needed = id->pasid != job->pasid || !id->pasid_mapping || !dma_fence_is_signaled(id->pasid_mapping); struct dma_fence *fence = NULL; unsigned patch_offset = 0; int r; if (amdgpu_vmid_had_gpu_reset(adev, id)) { - gds_switch_needed = true; vm_flush_needed = true; pasid_mapping_needed = true; } - gds_switch_needed &= !!ring->funcs->emit_gds_switch; vm_flush_needed &= !!ring->funcs->emit_vm_flush && job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET; pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; - if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) + if (!vm_flush_needed && !need_pipe_sync) return 0; if (ring->funcs->init_cond_exec) patch_offset = amdgpu_ring_init_cond_exec(ring); if (need_pipe_sync) amdgpu_ring_emit_pipeline_sync(ring); if (vm_flush_needed) { trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr); @@ -907,33 +890,20 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ mutex_unlock(&id_mgr->lock); } if (pasid_mapping_needed) { id->pasid = job->pasid; dma_fence_put(id->pasid_mapping); id->pasid_mapping = dma_fence_get(fence); } dma_fence_put(fence); - if (ring->funcs->emit_gds_switch && gds_switch_needed) { - id->gds_base = job->gds_base; - id->gds_size = job->gds_size; - id->gws_base = job->gws_base; - id->gws_size = job->gws_size; - id->oa_base = job->oa_base; - id->oa_size = job->oa_size; - amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, - job->gds_size, job->gws_base, - job->gws_size, job->oa_base, - job->oa_size); - } - if (ring->funcs->patch_cond_exec) amdgpu_ring_patch_cond_exec(ring, patch_offset); /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */ if (ring->funcs->emit_switch_buffer) { amdgpu_ring_emit_switch_buffer(ring); amdgpu_ring_emit_switch_buffer(ring); } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index a15d9c0f233b..f5228e169c3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1890,21 +1890,21 @@ static void gfx_v7_0_config_init(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * * Configures the 3D engine and tiling configuration * registers so that the 3D engine is usable. 
*/ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) { u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base; u32 tmp; - int i; + int i, vmid; WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT)); WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); gfx_v7_0_tiling_mode_table_init(adev); gfx_v7_0_setup_rb(adev); @@ -2014,20 +2014,42 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) tmp = RREG32(mmSPI_ARB_PRIORITY); tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2); tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2); tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2); tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2); WREG32(mmSPI_ARB_PRIORITY, tmp); mutex_unlock(&adev->grbm_idx_mutex); + for (vmid = 0; vmid < AMDGPU_NUM_VMID; vmid++) { + unsigned gds_size, gws_size, oa_size; + + if (vmid < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids) { + gds_size = adev->gds.mem.gfx_size_per_vmid; + gws_size = adev->gds.gws.gfx_size_per_vmid; + oa_size = adev->gds.oa.gfx_size_per_vmid; + } else { + gds_size = adev->gds.mem.kfd_size_per_vmid; + gws_size = adev->gds.gws.kfd_size_per_vmid; + oa_size = adev->gds.oa.kfd_size_per_vmid; + } + + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, vmid * gds_size); + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, gds_size); + WREG32(amdgpu_gds_reg_offset[vmid].gws, + (vmid * gws_size) | + (gws_size << GDS_GWS_VMID0__SIZE__SHIFT)); + WREG32(amdgpu_gds_reg_offset[vmid].oa, + ((1 << oa_size) - 1) << (vmid * oa_size)); + } + udelay(50); } /* * GPU scratch registers helpers function. */ /** * gfx_v7_0_scratch_init - setup driver info for CP scratch regs * * @adev: amdgpu_device pointer @@ -4157,68 +4179,20 @@ static uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev) uint64_t clock; mutex_lock(&adev->gfx.gpu_clock_mutex); WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); mutex_unlock(&adev->gfx.gpu_clock_mutex); return clock; } -static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring, - uint32_t vmid, - uint32_t gds_base, uint32_t gds_size, - uint32_t gws_base, uint32_t gws_size, - uint32_t oa_base, uint32_t oa_size) -{ - gds_base = gds_base >> AMDGPU_GDS_SHIFT; - gds_size = gds_size >> AMDGPU_GDS_SHIFT; - - gws_base = gws_base >> AMDGPU_GWS_SHIFT; - gws_size = gws_size >> AMDGPU_GWS_SHIFT; - - oa_base = oa_base >> AMDGPU_OA_SHIFT; - oa_size = oa_size >> AMDGPU_OA_SHIFT; - - /* GDS Base */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(0))); - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, gds_base); - - /* GDS Size */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(0))); - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, gds_size); - - /* GWS */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(0))); - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); - - 
/* OA */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(0))); - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); -} - static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) { struct amdgpu_device *adev = ring->adev; uint32_t value = 0; value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); WREG32(mmSQ_CMD, value); @@ -4584,55 +4558,32 @@ static int gfx_v7_0_sw_init(void *handle) ring_id, i, k, j); if (r) return r; ring_id++; } } } - /* reserve GDS, GWS and OA resource for gfx */ - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, - &adev->gds.gds_gfx_bo, NULL, NULL); - if (r) - return r; - - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, - &adev->gds.gws_gfx_bo, NULL, NULL); - if (r) - return r; - - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, - &adev->gds.oa_gfx_bo, NULL, NULL); - if (r) - return r; - adev->gfx.ce_ram_size = 0x8000; gfx_v7_0_gpu_early_init(adev); return r; } static int gfx_v7_0_sw_fini(void *handle) { int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); - for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); gfx_v7_0_cp_compute_fini(adev); gfx_v7_0_rlc_fini(adev); gfx_v7_0_mec_fini(adev); amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, &adev->gfx.rlc.clear_state_gpu_addr, @@ -5073,64 +5024,60 @@ static const struct amd_ip_funcs gfx_v7_0_ip_funcs = { static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .type = AMDGPU_RING_TYPE_GFX, .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = false, .get_rptr = gfx_v7_0_ring_get_rptr, .get_wptr = gfx_v7_0_ring_get_wptr_gfx, .set_wptr = gfx_v7_0_ring_set_wptr_gfx, .emit_frame_size = - 20 + /* gfx_v7_0_ring_emit_gds_switch */ 7 + /* gfx_v7_0_ring_emit_hdp_flush */ 5 + /* hdp invalidate */ 12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */ 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */ CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */ 3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/ .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */ .emit_ib = gfx_v7_0_ring_emit_ib_gfx, .emit_fence = gfx_v7_0_ring_emit_fence_gfx, .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync, .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, - .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, .test_ring = gfx_v7_0_ring_test_ring, .test_ib = gfx_v7_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl, .emit_wreg = gfx_v7_0_ring_emit_wreg, .soft_recovery = gfx_v7_0_ring_soft_recovery, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { .type = 
AMDGPU_RING_TYPE_COMPUTE, .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = false, .get_rptr = gfx_v7_0_ring_get_rptr, .get_wptr = gfx_v7_0_ring_get_wptr_compute, .set_wptr = gfx_v7_0_ring_set_wptr_compute, .emit_frame_size = - 20 + /* gfx_v7_0_ring_emit_gds_switch */ 7 + /* gfx_v7_0_ring_emit_hdp_flush */ 5 + /* hdp invalidate */ 7 + /* gfx_v7_0_ring_emit_pipeline_sync */ CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */ 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */ .emit_ib = gfx_v7_0_ring_emit_ib_compute, .emit_fence = gfx_v7_0_ring_emit_fence_compute, .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync, .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, - .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, .test_ring = gfx_v7_0_ring_test_ring, .test_ib = gfx_v7_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v7_0_ring_emit_wreg, }; static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev) { @@ -5169,42 +5116,28 @@ static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs; } static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); adev->gds.gws.total_size = 64; adev->gds.oa.total_size = 16; - if (adev->gds.mem.total_size == 64 * 1024) { - adev->gds.mem.gfx_partition_size = 4096; - adev->gds.mem.cs_partition_size = 4096; - - adev->gds.gws.gfx_partition_size = 4; - adev->gds.gws.cs_partition_size = 4; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 1; - } else { - adev->gds.mem.gfx_partition_size = 1024; - adev->gds.mem.cs_partition_size = 1024; - - adev->gds.gws.gfx_partition_size = 16; - adev->gds.gws.cs_partition_size = 16; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 4; - } + adev->gds.mem.gfx_size_per_vmid = adev->gds.mem.total_size / AMDGPU_NUM_VMID; + adev->gds.mem.kfd_size_per_vmid = adev->gds.mem.total_size / AMDGPU_NUM_VMID; + adev->gds.gws.gfx_size_per_vmid = adev->gds.gws.total_size / AMDGPU_NUM_VMID; + adev->gds.gws.kfd_size_per_vmid = adev->gds.gws.total_size / AMDGPU_NUM_VMID; + adev->gds.oa.gfx_size_per_vmid = adev->gds.oa.total_size / 8; /* gfx only */ + adev->gds.oa.kfd_size_per_vmid = 0; } - static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) { int i, j, k, counter, active_cu_number = 0; u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; unsigned disable_masks[4 * 2]; u32 ao_cu_num; if (adev->flags & AMD_IS_APU) ao_cu_num = 2; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 3882689b2d8f..b11a54bd0668 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -2154,57 +2154,34 @@ static int gfx_v8_0_sw_init(void *handle) kiq = &adev->gfx.kiq; r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); if (r) return r; /* create MQD for all compute queues as well as KIQ for SRIOV case */ r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); if (r) return r; - /* reserve GDS, GWS and OA resource for gfx */ - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, - &adev->gds.gds_gfx_bo, NULL, NULL); - if (r) - 
return r; - - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, - &adev->gds.gws_gfx_bo, NULL, NULL); - if (r) - return r; - - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, - &adev->gds.oa_gfx_bo, NULL, NULL); - if (r) - return r; - adev->gfx.ce_ram_size = 0x8000; r = gfx_v8_0_gpu_early_init(adev); if (r) return r; return 0; } static int gfx_v8_0_sw_fini(void *handle) { int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); - for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); amdgpu_gfx_compute_mqd_sw_fini(adev); amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); amdgpu_gfx_kiq_fini(adev); gfx_v8_0_mec_fini(adev); @@ -3850,21 +3827,21 @@ static void gfx_v8_0_config_init(struct amdgpu_device *adev) case CHIP_CARRIZO: case CHIP_STONEY: adev->gfx.config.double_offchip_lds_buf = 0; break; } } static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) { u32 tmp, sh_static_mem_cfg; - int i; + int i, vmid; WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF); WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); gfx_v8_0_tiling_mode_table_init(adev); gfx_v8_0_setup_rb(adev); gfx_v8_0_get_cu_info(adev); gfx_v8_0_config_init(adev); @@ -3927,20 +3904,41 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) tmp = RREG32(mmSPI_ARB_PRIORITY); tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2); tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2); tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2); tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2); WREG32(mmSPI_ARB_PRIORITY, tmp); mutex_unlock(&adev->grbm_idx_mutex); + for (vmid = 0; vmid < AMDGPU_NUM_VMID; vmid++) { + unsigned gds_size, gws_size, oa_size; + + if (vmid < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids) { + gds_size = adev->gds.mem.gfx_size_per_vmid; + gws_size = adev->gds.gws.gfx_size_per_vmid; + oa_size = adev->gds.oa.gfx_size_per_vmid; + } else { + gds_size = adev->gds.mem.kfd_size_per_vmid; + gws_size = adev->gds.gws.kfd_size_per_vmid; + oa_size = adev->gds.oa.kfd_size_per_vmid; + } + + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, vmid * gds_size); + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, gds_size); + WREG32(amdgpu_gds_reg_offset[vmid].gws, + (vmid * gws_size) | + (gws_size << GDS_GWS_VMID0__SIZE__SHIFT)); + WREG32(amdgpu_gds_reg_offset[vmid].oa, + ((1 << oa_size) - 1) << (vmid * oa_size)); + } } static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) { u32 i, j, k; u32 mask; mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { @@ -5383,68 +5381,20 @@ static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) uint64_t clock; mutex_lock(&adev->gfx.gpu_clock_mutex); WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); mutex_unlock(&adev->gfx.gpu_clock_mutex); return clock; } -static void 
gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, - uint32_t vmid, - uint32_t gds_base, uint32_t gds_size, - uint32_t gws_base, uint32_t gws_size, - uint32_t oa_base, uint32_t oa_size) -{ - gds_base = gds_base >> AMDGPU_GDS_SHIFT; - gds_size = gds_size >> AMDGPU_GDS_SHIFT; - - gws_base = gws_base >> AMDGPU_GWS_SHIFT; - gws_size = gws_size >> AMDGPU_GWS_SHIFT; - - oa_base = oa_base >> AMDGPU_OA_SHIFT; - oa_size = oa_size >> AMDGPU_OA_SHIFT; - - /* GDS Base */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(0))); - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, gds_base); - - /* GDS Size */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(0))); - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, gds_size); - - /* GWS */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(0))); - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); - - /* OA */ - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(0))); - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); -} - static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) { WREG32(mmSQ_IND_INDEX, (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | (address << SQ_IND_INDEX__INDEX__SHIFT) | (SQ_IND_INDEX__FORCE_READ_MASK)); return RREG32(mmSQ_IND_DATA); } @@ -7132,21 +7082,20 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { 31 + /* DE_META */ 3 + /* CNTX_CTRL */ 5 + /* HDP_INVL */ 8 + 8 + /* FENCE x2 */ 2, /* SWITCH_BUFFER */ .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */ .emit_ib = gfx_v8_0_ring_emit_ib_gfx, .emit_fence = gfx_v8_0_ring_emit_fence_gfx, .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, - .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, .test_ring = gfx_v8_0_ring_test_ring, .test_ib = gfx_v8_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_switch_buffer = gfx_v8_ring_emit_sb, .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec, .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, .emit_wreg = gfx_v8_0_ring_emit_wreg, @@ -7155,51 +7104,48 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .type = AMDGPU_RING_TYPE_COMPUTE, .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = false, .get_rptr = gfx_v8_0_ring_get_rptr, .get_wptr = gfx_v8_0_ring_get_wptr_compute, .set_wptr = gfx_v8_0_ring_set_wptr_compute, .emit_frame_size = - 20 + /* gfx_v8_0_ring_emit_gds_switch */ 7 + /* gfx_v8_0_ring_emit_hdp_flush */ 5 + /* hdp_invalidate */ 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */ 7 + 7 + 
7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ .emit_ib = gfx_v8_0_ring_emit_ib_compute, .emit_fence = gfx_v8_0_ring_emit_fence_compute, .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, - .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, .test_ring = gfx_v8_0_ring_test_ring, .test_ib = gfx_v8_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .set_priority = gfx_v8_0_ring_set_priority_compute, .emit_wreg = gfx_v8_0_ring_emit_wreg, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { .type = AMDGPU_RING_TYPE_KIQ, .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = false, .get_rptr = gfx_v8_0_ring_get_rptr, .get_wptr = gfx_v8_0_ring_get_wptr_compute, .set_wptr = gfx_v8_0_ring_set_wptr_compute, .emit_frame_size = - 20 + /* gfx_v8_0_ring_emit_gds_switch */ 7 + /* gfx_v8_0_ring_emit_hdp_flush */ 5 + /* hdp_invalidate */ 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 17 + /* gfx_v8_0_ring_emit_vm_flush */ 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ .emit_ib = gfx_v8_0_ring_emit_ib_compute, .emit_fence = gfx_v8_0_ring_emit_fence_kiq, .test_ring = gfx_v8_0_ring_test_ring, .test_ib = gfx_v8_0_ring_test_ib, @@ -7278,39 +7224,26 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) adev->gfx.rlc.funcs = &iceland_rlc_funcs; } static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); adev->gds.gws.total_size = 64; adev->gds.oa.total_size = 16; - if (adev->gds.mem.total_size == 64 * 1024) { - adev->gds.mem.gfx_partition_size = 4096; - adev->gds.mem.cs_partition_size = 4096; - - adev->gds.gws.gfx_partition_size = 4; - adev->gds.gws.cs_partition_size = 4; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 1; - } else { - adev->gds.mem.gfx_partition_size = 1024; - adev->gds.mem.cs_partition_size = 1024; - - adev->gds.gws.gfx_partition_size = 16; - adev->gds.gws.cs_partition_size = 16; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 4; - } + adev->gds.mem.gfx_size_per_vmid = adev->gds.mem.total_size / AMDGPU_NUM_VMID; + adev->gds.mem.kfd_size_per_vmid = adev->gds.mem.total_size / AMDGPU_NUM_VMID; + adev->gds.gws.gfx_size_per_vmid = adev->gds.gws.total_size / AMDGPU_NUM_VMID; + adev->gds.gws.kfd_size_per_vmid = adev->gds.gws.total_size / AMDGPU_NUM_VMID; + adev->gds.oa.gfx_size_per_vmid = adev->gds.oa.total_size / 8; /* gfx only */ + adev->gds.oa.kfd_size_per_vmid = 0; } static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, u32 bitmap) { u32 data; if (!bitmap) return; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 3594704a6f9b..48a7e25514f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1351,31 +1351,32 @@ static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev) NULL); memset(&adev->gfx.ngg.buf[0], 0, sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX); adev->gfx.ngg.init = false; return 0; } +/* TODO: remove */ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) { int r; if (!amdgpu_ngg || adev->gfx.ngg.init == true) return 0; /* GDS reserve memory: 64 bytes alignment */ adev->gfx.ngg.gds_reserve_size = 
ALIGN(5 * 4, 0x40); adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size; - adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size; + adev->gds.mem.gfx_size_per_vmid -= adev->gfx.ngg.gds_reserve_size; adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); /* Primitive Buffer */ r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM], amdgpu_prim_buf_per_se, 64 * 1024); if (r) { dev_err(adev->dev, "Failed to create Primitive Buffer\n"); goto err; @@ -1412,20 +1413,21 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) } out: adev->gfx.ngg.init = true; return 0; err: gfx_v9_0_ngg_fini(adev); return r; } +/* TODO: remove */ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) { struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; int r; u32 data, base; if (!amdgpu_ngg) return 0; /* Program buffer size */ @@ -1469,23 +1471,22 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) /* Clear GDS reserved memory */ r = amdgpu_ring_alloc(ring, 17); if (r) { DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n", ring->idx, r); return r; } gfx_v9_0_write_data_to_reg(ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), - (adev->gds.mem.total_size + - adev->gfx.ngg.gds_reserve_size) >> - AMDGPU_GDS_SHIFT); + adev->gds.mem.total_size + + adev->gfx.ngg.gds_reserve_size); amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | PACKET3_DMA_DATA_DST_SEL(1) | PACKET3_DMA_DATA_SRC_SEL(2))); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | @@ -1644,62 +1645,39 @@ static int gfx_v9_0_sw_init(void *handle) kiq = &adev->gfx.kiq; r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); if (r) return r; /* create MQD for all compute queues as wel as KIQ for SRIOV case */ r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); if (r) return r; - /* reserve GDS, GWS and OA resource for gfx */ - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, - &adev->gds.gds_gfx_bo, NULL, NULL); - if (r) - return r; - - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, - &adev->gds.gws_gfx_bo, NULL, NULL); - if (r) - return r; - - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, - &adev->gds.oa_gfx_bo, NULL, NULL); - if (r) - return r; - adev->gfx.ce_ram_size = 0x8000; r = gfx_v9_0_gpu_early_init(adev); if (r) return r; r = gfx_v9_0_ngg_init(adev); if (r) return r; return 0; } static int gfx_v9_0_sw_fini(void *handle) { int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); - for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); amdgpu_gfx_compute_mqd_sw_fini(adev); amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); amdgpu_gfx_kiq_fini(adev); gfx_v9_0_mec_fini(adev); @@ -1813,21 +1791,21 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, 
sh_mem_config); WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); } soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) { u32 tmp; - int i; + int i, vmid; WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); gfx_v9_0_tiling_mode_table_init(adev); gfx_v9_0_setup_rb(adev); gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); /* XXX SH_MEM regs */ @@ -1869,20 +1847,43 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) (adev->gfx.config.sc_prim_fifo_size_frontend << PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | (adev->gfx.config.sc_prim_fifo_size_backend << PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | (adev->gfx.config.sc_hiz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | (adev->gfx.config.sc_earlyz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); mutex_unlock(&adev->grbm_idx_mutex); + for (vmid = 0; vmid < AMDGPU_NUM_VMID; vmid++) { + unsigned gds_size, gws_size, oa_size; + + if (vmid < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids) { + gds_size = adev->gds.mem.gfx_size_per_vmid; + gws_size = adev->gds.gws.gfx_size_per_vmid; + oa_size = adev->gds.oa.gfx_size_per_vmid; + } else { + gds_size = adev->gds.mem.kfd_size_per_vmid; + gws_size = adev->gds.gws.kfd_size_per_vmid; + oa_size = adev->gds.oa.kfd_size_per_vmid; + } + + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, + vmid * gds_size); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, + gds_size); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, + (vmid * gws_size) | + (gws_size << GDS_GWS_VMID0__SIZE__SHIFT)); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, + ((1 << oa_size) - 1) << (vmid * oa_size)); + } } static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) { u32 i, j, k; u32 mask; mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { @@ -3407,58 +3408,20 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) uint64_t clock; mutex_lock(&adev->gfx.gpu_clock_mutex); WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); mutex_unlock(&adev->gfx.gpu_clock_mutex); return clock; } -static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, - uint32_t vmid, - uint32_t gds_base, uint32_t gds_size, - uint32_t gws_base, uint32_t gws_size, - uint32_t oa_base, uint32_t oa_size) -{ - struct amdgpu_device *adev = ring->adev; - - gds_base = gds_base >> AMDGPU_GDS_SHIFT; - gds_size = gds_size >> AMDGPU_GDS_SHIFT; - - gws_base = gws_base >> AMDGPU_GWS_SHIFT; - gws_size = gws_size >> AMDGPU_GWS_SHIFT; - - oa_base = oa_base >> AMDGPU_OA_SHIFT; - oa_size = oa_size >> AMDGPU_OA_SHIFT; - - /* GDS Base */ - gfx_v9_0_write_data_to_reg(ring, 0, false, - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, - gds_base); - - /* GDS Size */ - gfx_v9_0_write_data_to_reg(ring, 0, false, - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, - gds_size); - - /* GWS */ - gfx_v9_0_write_data_to_reg(ring, 0, false, - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, - gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); - - /* OA */ - gfx_v9_0_write_data_to_reg(ring, 0, false, - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, - (1 << (oa_size + oa_base)) - (1 << oa_base)); -} 
- static int gfx_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; gfx_v9_0_set_ring_funcs(adev); gfx_v9_0_set_irq_funcs(adev); gfx_v9_0_set_gds_init(adev); gfx_v9_0_set_rlc_funcs(adev); @@ -4695,21 +4658,20 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { 31 + /* DE_META */ 3 + /* CNTX_CTRL */ 5 + /* HDP_INVL */ 8 + 8 + /* FENCE x2 */ 2, /* SWITCH_BUFFER */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ .emit_ib = gfx_v9_0_ring_emit_ib_gfx, .emit_fence = gfx_v9_0_ring_emit_fence, .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, - .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, .test_ring = gfx_v9_0_ring_test_ring, .test_ib = gfx_v9_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_switch_buffer = gfx_v9_ring_emit_sb, .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, .emit_tmz = gfx_v9_0_ring_emit_tmz, @@ -4722,34 +4684,32 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .type = AMDGPU_RING_TYPE_COMPUTE, .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, .vmhub = AMDGPU_GFXHUB, .get_rptr = gfx_v9_0_ring_get_rptr_compute, .get_wptr = gfx_v9_0_ring_get_wptr_compute, .set_wptr = gfx_v9_0_ring_set_wptr_compute, .emit_frame_size = - 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 5 + /* hdp invalidate */ 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ .emit_ib = gfx_v9_0_ring_emit_ib_compute, .emit_fence = gfx_v9_0_ring_emit_fence, .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, - .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, .test_ring = gfx_v9_0_ring_test_ring, .test_ib = gfx_v9_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .set_priority = gfx_v9_0_ring_set_priority_compute, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, }; @@ -4757,21 +4717,20 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { .type = AMDGPU_RING_TYPE_KIQ, .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, .vmhub = AMDGPU_GFXHUB, .get_rptr = gfx_v9_0_ring_get_rptr_compute, .get_wptr = gfx_v9_0_ring_get_wptr_compute, .set_wptr = gfx_v9_0_ring_set_wptr_compute, .emit_frame_size = - 20 + /* gfx_v9_0_ring_emit_gds_switch */ 7 + /* gfx_v9_0_ring_emit_hdp_flush */ 5 + /* hdp invalidate */ 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ 
.emit_ib = gfx_v9_0_ring_emit_ib_compute, .emit_fence = gfx_v9_0_ring_emit_fence_kiq, @@ -4847,39 +4806,26 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) } } static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ adev->gds.mem.total_size = RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); adev->gds.gws.total_size = 64; adev->gds.oa.total_size = 16; - if (adev->gds.mem.total_size == 64 * 1024) { - adev->gds.mem.gfx_partition_size = 4096; - adev->gds.mem.cs_partition_size = 4096; - - adev->gds.gws.gfx_partition_size = 4; - adev->gds.gws.cs_partition_size = 4; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 1; - } else { - adev->gds.mem.gfx_partition_size = 1024; - adev->gds.mem.cs_partition_size = 1024; - - adev->gds.gws.gfx_partition_size = 16; - adev->gds.gws.cs_partition_size = 16; - - adev->gds.oa.gfx_partition_size = 4; - adev->gds.oa.cs_partition_size = 4; - } + adev->gds.mem.gfx_size_per_vmid = adev->gds.mem.total_size / AMDGPU_NUM_VMID; + adev->gds.mem.kfd_size_per_vmid = adev->gds.mem.total_size / AMDGPU_NUM_VMID; + adev->gds.gws.gfx_size_per_vmid = adev->gds.gws.total_size / AMDGPU_NUM_VMID; + adev->gds.gws.kfd_size_per_vmid = adev->gds.gws.total_size / AMDGPU_NUM_VMID; + adev->gds.oa.gfx_size_per_vmid = adev->gds.oa.total_size / 8; /* gfx only */ + adev->gds.oa.kfd_size_per_vmid = 0; } static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, u32 bitmap) { u32 data; if (!bitmap) return; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 94444eeba55b..9b9512b14cae 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -81,36 +81,27 @@ extern "C" { * %AMDGPU_GEM_DOMAIN_CPU System memory that is not GPU accessible. * Memory in this pool could be swapped out to disk if there is pressure. * * %AMDGPU_GEM_DOMAIN_GTT GPU accessible system memory, mapped into the * GPU's virtual address space via gart. Gart memory linearizes non-contiguous * pages of system memory, allows GPU access system memory in a linezrized * fashion. * * %AMDGPU_GEM_DOMAIN_VRAM Local video memory. For APUs, it is memory * carved out by the BIOS. - * - * %AMDGPU_GEM_DOMAIN_GDS Global on-chip data storage used to share data - * across shader threads. - * - * %AMDGPU_GEM_DOMAIN_GWS Global wave sync, used to synchronize the - * execution of all the waves on a device. - * - * %AMDGPU_GEM_DOMAIN_OA Ordered append, used by 3D or Compute engines - * for appending data. */ #define AMDGPU_GEM_DOMAIN_CPU 0x1 #define AMDGPU_GEM_DOMAIN_GTT 0x2 #define AMDGPU_GEM_DOMAIN_VRAM 0x4 -#define AMDGPU_GEM_DOMAIN_GDS 0x8 -#define AMDGPU_GEM_DOMAIN_GWS 0x10 -#define AMDGPU_GEM_DOMAIN_OA 0x20 +#define AMDGPU_GEM_DOMAIN_GDS 0x8 /* non-functional */ +#define AMDGPU_GEM_DOMAIN_GWS 0x10 /* non-functional */ +#define AMDGPU_GEM_DOMAIN_OA 0x20 /* non-functional */ #define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \ AMDGPU_GEM_DOMAIN_GTT | \ AMDGPU_GEM_DOMAIN_VRAM | \ AMDGPU_GEM_DOMAIN_GDS | \ AMDGPU_GEM_DOMAIN_GWS | \ AMDGPU_GEM_DOMAIN_OA) /* Flag that CPU access will be required for the case of VRAM domain */ #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) /* Flag that CPU access will not work, this VRAM domain is invisible */ -- 2.17.1