As discussed internally, that doesn't work because threads don't necessarily get the same VMID assigned.

Christian.

On 12.09.2018 at 22:33, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> I've chosen to do it like this because it's easy and allows an arbitrary
> number of processes.
>
> Signed-off-by: Marek Olšák <marek.olsak at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 10 --
> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 3 -
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 20 ----
> drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 19 +--
> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 24 +---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 6 -
> drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 7 --
> drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 3 -
> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 14 +--
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 21 ----
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 6 -
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 -
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 61 ----------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 8 --
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 34 +-----
> drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 125 +++++---------------
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 123 +++++--------------
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 124 ++++++-------------
> include/uapi/drm/amdgpu_drm.h | 15 +--
> 19 files changed, 109 insertions(+), 519 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> index b80243d3972e..7264a4930b88 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> @@ -71,23 +71,20 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
> / sizeof(struct amdgpu_bo_list_entry))
> return -EINVAL;
>
> size = sizeof(struct amdgpu_bo_list);
> size += num_entries * sizeof(struct amdgpu_bo_list_entry);
> list = kvmalloc(size, GFP_KERNEL);
> if (!list)
> return -ENOMEM;
>
> kref_init(&list->refcount);
> - list->gds_obj = adev->gds.gds_gfx_bo;
> - list->gws_obj = adev->gds.gws_gfx_bo;
> - list->oa_obj = adev->gds.oa_gfx_bo;
>
> array = amdgpu_bo_list_array_entry(list, 0);
> memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
>
> for (i = 0; i < num_entries; ++i) {
> struct amdgpu_bo_list_entry *entry;
> struct drm_gem_object *gobj;
> struct amdgpu_bo *bo;
> struct mm_struct *usermm;
>
> @@ -111,27 +108,20 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
> } else {
> entry = &array[last_entry++];
> }
>
> entry->robj = bo;
> entry->priority = min(info[i].bo_priority,
> AMDGPU_BO_LIST_MAX_PRIORITY);
> entry->tv.bo = &entry->robj->tbo;
> entry->tv.shared = !entry->robj->prime_shared_count;
>
> - if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
> - list->gds_obj = entry->robj;
> - if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GWS)
> - list->gws_obj = entry->robj;
> - if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_OA)
> - list->oa_obj = entry->robj;
> -
> total_size += amdgpu_bo_size(entry->robj);
> trace_amdgpu_bo_list_set(list, entry->robj);
> }
>
> list->first_userptr = first_userptr;
> list->num_entries = num_entries;
>
> trace_amdgpu_cs_bo_status(list->num_entries, total_size);
>
> *result = list;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> index 61b089768e1c..30f12a60aa28 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h > @@ -36,23 +36,20 @@ struct amdgpu_bo_list_entry { > struct ttm_validate_buffer tv; > struct amdgpu_bo_va *bo_va; > uint32_t priority; > struct page **user_pages; > int user_invalidated; > }; > > struct amdgpu_bo_list { > struct rcu_head rhead; > struct kref refcount; > - struct amdgpu_bo *gds_obj; > - struct amdgpu_bo *gws_obj; > - struct amdgpu_bo *oa_obj; > unsigned first_userptr; > unsigned num_entries; > }; > > int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, > struct amdgpu_bo_list **result); > void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, > struct list_head *validated); > void amdgpu_bo_list_put(struct amdgpu_bo_list *list); > int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index 1081fd00b059..88b58facf29e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -565,23 +565,20 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, > return 0; > } > > static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, > union drm_amdgpu_cs *cs) > { > struct amdgpu_fpriv *fpriv = p->filp->driver_priv; > struct amdgpu_vm *vm = &fpriv->vm; > struct amdgpu_bo_list_entry *e; > struct list_head duplicates; > - struct amdgpu_bo *gds; > - struct amdgpu_bo *gws; > - struct amdgpu_bo *oa; > unsigned tries = 10; > int r; > > INIT_LIST_HEAD(&p->validated); > > /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */ > if (cs->in.bo_list_handle) { > if (p->bo_list) > return -EINVAL; > > @@ -705,40 +702,23 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, > > r = amdgpu_cs_list_validate(p, &p->validated); > if (r) { > DRM_ERROR("amdgpu_cs_list_validate(validated) failed.\n"); > goto error_validate; > } > > amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, > p->bytes_moved_vis); > > - gds = p->bo_list->gds_obj; > - gws = p->bo_list->gws_obj; > - oa = p->bo_list->oa_obj; > - > amdgpu_bo_list_for_each_entry(e, p->bo_list) > e->bo_va = amdgpu_vm_bo_find(vm, e->robj); > > - if (gds) { > - p->job->gds_base = amdgpu_bo_gpu_offset(gds); > - p->job->gds_size = amdgpu_bo_size(gds); > - } > - if (gws) { > - p->job->gws_base = amdgpu_bo_gpu_offset(gws); > - p->job->gws_size = amdgpu_bo_size(gws); > - } > - if (oa) { > - p->job->oa_base = amdgpu_bo_gpu_offset(oa); > - p->job->oa_size = amdgpu_bo_size(oa); > - } > - > if (!r && p->uf_entry.robj) { > struct amdgpu_bo *uf = p->uf_entry.robj; > > r = amdgpu_ttm_alloc_gart(&uf->tbo); > p->job->uf_addr += amdgpu_bo_gpu_offset(uf); > } > > error_validate: > if (r) > ttm_eu_backoff_reservation(&p->ticket, &p->validated); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h > index e73728d90388..69ba25c2e921 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h > @@ -17,48 +17,33 @@ > * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR > * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > * OTHER DEALINGS IN THE SOFTWARE. 
> * > */ > > #ifndef __AMDGPU_GDS_H__ > #define __AMDGPU_GDS_H__ > > -/* Because TTM request that alloacted buffer should be PAGE_SIZE aligned, > - * we should report GDS/GWS/OA size as PAGE_SIZE aligned > - * */ > -#define AMDGPU_GDS_SHIFT 2 > -#define AMDGPU_GWS_SHIFT PAGE_SHIFT > -#define AMDGPU_OA_SHIFT PAGE_SHIFT > - > struct amdgpu_ring; > struct amdgpu_bo; > > struct amdgpu_gds_asic_info { > uint32_t total_size; > - uint32_t gfx_partition_size; > - uint32_t cs_partition_size; > + uint32_t gfx_size_per_vmid; > + uint32_t kfd_size_per_vmid; > }; > > struct amdgpu_gds { > struct amdgpu_gds_asic_info mem; > struct amdgpu_gds_asic_info gws; > struct amdgpu_gds_asic_info oa; > - /* At present, GDS, GWS and OA resources for gfx (graphics) > - * is always pre-allocated and available for graphics operation. > - * Such resource is shared between all gfx clients. > - * TODO: move this operation to user space > - * */ > - struct amdgpu_bo* gds_gfx_bo; > - struct amdgpu_bo* gws_gfx_bo; > - struct amdgpu_bo* oa_gfx_bo; > }; > > struct amdgpu_gds_reg_offset { > uint32_t mem_base; > uint32_t mem_size; > uint32_t gws; > uint32_t oa; > }; > > #endif /* __AMDGPU_GDS_H__ */ > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > index d30a0838851b..c87ad4b4d0b6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > @@ -223,43 +223,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, > if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | > AMDGPU_GEM_CREATE_NO_CPU_ACCESS | > AMDGPU_GEM_CREATE_CPU_GTT_USWC | > AMDGPU_GEM_CREATE_VRAM_CLEARED | > AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | > AMDGPU_GEM_CREATE_EXPLICIT_SYNC)) > > return -EINVAL; > > /* reject invalid gem domains */ > - if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK) > + if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU | > + AMDGPU_GEM_DOMAIN_GTT | > + AMDGPU_GEM_DOMAIN_VRAM)) > return -EINVAL; > > - /* create a gem object to contain this object in */ > - if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | > - AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { > - if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { > - /* if gds bo is created from user space, it must be > - * passed to bo list > - */ > - DRM_ERROR("GDS bo cannot be per-vm-bo\n"); > - return -EINVAL; > - } > - flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; > - if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS) > - size = size << AMDGPU_GDS_SHIFT; > - else if (args->in.domains == AMDGPU_GEM_DOMAIN_GWS) > - size = size << AMDGPU_GWS_SHIFT; > - else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA) > - size = size << AMDGPU_OA_SHIFT; > - else > - return -EINVAL; > - } > size = roundup(size, PAGE_SIZE); > > if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { > r = amdgpu_bo_reserve(vm->root.base.bo, false); > if (r) > return r; > > resv = vm->root.base.bo->tbo.resv; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c > index 3a072a7a39f0..c2e6a1a11d7f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c > @@ -516,26 +516,20 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, > * Reset saved GDW, GWS and OA to force switch on next flush. 
> */ > void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub, > unsigned vmid) > { > struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; > struct amdgpu_vmid *id = &id_mgr->ids[vmid]; > > mutex_lock(&id_mgr->lock); > id->owner = 0; > - id->gds_base = 0; > - id->gds_size = 0; > - id->gws_base = 0; > - id->gws_size = 0; > - id->oa_base = 0; > - id->oa_size = 0; > mutex_unlock(&id_mgr->lock); > } > > /** > * amdgpu_vmid_reset_all - reset VMID to zero > * > * @adev: amdgpu device structure > * > * Reset VMID to force flush on next use > */ > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h > index 7625419f0fc2..06078e665532 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h > @@ -44,27 +44,20 @@ struct amdgpu_vmid { > struct amdgpu_sync active; > struct dma_fence *last_flush; > uint64_t owner; > > uint64_t pd_gpu_addr; > /* last flushed PD/PT update */ > struct dma_fence *flushed_updates; > > uint32_t current_gpu_reset_count; > > - uint32_t gds_base; > - uint32_t gds_size; > - uint32_t gws_base; > - uint32_t gws_size; > - uint32_t oa_base; > - uint32_t oa_size; > - > unsigned pasid; > struct dma_fence *pasid_mapping; > }; > > struct amdgpu_vmid_mgr { > struct mutex lock; > unsigned num_ids; > struct list_head ids_lru; > struct amdgpu_vmid ids[AMDGPU_NUM_VMID]; > atomic_t reserved_vmid_num; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h > index 57cfe78a262b..3db553f6ad01 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h > @@ -42,23 +42,20 @@ struct amdgpu_job { > struct amdgpu_sync sched_sync; > struct amdgpu_ib *ibs; > struct dma_fence *fence; /* the hw fence */ > uint32_t preamble_status; > uint32_t num_ibs; > void *owner; > bool vm_needs_flush; > uint64_t vm_pd_addr; > unsigned vmid; > unsigned pasid; > - uint32_t gds_base, gds_size; > - uint32_t gws_base, gws_size; > - uint32_t oa_base, oa_size; > uint32_t vram_lost_counter; > > /* user fence handling */ > uint64_t uf_addr; > uint64_t uf_sequence; > > }; > > int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, > struct amdgpu_job **job, struct amdgpu_vm *vm); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > index 29ac3873eeb0..209954290954 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > @@ -517,27 +517,27 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file > case AMDGPU_INFO_VIS_VRAM_USAGE: > ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); > return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; > case AMDGPU_INFO_GTT_USAGE: > ui64 = amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]); > return copy_to_user(out, &ui64, min(size, 8u)) ? 
-EFAULT : 0; > case AMDGPU_INFO_GDS_CONFIG: { > struct drm_amdgpu_info_gds gds_info; > > memset(&gds_info, 0, sizeof(gds_info)); > - gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size >> AMDGPU_GDS_SHIFT; > - gds_info.compute_partition_size = adev->gds.mem.cs_partition_size >> AMDGPU_GDS_SHIFT; > - gds_info.gds_total_size = adev->gds.mem.total_size >> AMDGPU_GDS_SHIFT; > - gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size >> AMDGPU_GWS_SHIFT; > - gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size >> AMDGPU_GWS_SHIFT; > - gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size >> AMDGPU_OA_SHIFT; > - gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size >> AMDGPU_OA_SHIFT; > + gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_size_per_vmid; > + gds_info.compute_partition_size = adev->gds.mem.kfd_size_per_vmid; > + gds_info.gds_total_size = adev->gds.mem.total_size; > + gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_size_per_vmid; > + gds_info.gws_per_compute_partition = adev->gds.gws.kfd_size_per_vmid; > + gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_size_per_vmid; > + gds_info.oa_per_compute_partition = adev->gds.oa.kfd_size_per_vmid; > return copy_to_user(out, &gds_info, > min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0; > } > case AMDGPU_INFO_VRAM_GTT: { > struct drm_amdgpu_info_vram_gtt vram_gtt; > > vram_gtt.vram_size = adev->gmc.real_vram_size - > atomic64_read(&adev->vram_pin_size); > vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size - > atomic64_read(&adev->visible_pin_size); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > index de990bdcdd6c..76770a8c29a5 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > @@ -178,41 +178,20 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) > places[c].lpfn = 0; > places[c].flags = TTM_PL_FLAG_SYSTEM; > if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) > places[c].flags |= TTM_PL_FLAG_WC | > TTM_PL_FLAG_UNCACHED; > else > places[c].flags |= TTM_PL_FLAG_CACHED; > c++; > } > > - if (domain & AMDGPU_GEM_DOMAIN_GDS) { > - places[c].fpfn = 0; > - places[c].lpfn = 0; > - places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_GDS; > - c++; > - } > - > - if (domain & AMDGPU_GEM_DOMAIN_GWS) { > - places[c].fpfn = 0; > - places[c].lpfn = 0; > - places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_GWS; > - c++; > - } > - > - if (domain & AMDGPU_GEM_DOMAIN_OA) { > - places[c].fpfn = 0; > - places[c].lpfn = 0; > - places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_OA; > - c++; > - } > - > if (!c) { > places[c].fpfn = 0; > places[c].lpfn = 0; > places[c].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; > c++; > } > > BUG_ON(c >= AMDGPU_BO_MAX_PLACEMENTS); > > placement->num_placement = c; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > index 907fdf46d895..e089964cbcb7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > @@ -120,26 +120,20 @@ static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo) > */ > static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type) > { > switch (mem_type) { > case TTM_PL_VRAM: > return AMDGPU_GEM_DOMAIN_VRAM; > case TTM_PL_TT: > return AMDGPU_GEM_DOMAIN_GTT; > case TTM_PL_SYSTEM: > return AMDGPU_GEM_DOMAIN_CPU; > - case AMDGPU_PL_GDS: > - return 
AMDGPU_GEM_DOMAIN_GDS; > - case AMDGPU_PL_GWS: > - return AMDGPU_GEM_DOMAIN_GWS; > - case AMDGPU_PL_OA: > - return AMDGPU_GEM_DOMAIN_OA; > default: > break; > } > return 0; > } > > /** > * amdgpu_bo_reserve - reserve bo > * @bo: bo structure > * @no_intr: don't return -ERESTARTSYS on pending signal > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > index 9cc239968e40..f6ea9604e611 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > @@ -130,24 +130,20 @@ struct amdgpu_ring_funcs { > /* command emit functions */ > void (*emit_ib)(struct amdgpu_ring *ring, > struct amdgpu_ib *ib, > unsigned vmid, bool ctx_switch); > void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, > uint64_t seq, unsigned flags); > void (*emit_pipeline_sync)(struct amdgpu_ring *ring); > void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid, > uint64_t pd_addr); > void (*emit_hdp_flush)(struct amdgpu_ring *ring); > - void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid, > - uint32_t gds_base, uint32_t gds_size, > - uint32_t gws_base, uint32_t gws_size, > - uint32_t oa_base, uint32_t oa_size); > /* testing functions */ > int (*test_ring)(struct amdgpu_ring *ring); > int (*test_ib)(struct amdgpu_ring *ring, long timeout); > /* insert NOP packets */ > void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count); > void (*insert_start)(struct amdgpu_ring *ring); > void (*insert_end)(struct amdgpu_ring *ring); > /* pad the indirect buffer to the necessary number of dw */ > void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib); > unsigned (*init_cond_exec)(struct amdgpu_ring *ring); > @@ -226,21 +222,20 @@ struct amdgpu_ring { > #define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib))) > #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r)) > #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t)) > #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) > #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) > #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) > #define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c)) > #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r)) > #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr)) > #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) > -#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as)) > #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) > #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) > #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) > #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d)) > #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) > #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) > #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) > #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) > #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) > #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index 8a158ee922f7..2cc62b0e7ea8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -195,30 +195,20 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, > break; > case TTM_PL_VRAM: > /* "On-card" video ram */ > man->func = &amdgpu_vram_mgr_func; > man->gpu_offset = adev->gmc.vram_start; > man->flags = TTM_MEMTYPE_FLAG_FIXED | > TTM_MEMTYPE_FLAG_MAPPABLE; > man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC; > man->default_caching = TTM_PL_FLAG_WC; > break; > - case AMDGPU_PL_GDS: > - case AMDGPU_PL_GWS: > - case AMDGPU_PL_OA: > - /* On-chip GDS memory*/ > - man->func = &ttm_bo_manager_func; > - man->gpu_offset = 0; > - man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_CMA; > - man->available_caching = TTM_PL_FLAG_UNCACHED; > - man->default_caching = TTM_PL_FLAG_UNCACHED; > - break; > default: > DRM_ERROR("Unsupported memory type %u\n", (unsigned)type); > return -EINVAL; > } > return 0; > } > > /** > * amdgpu_evict_flags - Compute placement flags > * > @@ -1039,25 +1029,20 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, > if (r) { > DRM_ERROR("failed to pin userptr\n"); > return r; > } > } > if (!ttm->num_pages) { > WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n", > ttm->num_pages, bo_mem, ttm); > } > > - if (bo_mem->mem_type == AMDGPU_PL_GDS || > - bo_mem->mem_type == AMDGPU_PL_GWS || > - bo_mem->mem_type == AMDGPU_PL_OA) > - return -EINVAL; > - > if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) { > gtt->offset = AMDGPU_BO_INVALID_OFFSET; > return 0; > } > > /* compute PTE flags relevant to this BO memory */ > flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem); > > /* bind pages into GART page tables */ > gtt->offset = ((u64)bo_mem->start << PAGE_SHIFT) - adev->gmc.gart_start; > @@ -1818,60 +1803,20 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) > > /* Initialize GTT memory pool */ > r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT); > if (r) { > DRM_ERROR("Failed initializing GTT heap.\n"); > return r; > } > DRM_INFO("amdgpu: %uM of GTT memory ready.\n", > (unsigned)(gtt_size / (1024 * 1024))); > > - /* Initialize various on-chip memory pools */ > - adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT; > - adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT; > - adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT; > - adev->gds.gws.total_size = adev->gds.gws.total_size << AMDGPU_GWS_SHIFT; > - adev->gds.gws.gfx_partition_size = adev->gds.gws.gfx_partition_size << AMDGPU_GWS_SHIFT; > - adev->gds.gws.cs_partition_size = adev->gds.gws.cs_partition_size << AMDGPU_GWS_SHIFT; > - adev->gds.oa.total_size = adev->gds.oa.total_size << AMDGPU_OA_SHIFT; > - adev->gds.oa.gfx_partition_size = adev->gds.oa.gfx_partition_size << AMDGPU_OA_SHIFT; > - adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size << AMDGPU_OA_SHIFT; > - /* GDS Memory */ > - if (adev->gds.mem.total_size) { > - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS, > - adev->gds.mem.total_size >> PAGE_SHIFT); > - if (r) { > - DRM_ERROR("Failed initializing GDS heap.\n"); > - return r; > - } > - } > - > - /* GWS */ > - if (adev->gds.gws.total_size) { > - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS, > - adev->gds.gws.total_size >> PAGE_SHIFT); > - if (r) { > - DRM_ERROR("Failed initializing gws heap.\n"); > - return r; > - } > - } > - > - /* OA */ > - 
if (adev->gds.oa.total_size) { > - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA, > - adev->gds.oa.total_size >> PAGE_SHIFT); > - if (r) { > - DRM_ERROR("Failed initializing oa heap.\n"); > - return r; > - } > - } > - > /* Register debugfs entries for amdgpu_ttm */ > r = amdgpu_ttm_debugfs_init(adev); > if (r) { > DRM_ERROR("Failed to init debugfs\n"); > return r; > } > return 0; > } > > /** > @@ -1892,26 +1837,20 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) > return; > > amdgpu_ttm_debugfs_fini(adev); > amdgpu_ttm_fw_reserve_vram_fini(adev); > if (adev->mman.aper_base_kaddr) > iounmap(adev->mman.aper_base_kaddr); > adev->mman.aper_base_kaddr = NULL; > > ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM); > ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT); > - if (adev->gds.mem.total_size) > - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS); > - if (adev->gds.gws.total_size) > - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS); > - if (adev->gds.oa.total_size) > - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA); > ttm_bo_device_release(&adev->mman.bdev); > amdgpu_ttm_global_fini(adev); > adev->mman.initialized = false; > DRM_INFO("amdgpu: ttm finalized\n"); > } > > /** > * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions > * > * @adev: amdgpu_device pointer > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > index fe8f276e9811..04557a382b19 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > @@ -20,28 +20,20 @@ > * OTHER DEALINGS IN THE SOFTWARE. > * > */ > > #ifndef __AMDGPU_TTM_H__ > #define __AMDGPU_TTM_H__ > > #include "amdgpu.h" > #include <drm/gpu_scheduler.h> > > -#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) > -#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1) > -#define AMDGPU_PL_OA (TTM_PL_PRIV + 2) > - > -#define AMDGPU_PL_FLAG_GDS (TTM_PL_FLAG_PRIV << 0) > -#define AMDGPU_PL_FLAG_GWS (TTM_PL_FLAG_PRIV << 1) > -#define AMDGPU_PL_FLAG_OA (TTM_PL_FLAG_PRIV << 2) > - > #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 > #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 > > struct amdgpu_mman { > struct ttm_bo_global_ref bo_global_ref; > struct drm_global_reference mem_global_ref; > struct ttm_bo_device bdev; > bool mem_global_referenced; > bool initialized; > void __iomem *aper_base_kaddr; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index be1659fedf94..c66f1c6f0ba8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -803,86 +803,69 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev) > * Returns: > * True if sync is needed. 
> */ > bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, > struct amdgpu_job *job) > { > struct amdgpu_device *adev = ring->adev; > unsigned vmhub = ring->funcs->vmhub; > struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; > struct amdgpu_vmid *id; > - bool gds_switch_needed; > bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug; > > if (job->vmid == 0) > return false; > id = &id_mgr->ids[job->vmid]; > - gds_switch_needed = ring->funcs->emit_gds_switch && ( > - id->gds_base != job->gds_base || > - id->gds_size != job->gds_size || > - id->gws_base != job->gws_base || > - id->gws_size != job->gws_size || > - id->oa_base != job->oa_base || > - id->oa_size != job->oa_size); > > if (amdgpu_vmid_had_gpu_reset(adev, id)) > return true; > > - return vm_flush_needed || gds_switch_needed; > + return vm_flush_needed; > } > > /** > * amdgpu_vm_flush - hardware flush the vm > * > * @ring: ring to use for flush > * @job: related job > * @need_pipe_sync: is pipe sync needed > * > * Emit a VM flush when it is necessary. > * > * Returns: > * 0 on success, errno otherwise. > */ > int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync) > { > struct amdgpu_device *adev = ring->adev; > unsigned vmhub = ring->funcs->vmhub; > struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; > struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; > - bool gds_switch_needed = ring->funcs->emit_gds_switch && ( > - id->gds_base != job->gds_base || > - id->gds_size != job->gds_size || > - id->gws_base != job->gws_base || > - id->gws_size != job->gws_size || > - id->oa_base != job->oa_base || > - id->oa_size != job->oa_size); > bool vm_flush_needed = job->vm_needs_flush; > bool pasid_mapping_needed = id->pasid != job->pasid || > !id->pasid_mapping || > !dma_fence_is_signaled(id->pasid_mapping); > struct dma_fence *fence = NULL; > unsigned patch_offset = 0; > int r; > > if (amdgpu_vmid_had_gpu_reset(adev, id)) { > - gds_switch_needed = true; > vm_flush_needed = true; > pasid_mapping_needed = true; > } > > - gds_switch_needed &= !!ring->funcs->emit_gds_switch; > vm_flush_needed &= !!ring->funcs->emit_vm_flush && > job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET; > pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && > ring->funcs->emit_wreg; > > - if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) > + if (!vm_flush_needed && !need_pipe_sync) > return 0; > > if (ring->funcs->init_cond_exec) > patch_offset = amdgpu_ring_init_cond_exec(ring); > > if (need_pipe_sync) > amdgpu_ring_emit_pipeline_sync(ring); > > if (vm_flush_needed) { > trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr); > @@ -907,33 +890,20 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ > mutex_unlock(&id_mgr->lock); > } > > if (pasid_mapping_needed) { > id->pasid = job->pasid; > dma_fence_put(id->pasid_mapping); > id->pasid_mapping = dma_fence_get(fence); > } > dma_fence_put(fence); > > - if (ring->funcs->emit_gds_switch && gds_switch_needed) { > - id->gds_base = job->gds_base; > - id->gds_size = job->gds_size; > - id->gws_base = job->gws_base; > - id->gws_size = job->gws_size; > - id->oa_base = job->oa_base; > - id->oa_size = job->oa_size; > - amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, > - job->gds_size, job->gws_base, > - job->gws_size, job->oa_base, > - job->oa_size); > - } > - > if (ring->funcs->patch_cond_exec) > amdgpu_ring_patch_cond_exec(ring, patch_offset); > > /* the double SWITCH_BUFFER here 
*cannot* be skipped by COND_EXEC */ > if (ring->funcs->emit_switch_buffer) { > amdgpu_ring_emit_switch_buffer(ring); > amdgpu_ring_emit_switch_buffer(ring); > } > return 0; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > index a15d9c0f233b..f5228e169c3a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > @@ -1890,21 +1890,21 @@ static void gfx_v7_0_config_init(struct amdgpu_device *adev) > * > * @adev: amdgpu_device pointer > * > * Configures the 3D engine and tiling configuration > * registers so that the 3D engine is usable. > */ > static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) > { > u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base; > u32 tmp; > - int i; > + int i, vmid; > > WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT)); > > WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); > WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); > WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); > > gfx_v7_0_tiling_mode_table_init(adev); > > gfx_v7_0_setup_rb(adev); > @@ -2014,20 +2014,42 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) > > tmp = RREG32(mmSPI_ARB_PRIORITY); > tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2); > tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2); > tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2); > tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2); > WREG32(mmSPI_ARB_PRIORITY, tmp); > > mutex_unlock(&adev->grbm_idx_mutex); > > + for (vmid = 0; vmid < AMDGPU_NUM_VMID; vmid++) { > + unsigned gds_size, gws_size, oa_size; > + > + if (vmid < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids) { > + gds_size = adev->gds.mem.gfx_size_per_vmid; > + gws_size = adev->gds.gws.gfx_size_per_vmid; > + oa_size = adev->gds.oa.gfx_size_per_vmid; > + } else { > + gds_size = adev->gds.mem.kfd_size_per_vmid; > + gws_size = adev->gds.gws.kfd_size_per_vmid; > + oa_size = adev->gds.oa.kfd_size_per_vmid; > + } > + > + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, vmid * gds_size); > + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, gds_size); > + WREG32(amdgpu_gds_reg_offset[vmid].gws, > + (vmid * gws_size) | > + (gws_size << GDS_GWS_VMID0__SIZE__SHIFT)); > + WREG32(amdgpu_gds_reg_offset[vmid].oa, > + ((1 << oa_size) - 1) << (vmid * oa_size)); > + } > + > udelay(50); > } > > /* > * GPU scratch registers helpers function. 
> */ > /** > * gfx_v7_0_scratch_init - setup driver info for CP scratch regs > * > * @adev: amdgpu_device pointer > @@ -4157,68 +4179,20 @@ static uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev) > uint64_t clock; > > mutex_lock(&adev->gfx.gpu_clock_mutex); > WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); > clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | > ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); > mutex_unlock(&adev->gfx.gpu_clock_mutex); > return clock; > } > > -static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring, > - uint32_t vmid, > - uint32_t gds_base, uint32_t gds_size, > - uint32_t gws_base, uint32_t gws_size, > - uint32_t oa_base, uint32_t oa_size) > -{ > - gds_base = gds_base >> AMDGPU_GDS_SHIFT; > - gds_size = gds_size >> AMDGPU_GDS_SHIFT; > - > - gws_base = gws_base >> AMDGPU_GWS_SHIFT; > - gws_size = gws_size >> AMDGPU_GWS_SHIFT; > - > - oa_base = oa_base >> AMDGPU_OA_SHIFT; > - oa_size = oa_size >> AMDGPU_OA_SHIFT; > - > - /* GDS Base */ > - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); > - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | > - WRITE_DATA_DST_SEL(0))); > - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); > - amdgpu_ring_write(ring, 0); > - amdgpu_ring_write(ring, gds_base); > - > - /* GDS Size */ > - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); > - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | > - WRITE_DATA_DST_SEL(0))); > - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); > - amdgpu_ring_write(ring, 0); > - amdgpu_ring_write(ring, gds_size); > - > - /* GWS */ > - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); > - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | > - WRITE_DATA_DST_SEL(0))); > - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); > - amdgpu_ring_write(ring, 0); > - amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); > - > - /* OA */ > - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); > - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | > - WRITE_DATA_DST_SEL(0))); > - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); > - amdgpu_ring_write(ring, 0); > - amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); > -} > - > static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) > { > struct amdgpu_device *adev = ring->adev; > uint32_t value = 0; > > value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); > value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); > value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); > value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); > WREG32(mmSQ_CMD, value); > @@ -4584,55 +4558,32 @@ static int gfx_v7_0_sw_init(void *handle) > ring_id, > i, k, j); > if (r) > return r; > > ring_id++; > } > } > } > > - /* reserve GDS, GWS and OA resource for gfx */ > - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, > - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, > - &adev->gds.gds_gfx_bo, NULL, NULL); > - if (r) > - return r; > - > - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, > - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, > - &adev->gds.gws_gfx_bo, NULL, NULL); > - if (r) > - return r; > - > - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, > - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, > - &adev->gds.oa_gfx_bo, NULL, NULL); > - if (r) > - return r; > - > adev->gfx.ce_ram_size = 0x8000; > > gfx_v7_0_gpu_early_init(adev); > > return r; > } > > static int gfx_v7_0_sw_fini(void *handle) > { > int i; > 
struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); > - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); > - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); > - > for (i = 0; i < adev->gfx.num_gfx_rings; i++) > amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); > for (i = 0; i < adev->gfx.num_compute_rings; i++) > amdgpu_ring_fini(&adev->gfx.compute_ring[i]); > > gfx_v7_0_cp_compute_fini(adev); > gfx_v7_0_rlc_fini(adev); > gfx_v7_0_mec_fini(adev); > amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, > &adev->gfx.rlc.clear_state_gpu_addr, > @@ -5073,64 +5024,60 @@ static const struct amd_ip_funcs gfx_v7_0_ip_funcs = { > > static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { > .type = AMDGPU_RING_TYPE_GFX, > .align_mask = 0xff, > .nop = PACKET3(PACKET3_NOP, 0x3FFF), > .support_64bit_ptrs = false, > .get_rptr = gfx_v7_0_ring_get_rptr, > .get_wptr = gfx_v7_0_ring_get_wptr_gfx, > .set_wptr = gfx_v7_0_ring_set_wptr_gfx, > .emit_frame_size = > - 20 + /* gfx_v7_0_ring_emit_gds_switch */ > 7 + /* gfx_v7_0_ring_emit_hdp_flush */ > 5 + /* hdp invalidate */ > 12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */ > 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */ > CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */ > 3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/ > .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */ > .emit_ib = gfx_v7_0_ring_emit_ib_gfx, > .emit_fence = gfx_v7_0_ring_emit_fence_gfx, > .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync, > .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, > - .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch, > .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, > .test_ring = gfx_v7_0_ring_test_ring, > .test_ib = gfx_v7_0_ring_test_ib, > .insert_nop = amdgpu_ring_insert_nop, > .pad_ib = amdgpu_ring_generic_pad_ib, > .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl, > .emit_wreg = gfx_v7_0_ring_emit_wreg, > .soft_recovery = gfx_v7_0_ring_soft_recovery, > }; > > static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { > .type = AMDGPU_RING_TYPE_COMPUTE, > .align_mask = 0xff, > .nop = PACKET3(PACKET3_NOP, 0x3FFF), > .support_64bit_ptrs = false, > .get_rptr = gfx_v7_0_ring_get_rptr, > .get_wptr = gfx_v7_0_ring_get_wptr_compute, > .set_wptr = gfx_v7_0_ring_set_wptr_compute, > .emit_frame_size = > - 20 + /* gfx_v7_0_ring_emit_gds_switch */ > 7 + /* gfx_v7_0_ring_emit_hdp_flush */ > 5 + /* hdp invalidate */ > 7 + /* gfx_v7_0_ring_emit_pipeline_sync */ > CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */ > 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ > .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */ > .emit_ib = gfx_v7_0_ring_emit_ib_compute, > .emit_fence = gfx_v7_0_ring_emit_fence_compute, > .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync, > .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, > - .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch, > .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, > .test_ring = gfx_v7_0_ring_test_ring, > .test_ib = gfx_v7_0_ring_test_ib, > .insert_nop = amdgpu_ring_insert_nop, > .pad_ib = amdgpu_ring_generic_pad_ib, > .emit_wreg = gfx_v7_0_ring_emit_wreg, > }; > > static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev) > { > @@ -5169,42 +5116,28 @@ static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev) > adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs; > } > > static void 
gfx_v7_0_set_gds_init(struct amdgpu_device *adev) > { > /* init asci gds info */ > adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); > adev->gds.gws.total_size = 64; > adev->gds.oa.total_size = 16; > > - if (adev->gds.mem.total_size == 64 * 1024) { > - adev->gds.mem.gfx_partition_size = 4096; > - adev->gds.mem.cs_partition_size = 4096; > - > - adev->gds.gws.gfx_partition_size = 4; > - adev->gds.gws.cs_partition_size = 4; > - > - adev->gds.oa.gfx_partition_size = 4; > - adev->gds.oa.cs_partition_size = 1; > - } else { > - adev->gds.mem.gfx_partition_size = 1024; > - adev->gds.mem.cs_partition_size = 1024; > - > - adev->gds.gws.gfx_partition_size = 16; > - adev->gds.gws.cs_partition_size = 16; > - > - adev->gds.oa.gfx_partition_size = 4; > - adev->gds.oa.cs_partition_size = 4; > - } > + adev->gds.mem.gfx_size_per_vmid = adev->gds.mem.total_size / AMDGPU_NUM_VMID; > + adev->gds.mem.kfd_size_per_vmid = adev->gds.mem.total_size / AMDGPU_NUM_VMID; > + adev->gds.gws.gfx_size_per_vmid = adev->gds.gws.total_size / AMDGPU_NUM_VMID; > + adev->gds.gws.kfd_size_per_vmid = adev->gds.gws.total_size / AMDGPU_NUM_VMID; > + adev->gds.oa.gfx_size_per_vmid = adev->gds.oa.total_size / 8; /* gfx only */ > + adev->gds.oa.kfd_size_per_vmid = 0; > } > > - > static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) > { > int i, j, k, counter, active_cu_number = 0; > u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; > struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; > unsigned disable_masks[4 * 2]; > u32 ao_cu_num; > > if (adev->flags & AMD_IS_APU) > ao_cu_num = 2; > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index 3882689b2d8f..b11a54bd0668 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -2154,57 +2154,34 @@ static int gfx_v8_0_sw_init(void *handle) > kiq = &adev->gfx.kiq; > r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); > if (r) > return r; > > /* create MQD for all compute queues as well as KIQ for SRIOV case */ > r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); > if (r) > return r; > > - /* reserve GDS, GWS and OA resource for gfx */ > - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, > - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, > - &adev->gds.gds_gfx_bo, NULL, NULL); > - if (r) > - return r; > - > - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, > - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, > - &adev->gds.gws_gfx_bo, NULL, NULL); > - if (r) > - return r; > - > - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, > - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, > - &adev->gds.oa_gfx_bo, NULL, NULL); > - if (r) > - return r; > - > adev->gfx.ce_ram_size = 0x8000; > > r = gfx_v8_0_gpu_early_init(adev); > if (r) > return r; > > return 0; > } > > static int gfx_v8_0_sw_fini(void *handle) > { > int i; > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); > - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); > - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); > - > for (i = 0; i < adev->gfx.num_gfx_rings; i++) > amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); > for (i = 0; i < adev->gfx.num_compute_rings; i++) > amdgpu_ring_fini(&adev->gfx.compute_ring[i]); > > amdgpu_gfx_compute_mqd_sw_fini(adev); > amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); > amdgpu_gfx_kiq_fini(adev); > > gfx_v8_0_mec_fini(adev); > @@ -3850,21 +3827,21 @@ static void 
gfx_v8_0_config_init(struct amdgpu_device *adev) > case CHIP_CARRIZO: > case CHIP_STONEY: > adev->gfx.config.double_offchip_lds_buf = 0; > break; > } > } > > static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) > { > u32 tmp, sh_static_mem_cfg; > - int i; > + int i, vmid; > > WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF); > WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); > WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); > WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); > > gfx_v8_0_tiling_mode_table_init(adev); > gfx_v8_0_setup_rb(adev); > gfx_v8_0_get_cu_info(adev); > gfx_v8_0_config_init(adev); > @@ -3927,20 +3904,41 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) > > tmp = RREG32(mmSPI_ARB_PRIORITY); > tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2); > tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2); > tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2); > tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2); > WREG32(mmSPI_ARB_PRIORITY, tmp); > > mutex_unlock(&adev->grbm_idx_mutex); > > + for (vmid = 0; vmid < AMDGPU_NUM_VMID; vmid++) { > + unsigned gds_size, gws_size, oa_size; > + > + if (vmid < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids) { > + gds_size = adev->gds.mem.gfx_size_per_vmid; > + gws_size = adev->gds.gws.gfx_size_per_vmid; > + oa_size = adev->gds.oa.gfx_size_per_vmid; > + } else { > + gds_size = adev->gds.mem.kfd_size_per_vmid; > + gws_size = adev->gds.gws.kfd_size_per_vmid; > + oa_size = adev->gds.oa.kfd_size_per_vmid; > + } > + > + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, vmid * gds_size); > + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, gds_size); > + WREG32(amdgpu_gds_reg_offset[vmid].gws, > + (vmid * gws_size) | > + (gws_size << GDS_GWS_VMID0__SIZE__SHIFT)); > + WREG32(amdgpu_gds_reg_offset[vmid].oa, > + ((1 << oa_size) - 1) << (vmid * oa_size)); > + } > } > > static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) > { > u32 i, j, k; > u32 mask; > > mutex_lock(&adev->grbm_idx_mutex); > for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { > for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { > @@ -5383,68 +5381,20 @@ static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) > uint64_t clock; > > mutex_lock(&adev->gfx.gpu_clock_mutex); > WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); > clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | > ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); > mutex_unlock(&adev->gfx.gpu_clock_mutex); > return clock; > } > > -static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, > - uint32_t vmid, > - uint32_t gds_base, uint32_t gds_size, > - uint32_t gws_base, uint32_t gws_size, > - uint32_t oa_base, uint32_t oa_size) > -{ > - gds_base = gds_base >> AMDGPU_GDS_SHIFT; > - gds_size = gds_size >> AMDGPU_GDS_SHIFT; > - > - gws_base = gws_base >> AMDGPU_GWS_SHIFT; > - gws_size = gws_size >> AMDGPU_GWS_SHIFT; > - > - oa_base = oa_base >> AMDGPU_OA_SHIFT; > - oa_size = oa_size >> AMDGPU_OA_SHIFT; > - > - /* GDS Base */ > - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); > - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | > - WRITE_DATA_DST_SEL(0))); > - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); > - amdgpu_ring_write(ring, 0); > - amdgpu_ring_write(ring, gds_base); > - > - /* GDS Size */ > - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); > - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | > - WRITE_DATA_DST_SEL(0))); > - 
amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); > - amdgpu_ring_write(ring, 0); > - amdgpu_ring_write(ring, gds_size); > - > - /* GWS */ > - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); > - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | > - WRITE_DATA_DST_SEL(0))); > - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); > - amdgpu_ring_write(ring, 0); > - amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); > - > - /* OA */ > - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); > - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | > - WRITE_DATA_DST_SEL(0))); > - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); > - amdgpu_ring_write(ring, 0); > - amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); > -} > - > static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) > { > WREG32(mmSQ_IND_INDEX, > (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | > (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | > (address << SQ_IND_INDEX__INDEX__SHIFT) | > (SQ_IND_INDEX__FORCE_READ_MASK)); > return RREG32(mmSQ_IND_DATA); > } > > @@ -7132,21 +7082,20 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { > 31 + /* DE_META */ > 3 + /* CNTX_CTRL */ > 5 + /* HDP_INVL */ > 8 + 8 + /* FENCE x2 */ > 2, /* SWITCH_BUFFER */ > .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */ > .emit_ib = gfx_v8_0_ring_emit_ib_gfx, > .emit_fence = gfx_v8_0_ring_emit_fence_gfx, > .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, > .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, > - .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, > .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, > .test_ring = gfx_v8_0_ring_test_ring, > .test_ib = gfx_v8_0_ring_test_ib, > .insert_nop = amdgpu_ring_insert_nop, > .pad_ib = amdgpu_ring_generic_pad_ib, > .emit_switch_buffer = gfx_v8_ring_emit_sb, > .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, > .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec, > .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, > .emit_wreg = gfx_v8_0_ring_emit_wreg, > @@ -7155,51 +7104,48 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { > > static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { > .type = AMDGPU_RING_TYPE_COMPUTE, > .align_mask = 0xff, > .nop = PACKET3(PACKET3_NOP, 0x3FFF), > .support_64bit_ptrs = false, > .get_rptr = gfx_v8_0_ring_get_rptr, > .get_wptr = gfx_v8_0_ring_get_wptr_compute, > .set_wptr = gfx_v8_0_ring_set_wptr_compute, > .emit_frame_size = > - 20 + /* gfx_v8_0_ring_emit_gds_switch */ > 7 + /* gfx_v8_0_ring_emit_hdp_flush */ > 5 + /* hdp_invalidate */ > 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ > VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */ > 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ > .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ > .emit_ib = gfx_v8_0_ring_emit_ib_compute, > .emit_fence = gfx_v8_0_ring_emit_fence_compute, > .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, > .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, > - .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, > .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, > .test_ring = gfx_v8_0_ring_test_ring, > .test_ib = gfx_v8_0_ring_test_ib, > .insert_nop = amdgpu_ring_insert_nop, > .pad_ib = amdgpu_ring_generic_pad_ib, > .set_priority = gfx_v8_0_ring_set_priority_compute, > .emit_wreg = gfx_v8_0_ring_emit_wreg, > }; > > static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { > .type 
= AMDGPU_RING_TYPE_KIQ, > .align_mask = 0xff, > .nop = PACKET3(PACKET3_NOP, 0x3FFF), > .support_64bit_ptrs = false, > .get_rptr = gfx_v8_0_ring_get_rptr, > .get_wptr = gfx_v8_0_ring_get_wptr_compute, > .set_wptr = gfx_v8_0_ring_set_wptr_compute, > .emit_frame_size = > - 20 + /* gfx_v8_0_ring_emit_gds_switch */ > 7 + /* gfx_v8_0_ring_emit_hdp_flush */ > 5 + /* hdp_invalidate */ > 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ > 17 + /* gfx_v8_0_ring_emit_vm_flush */ > 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ > .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ > .emit_ib = gfx_v8_0_ring_emit_ib_compute, > .emit_fence = gfx_v8_0_ring_emit_fence_kiq, > .test_ring = gfx_v8_0_ring_test_ring, > .test_ib = gfx_v8_0_ring_test_ib, > @@ -7278,39 +7224,26 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) > adev->gfx.rlc.funcs = &iceland_rlc_funcs; > } > > static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) > { > /* init asci gds info */ > adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); > adev->gds.gws.total_size = 64; > adev->gds.oa.total_size = 16; > > - if (adev->gds.mem.total_size == 64 * 1024) { > - adev->gds.mem.gfx_partition_size = 4096; > - adev->gds.mem.cs_partition_size = 4096; > - > - adev->gds.gws.gfx_partition_size = 4; > - adev->gds.gws.cs_partition_size = 4; > - > - adev->gds.oa.gfx_partition_size = 4; > - adev->gds.oa.cs_partition_size = 1; > - } else { > - adev->gds.mem.gfx_partition_size = 1024; > - adev->gds.mem.cs_partition_size = 1024; > - > - adev->gds.gws.gfx_partition_size = 16; > - adev->gds.gws.cs_partition_size = 16; > - > - adev->gds.oa.gfx_partition_size = 4; > - adev->gds.oa.cs_partition_size = 4; > - } > + adev->gds.mem.gfx_size_per_vmid = adev->gds.mem.total_size / AMDGPU_NUM_VMID; > + adev->gds.mem.kfd_size_per_vmid = adev->gds.mem.total_size / AMDGPU_NUM_VMID; > + adev->gds.gws.gfx_size_per_vmid = adev->gds.gws.total_size / AMDGPU_NUM_VMID; > + adev->gds.gws.kfd_size_per_vmid = adev->gds.gws.total_size / AMDGPU_NUM_VMID; > + adev->gds.oa.gfx_size_per_vmid = adev->gds.oa.total_size / 8; /* gfx only */ > + adev->gds.oa.kfd_size_per_vmid = 0; > } > > static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, > u32 bitmap) > { > u32 data; > > if (!bitmap) > return; > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index 3594704a6f9b..48a7e25514f5 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -1351,31 +1351,32 @@ static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev) > NULL); > > memset(&adev->gfx.ngg.buf[0], 0, > sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX); > > adev->gfx.ngg.init = false; > > return 0; > } > > +/* TODO: remove */ > static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) > { > int r; > > if (!amdgpu_ngg || adev->gfx.ngg.init == true) > return 0; > > /* GDS reserve memory: 64 bytes alignment */ > adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40); > adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size; > - adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size; > + adev->gds.mem.gfx_size_per_vmid -= adev->gfx.ngg.gds_reserve_size; > adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE); > adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); > > /* Primitive Buffer */ > r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM], > amdgpu_prim_buf_per_se, > 64 * 1024); > if (r) { > dev_err(adev->dev, "Failed to 
create Primitive Buffer\n"); > goto err; > @@ -1412,20 +1413,21 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev) > } > > out: > adev->gfx.ngg.init = true; > return 0; > err: > gfx_v9_0_ngg_fini(adev); > return r; > } > > +/* TODO: remove */ > static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) > { > struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; > int r; > u32 data, base; > > if (!amdgpu_ngg) > return 0; > > /* Program buffer size */ > @@ -1469,23 +1471,22 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) > /* Clear GDS reserved memory */ > r = amdgpu_ring_alloc(ring, 17); > if (r) { > DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n", > ring->idx, r); > return r; > } > > gfx_v9_0_write_data_to_reg(ring, 0, false, > SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), > - (adev->gds.mem.total_size + > - adev->gfx.ngg.gds_reserve_size) >> > - AMDGPU_GDS_SHIFT); > + adev->gds.mem.total_size + > + adev->gfx.ngg.gds_reserve_size); > > amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); > amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | > PACKET3_DMA_DATA_DST_SEL(1) | > PACKET3_DMA_DATA_SRC_SEL(2))); > amdgpu_ring_write(ring, 0); > amdgpu_ring_write(ring, 0); > amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); > amdgpu_ring_write(ring, 0); > amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | > @@ -1644,62 +1645,39 @@ static int gfx_v9_0_sw_init(void *handle) > kiq = &adev->gfx.kiq; > r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); > if (r) > return r; > > /* create MQD for all compute queues as wel as KIQ for SRIOV case */ > r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); > if (r) > return r; > > - /* reserve GDS, GWS and OA resource for gfx */ > - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, > - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, > - &adev->gds.gds_gfx_bo, NULL, NULL); > - if (r) > - return r; > - > - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, > - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, > - &adev->gds.gws_gfx_bo, NULL, NULL); > - if (r) > - return r; > - > - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, > - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, > - &adev->gds.oa_gfx_bo, NULL, NULL); > - if (r) > - return r; > - > adev->gfx.ce_ram_size = 0x8000; > > r = gfx_v9_0_gpu_early_init(adev); > if (r) > return r; > > r = gfx_v9_0_ngg_init(adev); > if (r) > return r; > > return 0; > } > > > static int gfx_v9_0_sw_fini(void *handle) > { > int i; > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > > - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); > - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); > - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); > - > for (i = 0; i < adev->gfx.num_gfx_rings; i++) > amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); > for (i = 0; i < adev->gfx.num_compute_rings; i++) > amdgpu_ring_fini(&adev->gfx.compute_ring[i]); > > amdgpu_gfx_compute_mqd_sw_fini(adev); > amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); > amdgpu_gfx_kiq_fini(adev); > > gfx_v9_0_mec_fini(adev); > @@ -1813,21 +1791,21 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) > WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); > WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases); > } > soc15_grbm_select(adev, 0, 0, 0, 0); > mutex_unlock(&adev->srbm_mutex); > } > > static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) > { > u32 tmp; > - int i; > + int i, vmid; > > WREG32_FIELD15(GC, 0, GRBM_CNTL, 
READ_TIMEOUT, 0xff); > > gfx_v9_0_tiling_mode_table_init(adev); > > gfx_v9_0_setup_rb(adev); > gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); > adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); > > /* XXX SH_MEM regs */ > @@ -1869,20 +1847,43 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) > (adev->gfx.config.sc_prim_fifo_size_frontend << > PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | > (adev->gfx.config.sc_prim_fifo_size_backend << > PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | > (adev->gfx.config.sc_hiz_tile_fifo_size << > PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | > (adev->gfx.config.sc_earlyz_tile_fifo_size << > PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); > mutex_unlock(&adev->grbm_idx_mutex); > > + for (vmid = 0; vmid < AMDGPU_NUM_VMID; vmid++) { > + unsigned gds_size, gws_size, oa_size; > + > + if (vmid < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids) { > + gds_size = adev->gds.mem.gfx_size_per_vmid; > + gws_size = adev->gds.gws.gfx_size_per_vmid; > + oa_size = adev->gds.oa.gfx_size_per_vmid; > + } else { > + gds_size = adev->gds.mem.kfd_size_per_vmid; > + gws_size = adev->gds.gws.kfd_size_per_vmid; > + oa_size = adev->gds.oa.kfd_size_per_vmid; > + } > + > + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, > + vmid * gds_size); > + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, > + gds_size); > + WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, > + (vmid * gws_size) | > + (gws_size << GDS_GWS_VMID0__SIZE__SHIFT)); > + WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, > + ((1 << oa_size) - 1) << (vmid * oa_size)); > + } > } > > static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) > { > u32 i, j, k; > u32 mask; > > mutex_lock(&adev->grbm_idx_mutex); > for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { > for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { > @@ -3407,58 +3408,20 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) > uint64_t clock; > > mutex_lock(&adev->gfx.gpu_clock_mutex); > WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); > clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | > ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); > mutex_unlock(&adev->gfx.gpu_clock_mutex); > return clock; > } > > -static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, > - uint32_t vmid, > - uint32_t gds_base, uint32_t gds_size, > - uint32_t gws_base, uint32_t gws_size, > - uint32_t oa_base, uint32_t oa_size) > -{ > - struct amdgpu_device *adev = ring->adev; > - > - gds_base = gds_base >> AMDGPU_GDS_SHIFT; > - gds_size = gds_size >> AMDGPU_GDS_SHIFT; > - > - gws_base = gws_base >> AMDGPU_GWS_SHIFT; > - gws_size = gws_size >> AMDGPU_GWS_SHIFT; > - > - oa_base = oa_base >> AMDGPU_OA_SHIFT; > - oa_size = oa_size >> AMDGPU_OA_SHIFT; > - > - /* GDS Base */ > - gfx_v9_0_write_data_to_reg(ring, 0, false, > - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, > - gds_base); > - > - /* GDS Size */ > - gfx_v9_0_write_data_to_reg(ring, 0, false, > - SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, > - gds_size); > - > - /* GWS */ > - gfx_v9_0_write_data_to_reg(ring, 0, false, > - SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, > - gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); > - > - /* OA */ > - gfx_v9_0_write_data_to_reg(ring, 0, false, > - SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, > - (1 << (oa_size + oa_base)) - (1 << oa_base)); > -} > - > static int gfx_v9_0_early_init(void *handle) > { > struct 
amdgpu_device *adev = (struct amdgpu_device *)handle; > > adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; > adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; > gfx_v9_0_set_ring_funcs(adev); > gfx_v9_0_set_irq_funcs(adev); > gfx_v9_0_set_gds_init(adev); > gfx_v9_0_set_rlc_funcs(adev); > @@ -4695,21 +4658,20 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { > 31 + /* DE_META */ > 3 + /* CNTX_CTRL */ > 5 + /* HDP_INVL */ > 8 + 8 + /* FENCE x2 */ > 2, /* SWITCH_BUFFER */ > .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ > .emit_ib = gfx_v9_0_ring_emit_ib_gfx, > .emit_fence = gfx_v9_0_ring_emit_fence, > .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, > .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, > - .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, > .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, > .test_ring = gfx_v9_0_ring_test_ring, > .test_ib = gfx_v9_0_ring_test_ib, > .insert_nop = amdgpu_ring_insert_nop, > .pad_ib = amdgpu_ring_generic_pad_ib, > .emit_switch_buffer = gfx_v9_ring_emit_sb, > .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, > .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, > .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, > .emit_tmz = gfx_v9_0_ring_emit_tmz, > @@ -4722,34 +4684,32 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { > static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { > .type = AMDGPU_RING_TYPE_COMPUTE, > .align_mask = 0xff, > .nop = PACKET3(PACKET3_NOP, 0x3FFF), > .support_64bit_ptrs = true, > .vmhub = AMDGPU_GFXHUB, > .get_rptr = gfx_v9_0_ring_get_rptr_compute, > .get_wptr = gfx_v9_0_ring_get_wptr_compute, > .set_wptr = gfx_v9_0_ring_set_wptr_compute, > .emit_frame_size = > - 20 + /* gfx_v9_0_ring_emit_gds_switch */ > 7 + /* gfx_v9_0_ring_emit_hdp_flush */ > 5 + /* hdp invalidate */ > 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ > SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + > SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + > 2 + /* gfx_v9_0_ring_emit_vm_flush */ > 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ > .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ > .emit_ib = gfx_v9_0_ring_emit_ib_compute, > .emit_fence = gfx_v9_0_ring_emit_fence, > .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, > .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, > - .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, > .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, > .test_ring = gfx_v9_0_ring_test_ring, > .test_ib = gfx_v9_0_ring_test_ib, > .insert_nop = amdgpu_ring_insert_nop, > .pad_ib = amdgpu_ring_generic_pad_ib, > .set_priority = gfx_v9_0_ring_set_priority_compute, > .emit_wreg = gfx_v9_0_ring_emit_wreg, > .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, > .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, > }; > @@ -4757,21 +4717,20 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { > static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { > .type = AMDGPU_RING_TYPE_KIQ, > .align_mask = 0xff, > .nop = PACKET3(PACKET3_NOP, 0x3FFF), > .support_64bit_ptrs = true, > .vmhub = AMDGPU_GFXHUB, > .get_rptr = gfx_v9_0_ring_get_rptr_compute, > .get_wptr = gfx_v9_0_ring_get_wptr_compute, > .set_wptr = gfx_v9_0_ring_set_wptr_compute, > .emit_frame_size = > - 20 + /* gfx_v9_0_ring_emit_gds_switch */ > 7 + /* gfx_v9_0_ring_emit_hdp_flush */ > 5 + /* hdp invalidate */ > 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ > SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + > SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + > 2 + /* gfx_v9_0_ring_emit_vm_flush */ > 8 + 8 + 8, /* 
gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ > .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ > .emit_ib = gfx_v9_0_ring_emit_ib_compute, > .emit_fence = gfx_v9_0_ring_emit_fence_kiq, > @@ -4847,39 +4806,26 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) > } > } > > static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) > { > /* init asci gds info */ > adev->gds.mem.total_size = RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); > adev->gds.gws.total_size = 64; > adev->gds.oa.total_size = 16; > > - if (adev->gds.mem.total_size == 64 * 1024) { > - adev->gds.mem.gfx_partition_size = 4096; > - adev->gds.mem.cs_partition_size = 4096; > - > - adev->gds.gws.gfx_partition_size = 4; > - adev->gds.gws.cs_partition_size = 4; > - > - adev->gds.oa.gfx_partition_size = 4; > - adev->gds.oa.cs_partition_size = 1; > - } else { > - adev->gds.mem.gfx_partition_size = 1024; > - adev->gds.mem.cs_partition_size = 1024; > - > - adev->gds.gws.gfx_partition_size = 16; > - adev->gds.gws.cs_partition_size = 16; > - > - adev->gds.oa.gfx_partition_size = 4; > - adev->gds.oa.cs_partition_size = 4; > - } > + adev->gds.mem.gfx_size_per_vmid = adev->gds.mem.total_size / AMDGPU_NUM_VMID; > + adev->gds.mem.kfd_size_per_vmid = adev->gds.mem.total_size / AMDGPU_NUM_VMID; > + adev->gds.gws.gfx_size_per_vmid = adev->gds.gws.total_size / AMDGPU_NUM_VMID; > + adev->gds.gws.kfd_size_per_vmid = adev->gds.gws.total_size / AMDGPU_NUM_VMID; > + adev->gds.oa.gfx_size_per_vmid = adev->gds.oa.total_size / 8; /* gfx only */ > + adev->gds.oa.kfd_size_per_vmid = 0; > } > > static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, > u32 bitmap) > { > u32 data; > > if (!bitmap) > return; > > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h > index 94444eeba55b..9b9512b14cae 100644 > --- a/include/uapi/drm/amdgpu_drm.h > +++ b/include/uapi/drm/amdgpu_drm.h > @@ -81,36 +81,27 @@ extern "C" { > * %AMDGPU_GEM_DOMAIN_CPU System memory that is not GPU accessible. > * Memory in this pool could be swapped out to disk if there is pressure. > * > * %AMDGPU_GEM_DOMAIN_GTT GPU accessible system memory, mapped into the > * GPU's virtual address space via gart. Gart memory linearizes non-contiguous > * pages of system memory, allows GPU access system memory in a linezrized > * fashion. > * > * %AMDGPU_GEM_DOMAIN_VRAM Local video memory. For APUs, it is memory > * carved out by the BIOS. > - * > - * %AMDGPU_GEM_DOMAIN_GDS Global on-chip data storage used to share data > - * across shader threads. > - * > - * %AMDGPU_GEM_DOMAIN_GWS Global wave sync, used to synchronize the > - * execution of all the waves on a device. > - * > - * %AMDGPU_GEM_DOMAIN_OA Ordered append, used by 3D or Compute engines > - * for appending data. 
> */ > #define AMDGPU_GEM_DOMAIN_CPU 0x1 > #define AMDGPU_GEM_DOMAIN_GTT 0x2 > #define AMDGPU_GEM_DOMAIN_VRAM 0x4 > -#define AMDGPU_GEM_DOMAIN_GDS 0x8 > -#define AMDGPU_GEM_DOMAIN_GWS 0x10 > -#define AMDGPU_GEM_DOMAIN_OA 0x20 > +#define AMDGPU_GEM_DOMAIN_GDS 0x8 /* non-functional */ > +#define AMDGPU_GEM_DOMAIN_GWS 0x10 /* non-functional */ > +#define AMDGPU_GEM_DOMAIN_OA 0x20 /* non-functional */ > #define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \ > AMDGPU_GEM_DOMAIN_GTT | \ > AMDGPU_GEM_DOMAIN_VRAM | \ > AMDGPU_GEM_DOMAIN_GDS | \ > AMDGPU_GEM_DOMAIN_GWS | \ > AMDGPU_GEM_DOMAIN_OA) > > /* Flag that CPU access will be required for the case of VRAM domain */ > #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) > /* Flag that CPU access will not work, this VRAM domain is invisible */
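
For reference, here is a small standalone user-space sketch (not part of the patch) of the static per-VMID split that gfx_v8_0_set_gds_init()/gfx_v9_0_set_gds_init() compute and that the new gfx_v9_0_gpu_init() loop programs into the GDS_VMID*_BASE/SIZE, GDS_GWS_VMID* and GDS_OA_VMID* registers. The 64 KB GDS / 64 GWS / 16 OA totals and the 16-vs-8 VMID split are assumed typical values for illustration only; the kernel reads the GDS size from GDS_VMID0_SIZE at runtime and takes the gfx VMID count from the gfxhub ID manager.

  /*
   * Standalone illustration of the static per-VMID GDS/GWS/OA split.
   * NUM_VMID and NUM_GFX_VMID mirror AMDGPU_NUM_VMID and the gfxhub
   * id_mgr num_ids used in the patch; the totals below are assumed
   * power-on values, not read from hardware.
   */
  #include <stdio.h>

  #define NUM_VMID      16   /* total VMIDs */
  #define NUM_GFX_VMID   8   /* VMIDs owned by the gfx side */

  int main(void)
  {
          unsigned gds_total = 0x10000; /* 64 KB of GDS memory */
          unsigned gws_total = 64;      /* 64 GWS resources */
          unsigned oa_total  = 16;      /* 16 OA slots */

          unsigned gds_per_vmid = gds_total / NUM_VMID;    /* 4 KB each */
          unsigned gws_per_vmid = gws_total / NUM_VMID;    /* 4 each */
          unsigned oa_per_vmid  = oa_total / NUM_GFX_VMID; /* 2 each, gfx only */

          for (unsigned vmid = 0; vmid < NUM_VMID; vmid++) {
                  /* KFD VMIDs get no OA slots in the patch (kfd_size_per_vmid = 0) */
                  unsigned oa_size = vmid < NUM_GFX_VMID ? oa_per_vmid : 0;
                  /* same mask math as the GDS_OA_VMID0 write in gfx_v9_0_gpu_init() */
                  unsigned oa_mask = ((1u << oa_size) - 1) << (vmid * oa_size);

                  printf("vmid %2u: GDS base 0x%05x size 0x%04x, "
                         "GWS base %2u size %u, OA mask 0x%04x\n",
                         vmid, vmid * gds_per_vmid, gds_per_vmid,
                         vmid * gws_per_vmid, gws_per_vmid, oa_mask);
          }
          return 0;
  }

Every VMID ends up with the same fixed slice, which is why the per-job gfx_v*_0_ring_emit_gds_switch packets and the kernel-reserved GDS/GWS/OA BOs can be dropped in this series.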