GDS is temporary memory. Its purpose depends on the job, but most of the
time the idea is:

- beginning of IB
- initialize GDS variables
- dispatch compute that works with GDS variables
- when done, copy GDS variables to memory
- repeat ...
- end of IB

GDS is like a pool of global shader GPRs. GDS is too small for persistent
data. (Two rough standalone sketches, one of this lifecycle and one of the
per-VMID split the patch programs, are appended after the quoted patch
below.)

Marek

On Thu, Sep 13, 2018 at 1:26 PM, Christian König
<christian.koenig at amd.com> wrote:
> Are you sure of that? I mean it is rather pointless to have a Global Data
> Share when it can't be used to share anything?
>
> On the other hand I'm not opposed to get rid of all that stuff if we really
> don't need it.
>
> Christian.
>
> Am 13.09.2018 um 17:27 schrieb Marek Olšák:
>>
>> That's OK. We don't need IBs to get the same VMID.
>>
>> Marek
>>
>> On Thu, Sep 13, 2018 at 4:40 AM, Christian König
>> <ckoenig.leichtzumerken at gmail.com> wrote:
>>>
>>> As discussed internally that doesn't work because threads don't
>>> necessarily get the same VMID assigned.
>>>
>>> Christian.
>>>
>>> Am 12.09.2018 um 22:33 schrieb Marek Olšák:
>>>>
>>>> From: Marek Olšák <marek.olsak at amd.com>
>>>>
>>>> I've chosen to do it like this because it's easy and allows an arbitrary
>>>> number of processes.
>>>>
>>>> Signed-off-by: Marek Olšák <marek.olsak at amd.com>
>>>> ---
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c |  10 --
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h |   3 -
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      |  20 ----
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h     |  19 +--
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c     |  24 +---
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c     |   6 -
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h     |   7 --
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h     |   3 -
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c     |  14 +--
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  21 ----
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |   6 -
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h    |   5 -
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     |  61 ----------
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h     |   8 --
>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      |  34 +----
>>>>  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c       | 125 +++++---------------
>>>>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c       | 123 +++++--------------
>>>>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c       | 124 ++++++-------------
>>>>  include/uapi/drm/amdgpu_drm.h               |  15 +--
>>>>  19 files changed, 109 insertions(+), 519 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>>> index b80243d3972e..7264a4930b88 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>>> @@ -71,23 +71,20 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
>>>>  		      / sizeof(struct amdgpu_bo_list_entry))
>>>>  		return -EINVAL;
>>>>  	size = sizeof(struct amdgpu_bo_list);
>>>>  	size += num_entries * sizeof(struct amdgpu_bo_list_entry);
>>>>  	list = kvmalloc(size, GFP_KERNEL);
>>>>  	if (!list)
>>>>  		return -ENOMEM;
>>>>  	kref_init(&list->refcount);
>>>> -	list->gds_obj = adev->gds.gds_gfx_bo;
>>>> -	list->gws_obj = adev->gds.gws_gfx_bo;
>>>> -	list->oa_obj = adev->gds.oa_gfx_bo;
>>>>  	array = amdgpu_bo_list_array_entry(list, 0);
>>>>  	memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
>>>>  	for (i = 0; i < num_entries; ++i) {
>>>>  		struct amdgpu_bo_list_entry *entry;
>>>>  		struct drm_gem_object *gobj;
>>>>  		struct amdgpu_bo *bo;
>>>>  		struct mm_struct *usermm;
>>>> @@ -111,27
+108,20 @@ int amdgpu_bo_list_create(struct amdgpu_device >>>> *adev, struct drm_file *filp, >>>> } else { >>>> entry = &array[last_entry++]; >>>> } >>>> entry->robj = bo; >>>> entry->priority = min(info[i].bo_priority, >>>> AMDGPU_BO_LIST_MAX_PRIORITY); >>>> entry->tv.bo = &entry->robj->tbo; >>>> entry->tv.shared = !entry->robj->prime_shared_count; >>>> - if (entry->robj->preferred_domains == >>>> AMDGPU_GEM_DOMAIN_GDS) >>>> - list->gds_obj = entry->robj; >>>> - if (entry->robj->preferred_domains == >>>> AMDGPU_GEM_DOMAIN_GWS) >>>> - list->gws_obj = entry->robj; >>>> - if (entry->robj->preferred_domains == >>>> AMDGPU_GEM_DOMAIN_OA) >>>> - list->oa_obj = entry->robj; >>>> - >>>> total_size += amdgpu_bo_size(entry->robj); >>>> trace_amdgpu_bo_list_set(list, entry->robj); >>>> } >>>> list->first_userptr = first_userptr; >>>> list->num_entries = num_entries; >>>> trace_amdgpu_cs_bo_status(list->num_entries, total_size); >>>> *result = list; >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h >>>> index 61b089768e1c..30f12a60aa28 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h >>>> @@ -36,23 +36,20 @@ struct amdgpu_bo_list_entry { >>>> struct ttm_validate_buffer tv; >>>> struct amdgpu_bo_va *bo_va; >>>> uint32_t priority; >>>> struct page **user_pages; >>>> int user_invalidated; >>>> }; >>>> struct amdgpu_bo_list { >>>> struct rcu_head rhead; >>>> struct kref refcount; >>>> - struct amdgpu_bo *gds_obj; >>>> - struct amdgpu_bo *gws_obj; >>>> - struct amdgpu_bo *oa_obj; >>>> unsigned first_userptr; >>>> unsigned num_entries; >>>> }; >>>> int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, >>>> struct amdgpu_bo_list **result); >>>> void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, >>>> struct list_head *validated); >>>> void amdgpu_bo_list_put(struct amdgpu_bo_list *list); >>>> int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in >>>> *in, >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>>> index 1081fd00b059..88b58facf29e 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c >>>> @@ -565,23 +565,20 @@ static int amdgpu_cs_list_validate(struct >>>> amdgpu_cs_parser *p, >>>> return 0; >>>> } >>>> static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, >>>> union drm_amdgpu_cs *cs) >>>> { >>>> struct amdgpu_fpriv *fpriv = p->filp->driver_priv; >>>> struct amdgpu_vm *vm = &fpriv->vm; >>>> struct amdgpu_bo_list_entry *e; >>>> struct list_head duplicates; >>>> - struct amdgpu_bo *gds; >>>> - struct amdgpu_bo *gws; >>>> - struct amdgpu_bo *oa; >>>> unsigned tries = 10; >>>> int r; >>>> INIT_LIST_HEAD(&p->validated); >>>> /* p->bo_list could already be assigned if >>>> AMDGPU_CHUNK_ID_BO_HANDLES is present */ >>>> if (cs->in.bo_list_handle) { >>>> if (p->bo_list) >>>> return -EINVAL; >>>> @@ -705,40 +702,23 @@ static int amdgpu_cs_parser_bos(struct >>>> amdgpu_cs_parser *p, >>>> r = amdgpu_cs_list_validate(p, &p->validated); >>>> if (r) { >>>> DRM_ERROR("amdgpu_cs_list_validate(validated) >>>> failed.\n"); >>>> goto error_validate; >>>> } >>>> amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, >>>> p->bytes_moved_vis); >>>> - gds = p->bo_list->gds_obj; >>>> - gws = p->bo_list->gws_obj; >>>> - oa = p->bo_list->oa_obj; >>>> - >>>> amdgpu_bo_list_for_each_entry(e, p->bo_list) >>>> e->bo_va = amdgpu_vm_bo_find(vm, e->robj); >>>> - if 
(gds) { >>>> - p->job->gds_base = amdgpu_bo_gpu_offset(gds); >>>> - p->job->gds_size = amdgpu_bo_size(gds); >>>> - } >>>> - if (gws) { >>>> - p->job->gws_base = amdgpu_bo_gpu_offset(gws); >>>> - p->job->gws_size = amdgpu_bo_size(gws); >>>> - } >>>> - if (oa) { >>>> - p->job->oa_base = amdgpu_bo_gpu_offset(oa); >>>> - p->job->oa_size = amdgpu_bo_size(oa); >>>> - } >>>> - >>>> if (!r && p->uf_entry.robj) { >>>> struct amdgpu_bo *uf = p->uf_entry.robj; >>>> r = amdgpu_ttm_alloc_gart(&uf->tbo); >>>> p->job->uf_addr += amdgpu_bo_gpu_offset(uf); >>>> } >>>> error_validate: >>>> if (r) >>>> ttm_eu_backoff_reservation(&p->ticket, &p->validated); >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h >>>> index e73728d90388..69ba25c2e921 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h >>>> @@ -17,48 +17,33 @@ >>>> * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, >>>> DAMAGES >>>> OR >>>> * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR >>>> OTHERWISE, >>>> * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE >>>> OR >>>> * OTHER DEALINGS IN THE SOFTWARE. >>>> * >>>> */ >>>> #ifndef __AMDGPU_GDS_H__ >>>> #define __AMDGPU_GDS_H__ >>>> -/* Because TTM request that alloacted buffer should be PAGE_SIZE >>>> aligned, >>>> - * we should report GDS/GWS/OA size as PAGE_SIZE aligned >>>> - * */ >>>> -#define AMDGPU_GDS_SHIFT 2 >>>> -#define AMDGPU_GWS_SHIFT PAGE_SHIFT >>>> -#define AMDGPU_OA_SHIFT PAGE_SHIFT >>>> - >>>> struct amdgpu_ring; >>>> struct amdgpu_bo; >>>> struct amdgpu_gds_asic_info { >>>> uint32_t total_size; >>>> - uint32_t gfx_partition_size; >>>> - uint32_t cs_partition_size; >>>> + uint32_t gfx_size_per_vmid; >>>> + uint32_t kfd_size_per_vmid; >>>> }; >>>> struct amdgpu_gds { >>>> struct amdgpu_gds_asic_info mem; >>>> struct amdgpu_gds_asic_info gws; >>>> struct amdgpu_gds_asic_info oa; >>>> - /* At present, GDS, GWS and OA resources for gfx (graphics) >>>> - * is always pre-allocated and available for graphics operation. >>>> - * Such resource is shared between all gfx clients. 
>>>> - * TODO: move this operation to user space >>>> - * */ >>>> - struct amdgpu_bo* gds_gfx_bo; >>>> - struct amdgpu_bo* gws_gfx_bo; >>>> - struct amdgpu_bo* oa_gfx_bo; >>>> }; >>>> struct amdgpu_gds_reg_offset { >>>> uint32_t mem_base; >>>> uint32_t mem_size; >>>> uint32_t gws; >>>> uint32_t oa; >>>> }; >>>> #endif /* __AMDGPU_GDS_H__ */ >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >>>> index d30a0838851b..c87ad4b4d0b6 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c >>>> @@ -223,43 +223,25 @@ int amdgpu_gem_create_ioctl(struct drm_device >>>> *dev, >>>> void *data, >>>> if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | >>>> AMDGPU_GEM_CREATE_NO_CPU_ACCESS | >>>> AMDGPU_GEM_CREATE_CPU_GTT_USWC | >>>> AMDGPU_GEM_CREATE_VRAM_CLEARED | >>>> AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | >>>> AMDGPU_GEM_CREATE_EXPLICIT_SYNC)) >>>> return -EINVAL; >>>> /* reject invalid gem domains */ >>>> - if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK) >>>> + if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU | >>>> + AMDGPU_GEM_DOMAIN_GTT | >>>> + AMDGPU_GEM_DOMAIN_VRAM)) >>>> return -EINVAL; >>>> - /* create a gem object to contain this object in */ >>>> - if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | >>>> - AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { >>>> - if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { >>>> - /* if gds bo is created from user space, it must >>>> be >>>> - * passed to bo list >>>> - */ >>>> - DRM_ERROR("GDS bo cannot be per-vm-bo\n"); >>>> - return -EINVAL; >>>> - } >>>> - flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; >>>> - if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS) >>>> - size = size << AMDGPU_GDS_SHIFT; >>>> - else if (args->in.domains == AMDGPU_GEM_DOMAIN_GWS) >>>> - size = size << AMDGPU_GWS_SHIFT; >>>> - else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA) >>>> - size = size << AMDGPU_OA_SHIFT; >>>> - else >>>> - return -EINVAL; >>>> - } >>>> size = roundup(size, PAGE_SIZE); >>>> if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { >>>> r = amdgpu_bo_reserve(vm->root.base.bo, false); >>>> if (r) >>>> return r; >>>> resv = vm->root.base.bo->tbo.resv; >>>> } >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c >>>> index 3a072a7a39f0..c2e6a1a11d7f 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c >>>> @@ -516,26 +516,20 @@ void amdgpu_vmid_free_reserved(struct >>>> amdgpu_device >>>> *adev, >>>> * Reset saved GDW, GWS and OA to force switch on next flush. 
>>>> */ >>>> void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub, >>>> unsigned vmid) >>>> { >>>> struct amdgpu_vmid_mgr *id_mgr = >>>> &adev->vm_manager.id_mgr[vmhub]; >>>> struct amdgpu_vmid *id = &id_mgr->ids[vmid]; >>>> mutex_lock(&id_mgr->lock); >>>> id->owner = 0; >>>> - id->gds_base = 0; >>>> - id->gds_size = 0; >>>> - id->gws_base = 0; >>>> - id->gws_size = 0; >>>> - id->oa_base = 0; >>>> - id->oa_size = 0; >>>> mutex_unlock(&id_mgr->lock); >>>> } >>>> /** >>>> * amdgpu_vmid_reset_all - reset VMID to zero >>>> * >>>> * @adev: amdgpu device structure >>>> * >>>> * Reset VMID to force flush on next use >>>> */ >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h >>>> index 7625419f0fc2..06078e665532 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h >>>> @@ -44,27 +44,20 @@ struct amdgpu_vmid { >>>> struct amdgpu_sync active; >>>> struct dma_fence *last_flush; >>>> uint64_t owner; >>>> uint64_t pd_gpu_addr; >>>> /* last flushed PD/PT update */ >>>> struct dma_fence *flushed_updates; >>>> uint32_t current_gpu_reset_count; >>>> - uint32_t gds_base; >>>> - uint32_t gds_size; >>>> - uint32_t gws_base; >>>> - uint32_t gws_size; >>>> - uint32_t oa_base; >>>> - uint32_t oa_size; >>>> - >>>> unsigned pasid; >>>> struct dma_fence *pasid_mapping; >>>> }; >>>> struct amdgpu_vmid_mgr { >>>> struct mutex lock; >>>> unsigned num_ids; >>>> struct list_head ids_lru; >>>> struct amdgpu_vmid ids[AMDGPU_NUM_VMID]; >>>> atomic_t reserved_vmid_num; >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h >>>> index 57cfe78a262b..3db553f6ad01 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h >>>> @@ -42,23 +42,20 @@ struct amdgpu_job { >>>> struct amdgpu_sync sched_sync; >>>> struct amdgpu_ib *ibs; >>>> struct dma_fence *fence; /* the hw fence */ >>>> uint32_t preamble_status; >>>> uint32_t num_ibs; >>>> void *owner; >>>> bool vm_needs_flush; >>>> uint64_t vm_pd_addr; >>>> unsigned vmid; >>>> unsigned pasid; >>>> - uint32_t gds_base, gds_size; >>>> - uint32_t gws_base, gws_size; >>>> - uint32_t oa_base, oa_size; >>>> uint32_t vram_lost_counter; >>>> /* user fence handling */ >>>> uint64_t uf_addr; >>>> uint64_t uf_sequence; >>>> }; >>>> int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, >>>> struct amdgpu_job **job, struct amdgpu_vm *vm); >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >>>> index 29ac3873eeb0..209954290954 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c >>>> @@ -517,27 +517,27 @@ static int amdgpu_info_ioctl(struct drm_device >>>> *dev, >>>> void *data, struct drm_file >>>> case AMDGPU_INFO_VIS_VRAM_USAGE: >>>> ui64 = >>>> amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); >>>> return copy_to_user(out, &ui64, min(size, 8u)) ? >>>> -EFAULT : >>>> 0; >>>> case AMDGPU_INFO_GTT_USAGE: >>>> ui64 = >>>> amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]); >>>> return copy_to_user(out, &ui64, min(size, 8u)) ? 
>>>> -EFAULT : >>>> 0; >>>> case AMDGPU_INFO_GDS_CONFIG: { >>>> struct drm_amdgpu_info_gds gds_info; >>>> memset(&gds_info, 0, sizeof(gds_info)); >>>> - gds_info.gds_gfx_partition_size = >>>> adev->gds.mem.gfx_partition_size >> AMDGPU_GDS_SHIFT; >>>> - gds_info.compute_partition_size = >>>> adev->gds.mem.cs_partition_size >> AMDGPU_GDS_SHIFT; >>>> - gds_info.gds_total_size = adev->gds.mem.total_size >> >>>> AMDGPU_GDS_SHIFT; >>>> - gds_info.gws_per_gfx_partition = >>>> adev->gds.gws.gfx_partition_size >> AMDGPU_GWS_SHIFT; >>>> - gds_info.gws_per_compute_partition = >>>> adev->gds.gws.cs_partition_size >> AMDGPU_GWS_SHIFT; >>>> - gds_info.oa_per_gfx_partition = >>>> adev->gds.oa.gfx_partition_size >> AMDGPU_OA_SHIFT; >>>> - gds_info.oa_per_compute_partition = >>>> adev->gds.oa.cs_partition_size >> AMDGPU_OA_SHIFT; >>>> + gds_info.gds_gfx_partition_size = >>>> adev->gds.mem.gfx_size_per_vmid; >>>> + gds_info.compute_partition_size = >>>> adev->gds.mem.kfd_size_per_vmid; >>>> + gds_info.gds_total_size = adev->gds.mem.total_size; >>>> + gds_info.gws_per_gfx_partition = >>>> adev->gds.gws.gfx_size_per_vmid; >>>> + gds_info.gws_per_compute_partition = >>>> adev->gds.gws.kfd_size_per_vmid; >>>> + gds_info.oa_per_gfx_partition = >>>> adev->gds.oa.gfx_size_per_vmid; >>>> + gds_info.oa_per_compute_partition = >>>> adev->gds.oa.kfd_size_per_vmid; >>>> return copy_to_user(out, &gds_info, >>>> min((size_t)size, >>>> sizeof(gds_info))) ? >>>> -EFAULT : 0; >>>> } >>>> case AMDGPU_INFO_VRAM_GTT: { >>>> struct drm_amdgpu_info_vram_gtt vram_gtt; >>>> vram_gtt.vram_size = adev->gmc.real_vram_size - >>>> atomic64_read(&adev->vram_pin_size); >>>> vram_gtt.vram_cpu_accessible_size = >>>> adev->gmc.visible_vram_size - >>>> atomic64_read(&adev->visible_pin_size); >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >>>> index de990bdcdd6c..76770a8c29a5 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c >>>> @@ -178,41 +178,20 @@ void amdgpu_bo_placement_from_domain(struct >>>> amdgpu_bo *abo, u32 domain) >>>> places[c].lpfn = 0; >>>> places[c].flags = TTM_PL_FLAG_SYSTEM; >>>> if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) >>>> places[c].flags |= TTM_PL_FLAG_WC | >>>> TTM_PL_FLAG_UNCACHED; >>>> else >>>> places[c].flags |= TTM_PL_FLAG_CACHED; >>>> c++; >>>> } >>>> - if (domain & AMDGPU_GEM_DOMAIN_GDS) { >>>> - places[c].fpfn = 0; >>>> - places[c].lpfn = 0; >>>> - places[c].flags = TTM_PL_FLAG_UNCACHED | >>>> AMDGPU_PL_FLAG_GDS; >>>> - c++; >>>> - } >>>> - >>>> - if (domain & AMDGPU_GEM_DOMAIN_GWS) { >>>> - places[c].fpfn = 0; >>>> - places[c].lpfn = 0; >>>> - places[c].flags = TTM_PL_FLAG_UNCACHED | >>>> AMDGPU_PL_FLAG_GWS; >>>> - c++; >>>> - } >>>> - >>>> - if (domain & AMDGPU_GEM_DOMAIN_OA) { >>>> - places[c].fpfn = 0; >>>> - places[c].lpfn = 0; >>>> - places[c].flags = TTM_PL_FLAG_UNCACHED | >>>> AMDGPU_PL_FLAG_OA; >>>> - c++; >>>> - } >>>> - >>>> if (!c) { >>>> places[c].fpfn = 0; >>>> places[c].lpfn = 0; >>>> places[c].flags = TTM_PL_MASK_CACHING | >>>> TTM_PL_FLAG_SYSTEM; >>>> c++; >>>> } >>>> BUG_ON(c >= AMDGPU_BO_MAX_PLACEMENTS); >>>> placement->num_placement = c; >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h >>>> index 907fdf46d895..e089964cbcb7 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h >>>> @@ -120,26 +120,20 @@ static inline struct amdgpu_bo >>>> 
*ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo) >>>> */ >>>> static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type) >>>> { >>>> switch (mem_type) { >>>> case TTM_PL_VRAM: >>>> return AMDGPU_GEM_DOMAIN_VRAM; >>>> case TTM_PL_TT: >>>> return AMDGPU_GEM_DOMAIN_GTT; >>>> case TTM_PL_SYSTEM: >>>> return AMDGPU_GEM_DOMAIN_CPU; >>>> - case AMDGPU_PL_GDS: >>>> - return AMDGPU_GEM_DOMAIN_GDS; >>>> - case AMDGPU_PL_GWS: >>>> - return AMDGPU_GEM_DOMAIN_GWS; >>>> - case AMDGPU_PL_OA: >>>> - return AMDGPU_GEM_DOMAIN_OA; >>>> default: >>>> break; >>>> } >>>> return 0; >>>> } >>>> /** >>>> * amdgpu_bo_reserve - reserve bo >>>> * @bo: bo structure >>>> * @no_intr: don't return -ERESTARTSYS on pending signal >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >>>> index 9cc239968e40..f6ea9604e611 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h >>>> @@ -130,24 +130,20 @@ struct amdgpu_ring_funcs { >>>> /* command emit functions */ >>>> void (*emit_ib)(struct amdgpu_ring *ring, >>>> struct amdgpu_ib *ib, >>>> unsigned vmid, bool ctx_switch); >>>> void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, >>>> uint64_t seq, unsigned flags); >>>> void (*emit_pipeline_sync)(struct amdgpu_ring *ring); >>>> void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid, >>>> uint64_t pd_addr); >>>> void (*emit_hdp_flush)(struct amdgpu_ring *ring); >>>> - void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid, >>>> - uint32_t gds_base, uint32_t gds_size, >>>> - uint32_t gws_base, uint32_t gws_size, >>>> - uint32_t oa_base, uint32_t oa_size); >>>> /* testing functions */ >>>> int (*test_ring)(struct amdgpu_ring *ring); >>>> int (*test_ib)(struct amdgpu_ring *ring, long timeout); >>>> /* insert NOP packets */ >>>> void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count); >>>> void (*insert_start)(struct amdgpu_ring *ring); >>>> void (*insert_end)(struct amdgpu_ring *ring); >>>> /* pad the indirect buffer to the necessary number of dw */ >>>> void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib); >>>> unsigned (*init_cond_exec)(struct amdgpu_ring *ring); >>>> @@ -226,21 +222,20 @@ struct amdgpu_ring { >>>> #define amdgpu_ring_patch_cs_in_place(r, p, ib) >>>> ((r)->funcs->patch_cs_in_place((p), (ib))) >>>> #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r)) >>>> #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t)) >>>> #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) >>>> #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) >>>> #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) >>>> #define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), >>>> (ib), (vmid), (c)) >>>> #define amdgpu_ring_emit_pipeline_sync(r) >>>> (r)->funcs->emit_pipeline_sync((r)) >>>> #define amdgpu_ring_emit_vm_flush(r, vmid, addr) >>>> (r)->funcs->emit_vm_flush((r), (vmid), (addr)) >>>> #define amdgpu_ring_emit_fence(r, addr, seq, flags) >>>> (r)->funcs->emit_fence((r), (addr), (seq), (flags)) >>>> -#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) >>>> (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), >>>> (as)) >>>> #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) >>>> #define amdgpu_ring_emit_switch_buffer(r) >>>> (r)->funcs->emit_switch_buffer((r)) >>>> #define amdgpu_ring_emit_cntxcntl(r, d) >>>> (r)->funcs->emit_cntxcntl((r), >>>> (d)) >>>> #define amdgpu_ring_emit_rreg(r, d) 
(r)->funcs->emit_rreg((r), (d)) >>>> #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), >>>> (d), >>>> (v)) >>>> #define amdgpu_ring_emit_reg_wait(r, d, v, m) >>>> (r)->funcs->emit_reg_wait((r), (d), (v), (m)) >>>> #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) >>>> (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) >>>> #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) >>>> #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) >>>> #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >>>> index 8a158ee922f7..2cc62b0e7ea8 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >>>> @@ -195,30 +195,20 @@ static int amdgpu_init_mem_type(struct >>>> ttm_bo_device >>>> *bdev, uint32_t type, >>>> break; >>>> case TTM_PL_VRAM: >>>> /* "On-card" video ram */ >>>> man->func = &amdgpu_vram_mgr_func; >>>> man->gpu_offset = adev->gmc.vram_start; >>>> man->flags = TTM_MEMTYPE_FLAG_FIXED | >>>> TTM_MEMTYPE_FLAG_MAPPABLE; >>>> man->available_caching = TTM_PL_FLAG_UNCACHED | >>>> TTM_PL_FLAG_WC; >>>> man->default_caching = TTM_PL_FLAG_WC; >>>> break; >>>> - case AMDGPU_PL_GDS: >>>> - case AMDGPU_PL_GWS: >>>> - case AMDGPU_PL_OA: >>>> - /* On-chip GDS memory*/ >>>> - man->func = &ttm_bo_manager_func; >>>> - man->gpu_offset = 0; >>>> - man->flags = TTM_MEMTYPE_FLAG_FIXED | >>>> TTM_MEMTYPE_FLAG_CMA; >>>> - man->available_caching = TTM_PL_FLAG_UNCACHED; >>>> - man->default_caching = TTM_PL_FLAG_UNCACHED; >>>> - break; >>>> default: >>>> DRM_ERROR("Unsupported memory type %u\n", >>>> (unsigned)type); >>>> return -EINVAL; >>>> } >>>> return 0; >>>> } >>>> /** >>>> * amdgpu_evict_flags - Compute placement flags >>>> * >>>> @@ -1039,25 +1029,20 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt >>>> *ttm, >>>> if (r) { >>>> DRM_ERROR("failed to pin userptr\n"); >>>> return r; >>>> } >>>> } >>>> if (!ttm->num_pages) { >>>> WARN(1, "nothing to bind %lu pages for mreg %p back >>>> %p!\n", >>>> ttm->num_pages, bo_mem, ttm); >>>> } >>>> - if (bo_mem->mem_type == AMDGPU_PL_GDS || >>>> - bo_mem->mem_type == AMDGPU_PL_GWS || >>>> - bo_mem->mem_type == AMDGPU_PL_OA) >>>> - return -EINVAL; >>>> - >>>> if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) { >>>> gtt->offset = AMDGPU_BO_INVALID_OFFSET; >>>> return 0; >>>> } >>>> /* compute PTE flags relevant to this BO memory */ >>>> flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem); >>>> /* bind pages into GART page tables */ >>>> gtt->offset = ((u64)bo_mem->start << PAGE_SHIFT) - >>>> adev->gmc.gart_start; >>>> @@ -1818,60 +1803,20 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) >>>> /* Initialize GTT memory pool */ >>>> r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> >>>> PAGE_SHIFT); >>>> if (r) { >>>> DRM_ERROR("Failed initializing GTT heap.\n"); >>>> return r; >>>> } >>>> DRM_INFO("amdgpu: %uM of GTT memory ready.\n", >>>> (unsigned)(gtt_size / (1024 * 1024))); >>>> - /* Initialize various on-chip memory pools */ >>>> - adev->gds.mem.total_size = adev->gds.mem.total_size << >>>> AMDGPU_GDS_SHIFT; >>>> - adev->gds.mem.gfx_partition_size = >>>> adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT; >>>> - adev->gds.mem.cs_partition_size = >>>> adev->gds.mem.cs_partition_size >>>> << AMDGPU_GDS_SHIFT; >>>> - adev->gds.gws.total_size = adev->gds.gws.total_size << >>>> AMDGPU_GWS_SHIFT; >>>> - adev->gds.gws.gfx_partition_size = 
>>>> adev->gds.gws.gfx_partition_size << AMDGPU_GWS_SHIFT; >>>> - adev->gds.gws.cs_partition_size = >>>> adev->gds.gws.cs_partition_size >>>> << AMDGPU_GWS_SHIFT; >>>> - adev->gds.oa.total_size = adev->gds.oa.total_size << >>>> AMDGPU_OA_SHIFT; >>>> - adev->gds.oa.gfx_partition_size = >>>> adev->gds.oa.gfx_partition_size >>>> << AMDGPU_OA_SHIFT; >>>> - adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size >>>> << >>>> AMDGPU_OA_SHIFT; >>>> - /* GDS Memory */ >>>> - if (adev->gds.mem.total_size) { >>>> - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS, >>>> - adev->gds.mem.total_size >> >>>> PAGE_SHIFT); >>>> - if (r) { >>>> - DRM_ERROR("Failed initializing GDS heap.\n"); >>>> - return r; >>>> - } >>>> - } >>>> - >>>> - /* GWS */ >>>> - if (adev->gds.gws.total_size) { >>>> - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS, >>>> - adev->gds.gws.total_size >> >>>> PAGE_SHIFT); >>>> - if (r) { >>>> - DRM_ERROR("Failed initializing gws heap.\n"); >>>> - return r; >>>> - } >>>> - } >>>> - >>>> - /* OA */ >>>> - if (adev->gds.oa.total_size) { >>>> - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA, >>>> - adev->gds.oa.total_size >> >>>> PAGE_SHIFT); >>>> - if (r) { >>>> - DRM_ERROR("Failed initializing oa heap.\n"); >>>> - return r; >>>> - } >>>> - } >>>> - >>>> /* Register debugfs entries for amdgpu_ttm */ >>>> r = amdgpu_ttm_debugfs_init(adev); >>>> if (r) { >>>> DRM_ERROR("Failed to init debugfs\n"); >>>> return r; >>>> } >>>> return 0; >>>> } >>>> /** >>>> @@ -1892,26 +1837,20 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) >>>> return; >>>> amdgpu_ttm_debugfs_fini(adev); >>>> amdgpu_ttm_fw_reserve_vram_fini(adev); >>>> if (adev->mman.aper_base_kaddr) >>>> iounmap(adev->mman.aper_base_kaddr); >>>> adev->mman.aper_base_kaddr = NULL; >>>> ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM); >>>> ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT); >>>> - if (adev->gds.mem.total_size) >>>> - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS); >>>> - if (adev->gds.gws.total_size) >>>> - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS); >>>> - if (adev->gds.oa.total_size) >>>> - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA); >>>> ttm_bo_device_release(&adev->mman.bdev); >>>> amdgpu_ttm_global_fini(adev); >>>> adev->mman.initialized = false; >>>> DRM_INFO("amdgpu: ttm finalized\n"); >>>> } >>>> /** >>>> * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer >>>> functions >>>> * >>>> * @adev: amdgpu_device pointer >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h >>>> index fe8f276e9811..04557a382b19 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h >>>> @@ -20,28 +20,20 @@ >>>> * OTHER DEALINGS IN THE SOFTWARE. 
>>>> * >>>> */ >>>> #ifndef __AMDGPU_TTM_H__ >>>> #define __AMDGPU_TTM_H__ >>>> #include "amdgpu.h" >>>> #include <drm/gpu_scheduler.h> >>>> -#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) >>>> -#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1) >>>> -#define AMDGPU_PL_OA (TTM_PL_PRIV + 2) >>>> - >>>> -#define AMDGPU_PL_FLAG_GDS (TTM_PL_FLAG_PRIV << 0) >>>> -#define AMDGPU_PL_FLAG_GWS (TTM_PL_FLAG_PRIV << 1) >>>> -#define AMDGPU_PL_FLAG_OA (TTM_PL_FLAG_PRIV << 2) >>>> - >>>> #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 >>>> #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 >>>> struct amdgpu_mman { >>>> struct ttm_bo_global_ref bo_global_ref; >>>> struct drm_global_reference mem_global_ref; >>>> struct ttm_bo_device bdev; >>>> bool mem_global_referenced; >>>> bool initialized; >>>> void __iomem *aper_base_kaddr; >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> index be1659fedf94..c66f1c6f0ba8 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >>>> @@ -803,86 +803,69 @@ void amdgpu_vm_check_compute_bug(struct >>>> amdgpu_device *adev) >>>> * Returns: >>>> * True if sync is needed. >>>> */ >>>> bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, >>>> struct amdgpu_job *job) >>>> { >>>> struct amdgpu_device *adev = ring->adev; >>>> unsigned vmhub = ring->funcs->vmhub; >>>> struct amdgpu_vmid_mgr *id_mgr = >>>> &adev->vm_manager.id_mgr[vmhub]; >>>> struct amdgpu_vmid *id; >>>> - bool gds_switch_needed; >>>> bool vm_flush_needed = job->vm_needs_flush || >>>> ring->has_compute_vm_bug; >>>> if (job->vmid == 0) >>>> return false; >>>> id = &id_mgr->ids[job->vmid]; >>>> - gds_switch_needed = ring->funcs->emit_gds_switch && ( >>>> - id->gds_base != job->gds_base || >>>> - id->gds_size != job->gds_size || >>>> - id->gws_base != job->gws_base || >>>> - id->gws_size != job->gws_size || >>>> - id->oa_base != job->oa_base || >>>> - id->oa_size != job->oa_size); >>>> if (amdgpu_vmid_had_gpu_reset(adev, id)) >>>> return true; >>>> - return vm_flush_needed || gds_switch_needed; >>>> + return vm_flush_needed; >>>> } >>>> /** >>>> * amdgpu_vm_flush - hardware flush the vm >>>> * >>>> * @ring: ring to use for flush >>>> * @job: related job >>>> * @need_pipe_sync: is pipe sync needed >>>> * >>>> * Emit a VM flush when it is necessary. >>>> * >>>> * Returns: >>>> * 0 on success, errno otherwise. 
>>>> */ >>>> int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, >>>> bool need_pipe_sync) >>>> { >>>> struct amdgpu_device *adev = ring->adev; >>>> unsigned vmhub = ring->funcs->vmhub; >>>> struct amdgpu_vmid_mgr *id_mgr = >>>> &adev->vm_manager.id_mgr[vmhub]; >>>> struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; >>>> - bool gds_switch_needed = ring->funcs->emit_gds_switch && ( >>>> - id->gds_base != job->gds_base || >>>> - id->gds_size != job->gds_size || >>>> - id->gws_base != job->gws_base || >>>> - id->gws_size != job->gws_size || >>>> - id->oa_base != job->oa_base || >>>> - id->oa_size != job->oa_size); >>>> bool vm_flush_needed = job->vm_needs_flush; >>>> bool pasid_mapping_needed = id->pasid != job->pasid || >>>> !id->pasid_mapping || >>>> !dma_fence_is_signaled(id->pasid_mapping); >>>> struct dma_fence *fence = NULL; >>>> unsigned patch_offset = 0; >>>> int r; >>>> if (amdgpu_vmid_had_gpu_reset(adev, id)) { >>>> - gds_switch_needed = true; >>>> vm_flush_needed = true; >>>> pasid_mapping_needed = true; >>>> } >>>> - gds_switch_needed &= !!ring->funcs->emit_gds_switch; >>>> vm_flush_needed &= !!ring->funcs->emit_vm_flush && >>>> job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET; >>>> pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping >>>> && >>>> ring->funcs->emit_wreg; >>>> - if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) >>>> + if (!vm_flush_needed && !need_pipe_sync) >>>> return 0; >>>> if (ring->funcs->init_cond_exec) >>>> patch_offset = amdgpu_ring_init_cond_exec(ring); >>>> if (need_pipe_sync) >>>> amdgpu_ring_emit_pipeline_sync(ring); >>>> if (vm_flush_needed) { >>>> trace_amdgpu_vm_flush(ring, job->vmid, >>>> job->vm_pd_addr); >>>> @@ -907,33 +890,20 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, >>>> struct >>>> amdgpu_job *job, bool need_ >>>> mutex_unlock(&id_mgr->lock); >>>> } >>>> if (pasid_mapping_needed) { >>>> id->pasid = job->pasid; >>>> dma_fence_put(id->pasid_mapping); >>>> id->pasid_mapping = dma_fence_get(fence); >>>> } >>>> dma_fence_put(fence); >>>> - if (ring->funcs->emit_gds_switch && gds_switch_needed) { >>>> - id->gds_base = job->gds_base; >>>> - id->gds_size = job->gds_size; >>>> - id->gws_base = job->gws_base; >>>> - id->gws_size = job->gws_size; >>>> - id->oa_base = job->oa_base; >>>> - id->oa_size = job->oa_size; >>>> - amdgpu_ring_emit_gds_switch(ring, job->vmid, >>>> job->gds_base, >>>> - job->gds_size, >>>> job->gws_base, >>>> - job->gws_size, job->oa_base, >>>> - job->oa_size); >>>> - } >>>> - >>>> if (ring->funcs->patch_cond_exec) >>>> amdgpu_ring_patch_cond_exec(ring, patch_offset); >>>> /* the double SWITCH_BUFFER here *cannot* be skipped by >>>> COND_EXEC >>>> */ >>>> if (ring->funcs->emit_switch_buffer) { >>>> amdgpu_ring_emit_switch_buffer(ring); >>>> amdgpu_ring_emit_switch_buffer(ring); >>>> } >>>> return 0; >>>> } >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c >>>> b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c >>>> index a15d9c0f233b..f5228e169c3a 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c >>>> @@ -1890,21 +1890,21 @@ static void gfx_v7_0_config_init(struct >>>> amdgpu_device *adev) >>>> * >>>> * @adev: amdgpu_device pointer >>>> * >>>> * Configures the 3D engine and tiling configuration >>>> * registers so that the 3D engine is usable. 
>>>> */ >>>> static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) >>>> { >>>> u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base; >>>> u32 tmp; >>>> - int i; >>>> + int i, vmid; >>>> WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT)); >>>> WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); >>>> WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); >>>> WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); >>>> gfx_v7_0_tiling_mode_table_init(adev); >>>> gfx_v7_0_setup_rb(adev); >>>> @@ -2014,20 +2014,42 @@ static void gfx_v7_0_gpu_init(struct >>>> amdgpu_device >>>> *adev) >>>> tmp = RREG32(mmSPI_ARB_PRIORITY); >>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2); >>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2); >>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2); >>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2); >>>> WREG32(mmSPI_ARB_PRIORITY, tmp); >>>> mutex_unlock(&adev->grbm_idx_mutex); >>>> + for (vmid = 0; vmid < AMDGPU_NUM_VMID; vmid++) { >>>> + unsigned gds_size, gws_size, oa_size; >>>> + >>>> + if (vmid < >>>> adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids) >>>> { >>>> + gds_size = adev->gds.mem.gfx_size_per_vmid; >>>> + gws_size = adev->gds.gws.gfx_size_per_vmid; >>>> + oa_size = adev->gds.oa.gfx_size_per_vmid; >>>> + } else { >>>> + gds_size = adev->gds.mem.kfd_size_per_vmid; >>>> + gws_size = adev->gds.gws.kfd_size_per_vmid; >>>> + oa_size = adev->gds.oa.kfd_size_per_vmid; >>>> + } >>>> + >>>> + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, vmid * >>>> gds_size); >>>> + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, gds_size); >>>> + WREG32(amdgpu_gds_reg_offset[vmid].gws, >>>> + (vmid * gws_size) | >>>> + (gws_size << GDS_GWS_VMID0__SIZE__SHIFT)); >>>> + WREG32(amdgpu_gds_reg_offset[vmid].oa, >>>> + ((1 << oa_size) - 1) << (vmid * oa_size)); >>>> + } >>>> + >>>> udelay(50); >>>> } >>>> /* >>>> * GPU scratch registers helpers function. 
>>>> */ >>>> /** >>>> * gfx_v7_0_scratch_init - setup driver info for CP scratch regs >>>> * >>>> * @adev: amdgpu_device pointer >>>> @@ -4157,68 +4179,20 @@ static uint64_t >>>> gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev) >>>> uint64_t clock; >>>> mutex_lock(&adev->gfx.gpu_clock_mutex); >>>> WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); >>>> clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | >>>> ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); >>>> mutex_unlock(&adev->gfx.gpu_clock_mutex); >>>> return clock; >>>> } >>>> -static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring, >>>> - uint32_t vmid, >>>> - uint32_t gds_base, uint32_t >>>> gds_size, >>>> - uint32_t gws_base, uint32_t >>>> gws_size, >>>> - uint32_t oa_base, uint32_t >>>> oa_size) >>>> -{ >>>> - gds_base = gds_base >> AMDGPU_GDS_SHIFT; >>>> - gds_size = gds_size >> AMDGPU_GDS_SHIFT; >>>> - >>>> - gws_base = gws_base >> AMDGPU_GWS_SHIFT; >>>> - gws_size = gws_size >> AMDGPU_GWS_SHIFT; >>>> - >>>> - oa_base = oa_base >> AMDGPU_OA_SHIFT; >>>> - oa_size = oa_size >> AMDGPU_OA_SHIFT; >>>> - >>>> - /* GDS Base */ >>>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); >>>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | >>>> - WRITE_DATA_DST_SEL(0))); >>>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); >>>> - amdgpu_ring_write(ring, 0); >>>> - amdgpu_ring_write(ring, gds_base); >>>> - >>>> - /* GDS Size */ >>>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); >>>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | >>>> - WRITE_DATA_DST_SEL(0))); >>>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); >>>> - amdgpu_ring_write(ring, 0); >>>> - amdgpu_ring_write(ring, gds_size); >>>> - >>>> - /* GWS */ >>>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); >>>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | >>>> - WRITE_DATA_DST_SEL(0))); >>>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); >>>> - amdgpu_ring_write(ring, 0); >>>> - amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | >>>> gws_base); >>>> - >>>> - /* OA */ >>>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); >>>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | >>>> - WRITE_DATA_DST_SEL(0))); >>>> - amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); >>>> - amdgpu_ring_write(ring, 0); >>>> - amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << >>>> oa_base)); >>>> -} >>>> - >>>> static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, >>>> unsigned vmid) >>>> { >>>> struct amdgpu_device *adev = ring->adev; >>>> uint32_t value = 0; >>>> value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); >>>> value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); >>>> value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); >>>> value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); >>>> WREG32(mmSQ_CMD, value); >>>> @@ -4584,55 +4558,32 @@ static int gfx_v7_0_sw_init(void *handle) >>>> >>>> ring_id, >>>> i, k, >>>> j); >>>> if (r) >>>> return r; >>>> ring_id++; >>>> } >>>> } >>>> } >>>> - /* reserve GDS, GWS and OA resource for gfx */ >>>> - r = amdgpu_bo_create_kernel(adev, >>>> adev->gds.mem.gfx_partition_size, >>>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, >>>> - &adev->gds.gds_gfx_bo, NULL, NULL); >>>> - if (r) >>>> - return r; >>>> - >>>> - r = amdgpu_bo_create_kernel(adev, >>>> adev->gds.gws.gfx_partition_size, >>>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, >>>> - &adev->gds.gws_gfx_bo, NULL, NULL); >>>> - if (r) >>>> - return r; >>>> - >>>> 
- r = amdgpu_bo_create_kernel(adev, >>>> adev->gds.oa.gfx_partition_size, >>>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, >>>> - &adev->gds.oa_gfx_bo, NULL, NULL); >>>> - if (r) >>>> - return r; >>>> - >>>> adev->gfx.ce_ram_size = 0x8000; >>>> gfx_v7_0_gpu_early_init(adev); >>>> return r; >>>> } >>>> static int gfx_v7_0_sw_fini(void *handle) >>>> { >>>> int i; >>>> struct amdgpu_device *adev = (struct amdgpu_device *)handle; >>>> - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); >>>> - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); >>>> - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); >>>> - >>>> for (i = 0; i < adev->gfx.num_gfx_rings; i++) >>>> amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); >>>> for (i = 0; i < adev->gfx.num_compute_rings; i++) >>>> amdgpu_ring_fini(&adev->gfx.compute_ring[i]); >>>> gfx_v7_0_cp_compute_fini(adev); >>>> gfx_v7_0_rlc_fini(adev); >>>> gfx_v7_0_mec_fini(adev); >>>> amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, >>>> &adev->gfx.rlc.clear_state_gpu_addr, >>>> @@ -5073,64 +5024,60 @@ static const struct amd_ip_funcs >>>> gfx_v7_0_ip_funcs >>>> = { >>>> static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { >>>> .type = AMDGPU_RING_TYPE_GFX, >>>> .align_mask = 0xff, >>>> .nop = PACKET3(PACKET3_NOP, 0x3FFF), >>>> .support_64bit_ptrs = false, >>>> .get_rptr = gfx_v7_0_ring_get_rptr, >>>> .get_wptr = gfx_v7_0_ring_get_wptr_gfx, >>>> .set_wptr = gfx_v7_0_ring_set_wptr_gfx, >>>> .emit_frame_size = >>>> - 20 + /* gfx_v7_0_ring_emit_gds_switch */ >>>> 7 + /* gfx_v7_0_ring_emit_hdp_flush */ >>>> 5 + /* hdp invalidate */ >>>> 12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for >>>> user >>>> fence, vm fence */ >>>> 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */ >>>> CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* >>>> gfx_v7_0_ring_emit_vm_flush */ >>>> 3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt >>>> flush*/ >>>> .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */ >>>> .emit_ib = gfx_v7_0_ring_emit_ib_gfx, >>>> .emit_fence = gfx_v7_0_ring_emit_fence_gfx, >>>> .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync, >>>> .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, >>>> - .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch, >>>> .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, >>>> .test_ring = gfx_v7_0_ring_test_ring, >>>> .test_ib = gfx_v7_0_ring_test_ib, >>>> .insert_nop = amdgpu_ring_insert_nop, >>>> .pad_ib = amdgpu_ring_generic_pad_ib, >>>> .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl, >>>> .emit_wreg = gfx_v7_0_ring_emit_wreg, >>>> .soft_recovery = gfx_v7_0_ring_soft_recovery, >>>> }; >>>> static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = >>>> { >>>> .type = AMDGPU_RING_TYPE_COMPUTE, >>>> .align_mask = 0xff, >>>> .nop = PACKET3(PACKET3_NOP, 0x3FFF), >>>> .support_64bit_ptrs = false, >>>> .get_rptr = gfx_v7_0_ring_get_rptr, >>>> .get_wptr = gfx_v7_0_ring_get_wptr_compute, >>>> .set_wptr = gfx_v7_0_ring_set_wptr_compute, >>>> .emit_frame_size = >>>> - 20 + /* gfx_v7_0_ring_emit_gds_switch */ >>>> 7 + /* gfx_v7_0_ring_emit_hdp_flush */ >>>> 5 + /* hdp invalidate */ >>>> 7 + /* gfx_v7_0_ring_emit_pipeline_sync */ >>>> CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* >>>> gfx_v7_0_ring_emit_vm_flush */ >>>> 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for >>>> user >>>> fence, vm fence */ >>>> .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */ >>>> .emit_ib = gfx_v7_0_ring_emit_ib_compute, >>>> .emit_fence = gfx_v7_0_ring_emit_fence_compute, >>>> .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync, >>>> 
.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, >>>> - .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch, >>>> .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, >>>> .test_ring = gfx_v7_0_ring_test_ring, >>>> .test_ib = gfx_v7_0_ring_test_ib, >>>> .insert_nop = amdgpu_ring_insert_nop, >>>> .pad_ib = amdgpu_ring_generic_pad_ib, >>>> .emit_wreg = gfx_v7_0_ring_emit_wreg, >>>> }; >>>> static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev) >>>> { >>>> @@ -5169,42 +5116,28 @@ static void gfx_v7_0_set_irq_funcs(struct >>>> amdgpu_device *adev) >>>> adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs; >>>> } >>>> static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev) >>>> { >>>> /* init asci gds info */ >>>> adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); >>>> adev->gds.gws.total_size = 64; >>>> adev->gds.oa.total_size = 16; >>>> - if (adev->gds.mem.total_size == 64 * 1024) { >>>> - adev->gds.mem.gfx_partition_size = 4096; >>>> - adev->gds.mem.cs_partition_size = 4096; >>>> - >>>> - adev->gds.gws.gfx_partition_size = 4; >>>> - adev->gds.gws.cs_partition_size = 4; >>>> - >>>> - adev->gds.oa.gfx_partition_size = 4; >>>> - adev->gds.oa.cs_partition_size = 1; >>>> - } else { >>>> - adev->gds.mem.gfx_partition_size = 1024; >>>> - adev->gds.mem.cs_partition_size = 1024; >>>> - >>>> - adev->gds.gws.gfx_partition_size = 16; >>>> - adev->gds.gws.cs_partition_size = 16; >>>> - >>>> - adev->gds.oa.gfx_partition_size = 4; >>>> - adev->gds.oa.cs_partition_size = 4; >>>> - } >>>> + adev->gds.mem.gfx_size_per_vmid = adev->gds.mem.total_size / >>>> AMDGPU_NUM_VMID; >>>> + adev->gds.mem.kfd_size_per_vmid = adev->gds.mem.total_size / >>>> AMDGPU_NUM_VMID; >>>> + adev->gds.gws.gfx_size_per_vmid = adev->gds.gws.total_size / >>>> AMDGPU_NUM_VMID; >>>> + adev->gds.gws.kfd_size_per_vmid = adev->gds.gws.total_size / >>>> AMDGPU_NUM_VMID; >>>> + adev->gds.oa.gfx_size_per_vmid = adev->gds.oa.total_size / 8; /* >>>> gfx only */ >>>> + adev->gds.oa.kfd_size_per_vmid = 0; >>>> } >>>> - >>>> static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) >>>> { >>>> int i, j, k, counter, active_cu_number = 0; >>>> u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; >>>> struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; >>>> unsigned disable_masks[4 * 2]; >>>> u32 ao_cu_num; >>>> if (adev->flags & AMD_IS_APU) >>>> ao_cu_num = 2; >>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c >>>> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c >>>> index 3882689b2d8f..b11a54bd0668 100644 >>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c >>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c >>>> @@ -2154,57 +2154,34 @@ static int gfx_v8_0_sw_init(void *handle) >>>> kiq = &adev->gfx.kiq; >>>> r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); >>>> if (r) >>>> return r; >>>> /* create MQD for all compute queues as well as KIQ for SRIOV >>>> case >>>> */ >>>> r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct >>>> vi_mqd_allocation)); >>>> if (r) >>>> return r; >>>> - /* reserve GDS, GWS and OA resource for gfx */ >>>> - r = amdgpu_bo_create_kernel(adev, >>>> adev->gds.mem.gfx_partition_size, >>>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, >>>> - &adev->gds.gds_gfx_bo, NULL, NULL); >>>> - if (r) >>>> - return r; >>>> - >>>> - r = amdgpu_bo_create_kernel(adev, >>>> adev->gds.gws.gfx_partition_size, >>>> - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, >>>> - &adev->gds.gws_gfx_bo, NULL, NULL); >>>> - if (r) >>>> - return r; >>>> - >>>> - r = amdgpu_bo_create_kernel(adev, >>>> adev->gds.oa.gfx_partition_size, >>>> - PAGE_SIZE, 
AMDGPU_GEM_DOMAIN_OA, >>>> - &adev->gds.oa_gfx_bo, NULL, NULL); >>>> - if (r) >>>> - return r; >>>> - >>>> adev->gfx.ce_ram_size = 0x8000; >>>> r = gfx_v8_0_gpu_early_init(adev); >>>> if (r) >>>> return r; >>>> return 0; >>>> } >>>> static int gfx_v8_0_sw_fini(void *handle) >>>> { >>>> int i; >>>> struct amdgpu_device *adev = (struct amdgpu_device *)handle; >>>> - amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); >>>> - amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); >>>> - amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); >>>> - >>>> for (i = 0; i < adev->gfx.num_gfx_rings; i++) >>>> amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); >>>> for (i = 0; i < adev->gfx.num_compute_rings; i++) >>>> amdgpu_ring_fini(&adev->gfx.compute_ring[i]); >>>> amdgpu_gfx_compute_mqd_sw_fini(adev); >>>> amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, >>>> &adev->gfx.kiq.irq); >>>> amdgpu_gfx_kiq_fini(adev); >>>> gfx_v8_0_mec_fini(adev); >>>> @@ -3850,21 +3827,21 @@ static void gfx_v8_0_config_init(struct >>>> amdgpu_device *adev) >>>> case CHIP_CARRIZO: >>>> case CHIP_STONEY: >>>> adev->gfx.config.double_offchip_lds_buf = 0; >>>> break; >>>> } >>>> } >>>> static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) >>>> { >>>> u32 tmp, sh_static_mem_cfg; >>>> - int i; >>>> + int i, vmid; >>>> WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF); >>>> WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); >>>> WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); >>>> WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); >>>> gfx_v8_0_tiling_mode_table_init(adev); >>>> gfx_v8_0_setup_rb(adev); >>>> gfx_v8_0_get_cu_info(adev); >>>> gfx_v8_0_config_init(adev); >>>> @@ -3927,20 +3904,41 @@ static void gfx_v8_0_gpu_init(struct >>>> amdgpu_device >>>> *adev) >>>> tmp = RREG32(mmSPI_ARB_PRIORITY); >>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2); >>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2); >>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2); >>>> tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2); >>>> WREG32(mmSPI_ARB_PRIORITY, tmp); >>>> mutex_unlock(&adev->grbm_idx_mutex); >>>> + for (vmid = 0; vmid < AMDGPU_NUM_VMID; vmid++) { >>>> + unsigned gds_size, gws_size, oa_size; >>>> + >>>> + if (vmid < >>>> adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids) >>>> { >>>> + gds_size = adev->gds.mem.gfx_size_per_vmid; >>>> + gws_size = adev->gds.gws.gfx_size_per_vmid; >>>> + oa_size = adev->gds.oa.gfx_size_per_vmid; >>>> + } else { >>>> + gds_size = adev->gds.mem.kfd_size_per_vmid; >>>> + gws_size = adev->gds.gws.kfd_size_per_vmid; >>>> + oa_size = adev->gds.oa.kfd_size_per_vmid; >>>> + } >>>> + >>>> + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, vmid * >>>> gds_size); >>>> + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, gds_size); >>>> + WREG32(amdgpu_gds_reg_offset[vmid].gws, >>>> + (vmid * gws_size) | >>>> + (gws_size << GDS_GWS_VMID0__SIZE__SHIFT)); >>>> + WREG32(amdgpu_gds_reg_offset[vmid].oa, >>>> + ((1 << oa_size) - 1) << (vmid * oa_size)); >>>> + } >>>> } >>>> static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device >>>> *adev) >>>> { >>>> u32 i, j, k; >>>> u32 mask; >>>> mutex_lock(&adev->grbm_idx_mutex); >>>> for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { >>>> for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { >>>> @@ -5383,68 +5381,20 @@ static uint64_t >>>> gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) >>>> uint64_t clock; >>>> mutex_lock(&adev->gfx.gpu_clock_mutex); >>>> 
WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); >>>> clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | >>>> ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); >>>> mutex_unlock(&adev->gfx.gpu_clock_mutex); >>>> return clock; >>>> } >>>> -static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, >>>> - uint32_t vmid, >>>> - uint32_t gds_base, uint32_t >>>> gds_size, >>>> - uint32_t gws_base, uint32_t >>>> gws_size, >>>> - uint32_t oa_base, uint32_t >>>> oa_size) >>>> -{ >>>> - gds_base = gds_base >> AMDGPU_GDS_SHIFT; >>>> - gds_size = gds_size >> AMDGPU_GDS_SHIFT; >>>> - >>>> - gws_base = gws_base >> AMDGPU_GWS_SHIFT; >>>> - gws_size = gws_size >> AMDGPU_GWS_SHIFT; >>>> - >>>> - oa_base = oa_base >> AMDGPU_OA_SHIFT; >>>> - oa_size = oa_size >> AMDGPU_OA_SHIFT; >>>> - >>>> - /* GDS Base */ >>>> - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); >>>> - amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
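
A rough standalone sketch of the lifecycle described at the top of this
mail. A plain array stands in for a per-VMID GDS partition; every name
here is a hypothetical illustration, not driver or GPU code:

/*
 * gds_lifecycle.c - hypothetical illustration of the GDS usage pattern:
 * initialize GDS variables, dispatch compute that works with them, copy
 * the results out to ordinary memory, repeat. Nothing survives the "IB".
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define GDS_WORDS 16			/* pretend GDS partition, in dwords */

static uint32_t gds[GDS_WORDS];		/* stand-in for on-chip GDS */

/* "dispatch": every work item bumps a shared GDS counter */
static void dispatch_compute(unsigned num_items)
{
	unsigned i;

	for (i = 0; i < num_items; i++)
		gds[i % GDS_WORDS]++;
}

int main(void)
{
	uint32_t result[GDS_WORDS];
	int pass;

	for (pass = 0; pass < 2; pass++) {		/* "repeat ..." */
		memset(gds, 0, sizeof(gds));		/* initialize GDS variables */
		dispatch_compute(100);			/* compute that uses GDS */
		memcpy(result, gds, sizeof(result));	/* copy GDS out to memory */
		printf("pass %d: result[0] = %u\n", pass, (unsigned)result[0]);
	}
	return 0;	/* "end of IB": GDS contents are not persistent */
}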
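
In the same spirit, the static per-VMID split that the patch programs in
gfx_v7_0_gpu_init() (and its v8/v9 copies) can be checked with a small
standalone program. The sizes mirror gfx_v7_0_set_gds_init() on CIK;
NUM_VMID = 16 and the first 8 VMIDs belonging to gfx are assumptions
standing in for AMDGPU_NUM_VMID and id_mgr[AMDGPU_GFXHUB].num_ids:

/*
 * gds_partition.c - standalone illustration (not driver code) of the
 * per-VMID GDS/GWS/OA partitioning written into the hardware at init.
 */
#include <stdio.h>

#define NUM_VMID	16	/* assumption: AMDGPU_NUM_VMID */
#define GFX_VMIDS	8	/* assumption: id_mgr[AMDGPU_GFXHUB].num_ids */

int main(void)
{
	const unsigned gds_total = 64 * 1024;	/* RREG32(mmGDS_VMID0_SIZE) on CIK */
	const unsigned gws_total = 64;
	const unsigned oa_total = 16;
	/* every VMID gets an equal static slice; OA goes to gfx VMIDs only */
	const unsigned gds_size = gds_total / NUM_VMID;
	const unsigned gws_size = gws_total / NUM_VMID;
	const unsigned oa_gfx = oa_total / GFX_VMIDS;
	int vmid;

	for (vmid = 0; vmid < NUM_VMID; vmid++) {
		unsigned oa_size = vmid < GFX_VMIDS ? oa_gfx : 0;

		/* the same values the WREG32() loop in gfx_v7_0_gpu_init()
		 * programs; GWS base and size are packed into one register
		 * there (base | size << GDS_GWS_VMID0__SIZE__SHIFT) */
		printf("vmid %2d: GDS base 0x%05x size 0x%04x, GWS base %2u size %u, OA mask 0x%04x\n",
		       vmid, vmid * gds_size, gds_size,
		       vmid * gws_size, gws_size,
		       ((1u << oa_size) - 1) << (vmid * oa_size));
	}
	return 0;
}

For vmid >= GFX_VMIDS the OA mask comes out as 0, matching the
oa.kfd_size_per_vmid = 0 choice in the patch.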