Hi Siqueira,

as discussed on the call, if you can wrap your head around how the amdgpu_device_enforce_isolation() function works, it should be trivial to write a new function (or extend that one) to insert a CPU bubble whenever the ownership of one of the compute rings changes. IIRC we already do load balancing between the 8 available compute rings anyway, so the only part missing is the CPU bubble to ensure that a queue reset only affects a single application. A rough, untested sketch of the idea is appended after the quoted patch below.

Regards,
Christian.

Am 07.03.25 um 14:48 schrieb Christian König:
> Limiting the number of available VMIDs to enforce isolation causes some
> issues with gang submit and applying certain HW workarounds which
> require multiple VMIDs to work correctly.
>
> So instead start to track all submissions to the relevant engines in a
> per partition data structure and use the dma_fences of the submissions
> to enforce isolation similar to what a VMID limit does.
>
> v2: use ~0l for jobs without isolation to distinct it from kernel
> submissions which uses NULL for the owner. Add some warning when we
> are OOM.
>
> Signed-off-by: Christian König <christian.koenig@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h        | 13 ++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 98 +++++++++++++++++++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c    | 43 ++++------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    | 16 +++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c   | 19 +++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h   |  1 +
>  6 files changed, 155 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 87062c1adcdf..f68a348dcec9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1211,9 +1211,15 @@ struct amdgpu_device {
>          bool debug_exp_resets;
>          bool debug_disable_gpu_ring_reset;
>
> -        bool enforce_isolation[MAX_XCP];
> -        /* Added this mutex for cleaner shader isolation between GFX and compute processes */
> +        /* Protection for the following isolation structure */
>          struct mutex enforce_isolation_mutex;
> +        bool enforce_isolation[MAX_XCP];
> +        struct amdgpu_isolation {
> +                void *owner;
> +                struct dma_fence *spearhead;
> +                struct amdgpu_sync active;
> +                struct amdgpu_sync prev;
> +        } isolation[MAX_XCP];
>
>          struct amdgpu_init_level *init_lvl;
>
> @@ -1499,6 +1505,9 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
>  struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev);
>  struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
>                                              struct dma_fence *gang);
> +struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
> +                                                  struct amdgpu_ring *ring,
> +                                                  struct amdgpu_job *job);
>  bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
>  ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring);
>  ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 337543ec615c..3fa7788b4e12 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -4272,6 +4272,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>          mutex_init(&adev->gfx.reset_sem_mutex);
>          /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
>          mutex_init(&adev->enforce_isolation_mutex);
> +        for (i = 0; i < MAX_XCP; ++i) {
> +                adev->isolation[i].spearhead = dma_fence_get_stub();
> +                amdgpu_sync_create(&adev->isolation[i].active);
> +                amdgpu_sync_create(&adev->isolation[i].prev);
> +        }
>          mutex_init(&adev->gfx.kfd_sch_mutex);
>
>          amdgpu_device_init_apu_flags(adev);
> @@ -4770,7 +4775,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
>
>  void amdgpu_device_fini_sw(struct amdgpu_device *adev)
>  {
> -        int idx;
> +        int i, idx;
>          bool px;
>
>          amdgpu_device_ip_fini(adev);
> @@ -4778,6 +4783,11 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
>          amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
>          adev->accel_working = false;
>          dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
> +        for (i = 0; i < MAX_XCP; ++i) {
> +                dma_fence_put(adev->isolation[i].spearhead);
> +                amdgpu_sync_free(&adev->isolation[i].active);
> +                amdgpu_sync_free(&adev->isolation[i].prev);
> +        }
>
>          amdgpu_reset_fini(adev);
>
> @@ -6913,6 +6923,92 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
>          return NULL;
>  }
>
> +/**
> + * amdgpu_device_enforce_isolation - enforce HW isolation
> + * @adev: the amdgpu device pointer
> + * @ring: the HW ring the job is supposed to run on
> + * @job: the job which is about to be pushed to the HW ring
> + *
> + * Makes sure that only one client at a time can use the GFX block.
> + * Returns: The dependency to wait on before the job can be pushed to the HW.
> + * The function is called multiple times until NULL is returned.
> + */
> +struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
> +                                                  struct amdgpu_ring *ring,
> +                                                  struct amdgpu_job *job)
> +{
> +        struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
> +        struct drm_sched_fence *f = job->base.s_fence;
> +        struct dma_fence *dep;
> +        void *owner;
> +        int r;
> +
> +        /*
> +         * For now enforce isolation only for the GFX block since we only need
> +         * the cleaner shader on those rings.
> +         */
> +        if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
> +            ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
> +                return NULL;
> +
> +        /*
> +         * All submissions where enforce isolation is false are handled as if
> +         * they come from a single client. Use ~0l as the owner to distinct it
> +         * from kernel submissions where the owner is NULL.
> +         */
> +        owner = job->enforce_isolation ? f->owner : (void*)~0l;
> +
> +        mutex_lock(&adev->enforce_isolation_mutex);
> +
> +        /*
> +         * The "spearhead" submission is the first one which changes the
> +         * ownership to its client. We always need to wait for it to be
> +         * pushed to the HW before proceeding with anything.
> +         */
> +        if (&f->scheduled != isolation->spearhead &&
> +            !dma_fence_is_signaled(isolation->spearhead)) {
> +                dep = isolation->spearhead;
> +                goto out_grab_ref;
> +        }
> +
> +        if (isolation->owner != owner) {
> +
> +                /*
> +                 * Wait for any gang to be assembled before switching to a
> +                 * different owner or otherwise we could deadlock the
> +                 * submissions.
> +                 */
> +                if (!job->gang_submit) {
> +                        dep = amdgpu_device_get_gang(adev);
> +                        if (!dma_fence_is_signaled(dep))
> +                                goto out_return_dep;
> +                        dma_fence_put(dep);
> +                }
> +
> +                dma_fence_put(isolation->spearhead);
> +                isolation->spearhead = dma_fence_get(&f->scheduled);
> +                amdgpu_sync_move(&isolation->active, &isolation->prev);
> +                isolation->owner = owner;
> +        }
> +
> +        /*
> +         * Specifying the ring here helps to pipeline submissions even when
> +         * isolation is enabled. If that is not desired for testing NULL can be
> +         * used instead of the ring to enforce a CPU round trip while switching
> +         * between clients.
> +         */
> +        dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
> +        r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
> +        if (r)
> +                DRM_WARN("OOM tracking isolation\n");
> +
> +out_grab_ref:
> +        dma_fence_get(dep);
> +out_return_dep:
> +        mutex_unlock(&adev->enforce_isolation_mutex);
> +        return dep;
> +}
> +
>  bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
>  {
>          switch (adev->asic_type) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> index 56d27cea052e..92ab821afc06 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> @@ -287,40 +287,27 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
>              (*id)->flushed_updates < updates ||
>              !(*id)->last_flush ||
>              ((*id)->last_flush->context != fence_context &&
> -             !dma_fence_is_signaled((*id)->last_flush))) {
> +             !dma_fence_is_signaled((*id)->last_flush)))
> +                needs_flush = true;
> +
> +        if ((*id)->owner != vm->immediate.fence_context ||
> +            (!adev->vm_manager.concurrent_flush && needs_flush)) {
>                  struct dma_fence *tmp;
>
> -                /* Wait for the gang to be assembled before using a
> -                 * reserved VMID or otherwise the gang could deadlock.
> +                /* Don't use per engine and per process VMID at the
> +                 * same time
>                   */
> -                tmp = amdgpu_device_get_gang(adev);
> -                if (!dma_fence_is_signaled(tmp) && tmp != job->gang_submit) {
> +                if (adev->vm_manager.concurrent_flush)
> +                        ring = NULL;
> +
> +                /* to prevent one context starved by another context */
> +                (*id)->pd_gpu_addr = 0;
> +                tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
> +                if (tmp) {
>                          *id = NULL;
> -                        *fence = tmp;
> +                        *fence = dma_fence_get(tmp);
>                          return 0;
>                  }
> -                dma_fence_put(tmp);
> -
> -                /* Make sure the id is owned by the gang before proceeding */
> -                if (!job->gang_submit ||
> -                    (*id)->owner != vm->immediate.fence_context) {
> -
> -                        /* Don't use per engine and per process VMID at the
> -                         * same time
> -                         */
> -                        if (adev->vm_manager.concurrent_flush)
> -                                ring = NULL;
> -
> -                        /* to prevent one context starved by another context */
> -                        (*id)->pd_gpu_addr = 0;
> -                        tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
> -                        if (tmp) {
> -                                *id = NULL;
> -                                *fence = dma_fence_get(tmp);
> -                                return 0;
> -                        }
> -                }
> -                needs_flush = true;
>          }
>
>          /* Good we can use this VMID. Remember this submission as
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 5537c8bfd227..1a6cfef4c071 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -360,17 +360,24 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
>          struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);
>          struct amdgpu_job *job = to_amdgpu_job(sched_job);
> -        struct dma_fence *fence = NULL;
> +        struct dma_fence *fence;
>          int r;
>
>          r = drm_sched_entity_error(s_entity);
>          if (r)
>                  goto error;
>
> -        if (job->gang_submit)
> +        if (job->gang_submit) {
>                  fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
> +                if (fence)
> +                        return fence;
> +        }
> +
> +        fence = amdgpu_device_enforce_isolation(ring->adev, ring, job);
> +        if (fence)
> +                return fence;
>
> -        if (!fence && job->vm && !job->vmid) {
> +        if (job->vm && !job->vmid) {
>                  r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
>                  if (r) {
>                          dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r);
> @@ -383,9 +390,10 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
>                   */
>                  if (!fence)
>                          job->vm = NULL;
> +                return fence;
>          }
>
> -        return fence;
> +        return NULL;
>
>  error:
>          dma_fence_set_error(&job->base.s_fence->finished, r);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> index bfe12164d27d..c5f9db6b32a4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> @@ -405,6 +405,25 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
>          return 0;
>  }
>
> +/**
> + * amdgpu_sync_move - move all fences from src to dst
> + *
> + * @src: source of the fences, empty after function
> + * @dst: destination for the fences
> + *
> + * Moves all fences from source to destination. All fences in destination are
> + * freed and source is empty after the function call.
> + */
> +void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst)
> +{
> +        unsigned int i;
> +
> +        amdgpu_sync_free(dst);
> +
> +        for (i = 0; i < HASH_SIZE(src->fences); ++i)
> +                hlist_move_list(&src->fences[i], &dst->fences[i]);
> +}
> +
>  /**
>   * amdgpu_sync_push_to_job - push fences into job
>   * @sync: sync object to get the fences from
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> index 1504f5e7fc46..51eb4382c91e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> @@ -57,6 +57,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
>                                           struct amdgpu_ring *ring);
>  struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
>  int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
> +void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst);
>  int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job);
>  int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
>  void amdgpu_sync_free(struct amdgpu_sync *sync);
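
Below is the rough sketch mentioned above. It is untested and only meant to illustrate the idea; the helper name amdgpu_device_isolation_cpu_bubble() and the exact hook point are made up, everything else uses only what the patch already provides. The trick is the one already described in the comment inside amdgpu_device_enforce_isolation(): passing NULL instead of the ring to amdgpu_sync_peek_fence() disables pipelining across the owner switch and so forces a CPU round trip, which means a queue reset can only ever hit the application that currently owns the compute rings.

/*
 * Rough, untested sketch (helper name and hook point are assumptions):
 * force a CPU bubble whenever a compute ring changes ownership by
 * dropping the ring hint, so amdgpu_sync_peek_fence() no longer skips
 * fences that could otherwise be pipelined on the same ring.
 */
static struct dma_fence *
amdgpu_device_isolation_cpu_bubble(struct amdgpu_isolation *isolation,
                                   struct amdgpu_ring *ring)
{
        /* GFX keeps the pipelined behaviour, compute always takes the bubble */
        if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
                ring = NULL;

        /*
         * With ring == NULL unsignaled fences of the previous owner are no
         * longer skipped, so the ownership switch has to go through the CPU.
         */
        return amdgpu_sync_peek_fence(&isolation->prev, ring);
}

With that helper the existing

        dep = amdgpu_sync_peek_fence(&isolation->prev, ring);

in amdgpu_device_enforce_isolation() would simply become

        dep = amdgpu_device_isolation_cpu_bubble(isolation, ring);

and all the ownership tracking stays exactly as in the patch above.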