Protect the struct amdgpu_bo_list with a mutex. This is used during
command submission in order to avoid buffer object corruption as
recorded in the link below.

Suggested-by: Christian König <christian.koenig@xxxxxxx>
Cc: Alex Deucher <Alexander.Deucher@xxxxxxx>
Cc: Andrey Grodzovsky <Andrey.Grodzovsky@xxxxxxx>
Cc: Vitaly Prosyak <Vitaly.Prosyak@xxxxxxx>
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2048
Signed-off-by: Luben Tuikov <luben.tuikov@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h |  4 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 31 +++++++++++++++++++--
 3 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 714178f1b6c6ed..2168163aad2d38 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -40,7 +40,7 @@ static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu)
 {
 	struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list,
 						   rhead);
-
+	mutex_destroy(&list->bo_list_mutex);
 	kvfree(list);
 }
 
@@ -136,6 +136,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
 
 	trace_amdgpu_cs_bo_status(list->num_entries, total_size);
 
+	mutex_init(&list->bo_list_mutex);
 	*result = list;
 	return 0;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 044b41f0bfd9ce..717984d4de6858 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -48,6 +48,10 @@ struct amdgpu_bo_list {
 	struct amdgpu_bo *oa_obj;
 	unsigned first_userptr;
 	unsigned num_entries;
+
+	/* Protect access during command submission.
+	 */
+	struct mutex bo_list_mutex;
 };
 
 int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 36ac1f1d11e6b4..0b2932c20ec777 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -517,6 +517,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 			return r;
 	}
 
+	mutex_lock(&p->bo_list->bo_list_mutex);
+
 	/* One for TTM and one for the CS job */
 	amdgpu_bo_list_for_each_entry(e, p->bo_list)
 		e->tv.num_shared = 2;
@@ -544,6 +546,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		if (!e->user_pages) {
 			DRM_ERROR("kvmalloc_array failure\n");
 			r = -ENOMEM;
+			mutex_unlock(&p->bo_list->bo_list_mutex);
 			goto out_free_user_pages;
 		}
 
@@ -551,6 +554,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		if (r) {
 			kvfree(e->user_pages);
 			e->user_pages = NULL;
+			mutex_unlock(&p->bo_list->bo_list_mutex);
 			goto out_free_user_pages;
 		}
 
@@ -568,6 +572,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (unlikely(r != 0)) {
 		if (r != -ERESTARTSYS)
 			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
+		mutex_unlock(&p->bo_list->bo_list_mutex);
 		goto out_free_user_pages;
 	}
 
@@ -580,11 +585,14 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 			e->chain = dma_fence_chain_alloc();
 			if (!e->chain) {
 				r = -ENOMEM;
+				mutex_unlock(&p->bo_list->bo_list_mutex);
 				goto error_validate;
 			}
 		}
 	}
 
+	mutex_unlock(&p->bo_list->bo_list_mutex);
+
 	/* Move fence waiting after getting reservation lock of
 	 * PD root. Then there is no need on a ctx mutex lock.
 	 */
@@ -607,6 +615,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		goto error_validate;
 	}
 
+	mutex_lock(&p->bo_list->bo_list_mutex);
 	r = amdgpu_cs_list_validate(p, &duplicates);
 	if (r)
 		goto error_validate;
@@ -614,6 +623,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	r = amdgpu_cs_list_validate(p, &p->validated);
 	if (r)
 		goto error_validate;
+	mutex_unlock(&p->bo_list->bo_list_mutex);
 
 	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
 				     p->bytes_moved_vis);
@@ -644,15 +654,18 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
 error_validate:
 	if (r) {
+		mutex_lock(&p->bo_list->bo_list_mutex);
 		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
 			dma_fence_chain_free(e->chain);
 			e->chain = NULL;
 		}
 		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+		mutex_unlock(&p->bo_list->bo_list_mutex);
 	}
 
 out_free_user_pages:
 	if (r) {
+		mutex_lock(&p->bo_list->bo_list_mutex);
 		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
 			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
@@ -662,6 +675,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 			kvfree(e->user_pages);
 			e->user_pages = NULL;
 		}
+		mutex_unlock(&p->bo_list->bo_list_mutex);
 	}
 	return r;
 }
@@ -704,6 +718,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 	if (error && backoff) {
 		struct amdgpu_bo_list_entry *e;
 
+		mutex_lock(&parser->bo_list->bo_list_mutex);
 		amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
 			dma_fence_chain_free(e->chain);
 			e->chain = NULL;
@@ -711,6 +726,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
+		mutex_unlock(&parser->bo_list->bo_list_mutex);
 	}
 
 	for (i = 0; i < parser->num_post_deps; i++) {
@@ -839,6 +855,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 			return r;
 	}
 
+	mutex_lock(&p->bo_list->bo_list_mutex);
 	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
 		/* ignore duplicates */
 		bo = ttm_to_amdgpu_bo(e->tv.bo);
@@ -850,13 +867,18 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 			continue;
 
 		r = amdgpu_vm_bo_update(adev, bo_va, false);
-		if (r)
+		if (r) {
+			mutex_unlock(&p->bo_list->bo_list_mutex);
 			return r;
+		}
 
 		r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
-		if (r)
+		if (r) {
+			mutex_unlock(&p->bo_list->bo_list_mutex);
 			return r;
+		}
 	}
+	mutex_unlock(&p->bo_list->bo_list_mutex);
 
 	r = amdgpu_vm_handle_moved(adev, vm);
 	if (r)
@@ -874,6 +896,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 
 	if (amdgpu_vm_debug) {
 		/* Invalidate all BOs to test for userspace bugs */
+		mutex_lock(&p->bo_list->bo_list_mutex);
 		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
 			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
@@ -883,6 +906,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 
 			amdgpu_vm_bo_invalidate(adev, bo, false);
 		}
+		mutex_unlock(&p->bo_list->bo_list_mutex);
 	}
 
 	return amdgpu_cs_sync_rings(p);
@@ -1249,6 +1273,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	 * added to BOs.
 	 */
 	mutex_lock(&p->adev->notifier_lock);
+	mutex_lock(&p->bo_list->bo_list_mutex);
 
 	/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
 	 * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
@@ -1308,12 +1333,14 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	}
 
 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
+	mutex_unlock(&p->bo_list->bo_list_mutex);
 	mutex_unlock(&p->adev->notifier_lock);
 
 	return 0;
 
 error_abort:
 	drm_sched_job_cleanup(&job->base);
+	mutex_unlock(&p->bo_list->bo_list_mutex);
 	mutex_unlock(&p->adev->notifier_lock);
 
 error_unlock:
--
2.36.1.74.g277cf0bc36
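
P.S. For reviewers skimming the hunks, here is a minimal sketch of the lock
ordering this patch establishes when both locks are held, as in the
amdgpu_cs_submit() hunk above: bo_list_mutex nests inside
adev->notifier_lock (taken second, released first). This is an illustration
only, not part of the patch, and the helper name is invented for the sketch:

/* Illustration only: lock ordering used during command submission.
 * example_cs_lock_order() is a hypothetical helper, not a real function
 * in amdgpu; it assumes the usual kernel headers for struct mutex.
 */
static void example_cs_lock_order(struct amdgpu_device *adev,
				  struct amdgpu_bo_list *list)
{
	mutex_lock(&adev->notifier_lock);	/* outer: userptr invalidation */
	mutex_lock(&list->bo_list_mutex);	/* inner: BO list access */

	/* ... fence the validated BOs, check for invalidated userptrs ... */

	mutex_unlock(&list->bo_list_mutex);
	mutex_unlock(&adev->notifier_lock);
}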