[AMD Public Use] Hi Christian, Thanks for your patience. Unfortunately, after applying the patch below, the Vulkan CTS test on my side fails. The same gfxhub page fault and kernel bug, along with the amdgpu_vm_update_ptes call trace, are observed. I will send the full log to you privately soon. I suggest holding off on this patch until we root-cause it. Regards, Guchun -----Original Message----- From: Das, Nirmoy <Nirmoy.Das@xxxxxxx> Sent: Tuesday, March 23, 2021 5:09 PM To: Chen, Guchun <Guchun.Chen@xxxxxxx>; Christian König <ckoenig.leichtzumerken@xxxxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Das, Nirmoy <Nirmoy.Das@xxxxxxx> Subject: Re: [PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code"" I tested ./piglit run opengl results/test multiple times. Once I got a gfx timeout error, but without a kernel freeze. I can't reproduce it any more. Regards, Nirmoy On 3/22/21 2:11 PM, Chen, Guchun wrote: > [AMD Public Use] > > Hi Christian, > > I will conduct one stress test for this tomorrow. Would you mind waiting for my ack before submitting? > > Regards, > Guchun > > -----Original Message----- > From: Christian König <ckoenig.leichtzumerken@xxxxxxxxx> > Sent: Monday, March 22, 2021 8:41 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Chen, Guchun <Guchun.Chen@xxxxxxx>; Das, Nirmoy > <Nirmoy.Das@xxxxxxx> > Subject: [PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code"" > > Now that we found the underlying problem we can re-apply this patch. > > This reverts commit 867fee7f8821ff42e7308088cf0c3450ac49c17c. 
> > Signed-off-by: Christian König <christian.koenig@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55 +++++++++----------------- > 1 file changed, 18 insertions(+), 37 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index 9268db1172bd..bc3951b71079 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -37,6 +37,7 @@ > #include "amdgpu_gmc.h" > #include "amdgpu_xgmi.h" > #include "amdgpu_dma_buf.h" > +#include "amdgpu_res_cursor.h" > > /** > * DOC: GPUVM > @@ -1583,7 +1584,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, > * @last: last mapped entry > * @flags: flags for the entries > * @offset: offset into nodes and pages_addr > - * @nodes: array of drm_mm_nodes with the MC addresses > + * @res: ttm_resource to map > * @pages_addr: DMA addresses to use for mapping > * @fence: optional resulting fence > * > @@ -1598,13 +1599,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, > bool unlocked, struct dma_resv *resv, > uint64_t start, uint64_t last, > uint64_t flags, uint64_t offset, > - struct drm_mm_node *nodes, > + struct ttm_resource *res, > dma_addr_t *pages_addr, > struct dma_fence **fence) > { > struct amdgpu_vm_update_params params; > + struct amdgpu_res_cursor cursor; > enum amdgpu_sync_mode sync_mode; > - uint64_t pfn; > int r; > > memset(&params, 0, sizeof(params)); @@ -1622,14 +1623,6 @@ static > int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, > else > sync_mode = AMDGPU_SYNC_EXPLICIT; > > - pfn = offset >> PAGE_SHIFT; > - if (nodes) { > - while (pfn >= nodes->size) { > - pfn -= nodes->size; > - ++nodes; > - } > - } > - > amdgpu_vm_eviction_lock(vm); > if (vm->evicting) { > r = -EBUSY; > @@ -1648,23 +1641,17 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, > if (r) > goto error_unlock; > > - do { > + amdgpu_res_first(res, offset, (last - start + 1) * 
AMDGPU_GPU_PAGE_SIZE, > + &cursor); > + while (cursor.remaining) { > uint64_t tmp, num_entries, addr; > > - > - num_entries = last - start + 1; > - if (nodes) { > - addr = nodes->start << PAGE_SHIFT; > - num_entries = min((nodes->size - pfn) * > - AMDGPU_GPU_PAGES_IN_CPU_PAGE, num_entries); > - } else { > - addr = 0; > - } > - > + num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT; > if (pages_addr) { > bool contiguous = true; > > if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) { > + uint64_t pfn = cursor.start >> PAGE_SHIFT; > uint64_t count; > > contiguous = pages_addr[pfn + 1] == @@ -1684,16 +1671,18 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, > } > > if (!contiguous) { > - addr = pfn << PAGE_SHIFT; > + addr = cursor.start; > params.pages_addr = pages_addr; > } else { > - addr = pages_addr[pfn]; > + addr = pages_addr[cursor.start >> PAGE_SHIFT]; > params.pages_addr = NULL; > } > > } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { > - addr += bo_adev->vm_manager.vram_base_offset; > - addr += pfn << PAGE_SHIFT; > + addr = bo_adev->vm_manager.vram_base_offset + > + cursor.start; > + } else { > + addr = 0; > } > > tmp = start + num_entries; > @@ -1701,14 +1690,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, > if (r) > goto error_unlock; > > - pfn += num_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE; > - if (nodes && nodes->size == pfn) { > - pfn = 0; > - ++nodes; > - } > + amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE); > start = tmp; > - > - } while (unlikely(start != last + 1)); > + }; > > r = vm->update_funcs->commit(&params, fence); > > @@ -1737,7 +1721,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, > struct amdgpu_bo_va_mapping *mapping; > dma_addr_t *pages_addr = NULL; > struct ttm_resource *mem; > - struct drm_mm_node *nodes; > struct dma_fence **last_update; > struct dma_resv *resv; > uint64_t flags; > @@ -1746,7 +1729,6 @@ int amdgpu_vm_bo_update(struct 
amdgpu_device > *adev, struct amdgpu_bo_va *bo_va, > > if (clear || !bo) { > mem = NULL; > - nodes = NULL; > resv = vm->root.base.bo->tbo.base.resv; > } else { > struct drm_gem_object *obj = &bo->tbo.base; @@ -1761,7 +1743,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, > bo = gem_to_amdgpu_bo(gobj); > } > mem = &bo->tbo.mem; > - nodes = mem->mm_node; > if (mem->mem_type == TTM_PL_TT) > pages_addr = bo->tbo.ttm->dma_address; > } > @@ -1810,7 +1791,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, > r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, > resv, mapping->start, > mapping->last, update_flags, > - mapping->offset, nodes, > + mapping->offset, mem, > pages_addr, last_update); > if (r) > return r; > -- > 2.25.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx