Am 18.09.2017 um 21:18 schrieb Yong Zhao: > When max_bytes is not 8-byte aligned and the bo size is larger than > max_bytes, the last 8 bytes in a ttm node may be left unchanged. > For example, on pre-SDMA 4.0 hardware with max_bytes = 0x1fffff and a bo size > of 0x200000, the problem will happen. > > In order to fix the problem, we store the maximum number of PTEs/PDEs > a single operation can set separately in the amdgpu_vm_pte_funcs > structure. > > Change-Id: I37c588a57cb63f1a8251fb5ead2eff4b39e047c9 > Signed-off-by: Yong Zhao <yong.zhao at amd.com> Looks really good to me, patch is Reviewed-by: Christian König <christian.koenig at amd.com>. Bonus points for finding the right lines in amdgpu_vm_bo_update_mapping() and replacing the hard-coded "10" there with the value from amdgpu_vm_pte_funcs as well. Regards, Christian. > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 ++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 ++++---- > drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 3 +++ > drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 3 +++ > drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 3 +++ > drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 3 +++ > drivers/gpu/drm/amd/amdgpu/si_dma.c | 3 +++ > 7 files changed, 25 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index a34c4cb..91bb111 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -309,6 +309,12 @@ struct amdgpu_vm_pte_funcs { > uint64_t pe, > uint64_t addr, unsigned count, > uint32_t incr, uint64_t flags); > + > + /* maximum nums of PTEs/PDEs in a single operation */ > + uint32_t set_max_nums_pte_pde; > + > + /* number of dw to reserve per operation */ > + unsigned set_pte_pde_num_dw; > }; > > /* provided by the gmc block */ > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index 2180ed3..8685b0c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -1556,8 +1556,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, > struct dma_fence **fence) > { > struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); > - /* max_bytes applies to SDMA_OP_PTEPDE as well as SDMA_OP_CONST_FILL*/ > - uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes; > + uint32_t max_bytes = 8 * > + adev->vm_manager.vm_pte_funcs->set_max_nums_pte_pde; > struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; > > struct drm_mm_node *mm_node; > @@ -1589,8 +1589,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, > ++mm_node; > } > > - /* 10 double words for each SDMA_OP_PTEPDE cmd */ > - num_dw = num_loops * 10; > + /* num of dwords for each SDMA_OP_PTEPDE cmd */ > + num_dw = num_loops * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw; > > /* for IB padding */ > num_dw += 64; > diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > index f508f4d..ff59351 100644 > --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c > @@ -1390,6 +1390,9 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = { > .copy_pte = cik_sdma_vm_copy_pte, > .write_pte = cik_sdma_vm_write_pte, > .set_pte_pde = cik_sdma_vm_set_pte_pde, > + > + .set_max_nums_pte_pde = 0x1fffff >> 3, > + .set_pte_pde_num_dw = 10, > }; > > static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev) > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > index f2d0710..aec3586 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c > @@ -1327,6 +1327,9 @@ static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = { > .copy_pte = sdma_v2_4_vm_copy_pte, > .write_pte = sdma_v2_4_vm_write_pte, > .set_pte_pde = sdma_v2_4_vm_set_pte_pde, > + > + .set_max_nums_pte_pde = 0x1fffff >> 3, > + .set_pte_pde_num_dw = 10, > }; > > static void 
sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev) > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > index 4e7fe07..7610272 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c > @@ -1751,6 +1751,9 @@ static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = { > .copy_pte = sdma_v3_0_vm_copy_pte, > .write_pte = sdma_v3_0_vm_write_pte, > .set_pte_pde = sdma_v3_0_vm_set_pte_pde, > + > + .set_max_nums_pte_pde = 0x3fffe0 >> 3, > + .set_pte_pde_num_dw = 10, > }; > > static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev) > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > index fd7c72a..6e1e0c0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > @@ -1717,6 +1717,9 @@ static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = { > .copy_pte = sdma_v4_0_vm_copy_pte, > .write_pte = sdma_v4_0_vm_write_pte, > .set_pte_pde = sdma_v4_0_vm_set_pte_pde, > + > + .set_max_nums_pte_pde = 0x400000 >> 3, > + .set_pte_pde_num_dw = 10, > }; > > static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev) > diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c > index 112969f..2d3091f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c > +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c > @@ -890,6 +890,9 @@ static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = { > .copy_pte = si_dma_vm_copy_pte, > .write_pte = si_dma_vm_write_pte, > .set_pte_pde = si_dma_vm_set_pte_pde, > + > + .set_max_nums_pte_pde = 0xffff8 >> 3, > + .set_pte_pde_num_dw = 9, > }; > > static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev)