Am 26.10.19 um 00:41 schrieb Tuikov, Luben:
> Simplify padding calculations.
>
> v2: Comment update and spacing.
>
> Signed-off-by: Luben Tuikov <luben.tuikov@xxxxxxx>

Reviewed-by: Christian König <christian.koenig@xxxxxxx>

> ---
> drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 4 ++--
> drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
> drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 4 ++--
> drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
> drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 17 ++++++++++++-----
> 5 files changed, 20 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> index c45304f1047c..4af9acc2dc4f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> @@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
> u32 extra_bits = vmid & 0xf;
>
> /* IB packet must end on a 8 DW boundary */
> - cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8);
> + cik_sdma_ring_insert_nop(ring, (4 - lower_32_bits(ring->wptr)) & 7);
>
> amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
> amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
> @@ -811,7 +811,7 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
> u32 pad_count;
> int i;
>
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 7;
> for (i = 0; i < pad_count; i++)
> if (sdma && sdma->burst_nop && (i == 0))
> ib->ptr[ib->length_dw++] =
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> index a10175838013..b6af67f6f214 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> @@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
> unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>
> /* IB packet must end on a 8 DW boundary */
> - sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
> + sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
>
> amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -750,7 +750,7 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
> u32 pad_count;
> int i;
>
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 7;
> for (i = 0; i < pad_count; i++)
> if (sdma && sdma->burst_nop && (i == 0))
> ib->ptr[ib->length_dw++] =
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> index 5f4e2c616241..cd3ebed46d05 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> @@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
> unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>
> /* IB packet must end on a 8 DW boundary */
> - sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
> + sdma_v3_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
>
> amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1021,7 +1021,7 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
> u32 pad_count;
> int i;
>
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 7;
> for (i = 0; i < pad_count; i++)
> if (sdma && sdma->burst_nop && (i == 0))
> ib->ptr[ib->length_dw++] =
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index 45bd538ba97e..8ce15056ee4f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -698,7 +698,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
> unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>
> /* IB packet must end on a 8 DW boundary */
> - sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
> + sdma_v4_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
>
> amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1580,7 +1580,7 @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
> u32 pad_count;
> int i;
>
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 7;
> for (i = 0; i < pad_count; i++)
> if (sdma && sdma->burst_nop && (i == 0))
> ib->ptr[ib->length_dw++] =
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 0c41b4fdc58b..d117bde3f29a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -382,8 +382,15 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
> unsigned vmid = AMDGPU_JOB_GET_VMID(job);
> uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
>
> - /* IB packet must end on a 8 DW boundary */
> - sdma_v5_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
> + /* An IB packet must end on a 8 DW boundary--the next dword
> + * must be on a 8-dword boundary. Our IB packet below is 6
> + * dwords long, thus add x number of NOPs, such that, in
> + * modular arithmetic,
> + * wptr + 6 + x = 8k, k >= 0, which in C is,
> + * (wptr + 6 + x) % 8 = 0.
> + * The expression below, is a solution of x.
> + */
> + sdma_v5_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
>
> amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1086,10 +1093,10 @@ static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib,
> }
>
> /**
> - * sdma_v5_0_ring_pad_ib - pad the IB to the required number of dw
> - *
> + * sdma_v5_0_ring_pad_ib - pad the IB
> * @ib: indirect buffer to fill with padding
> *
> + * Pad the IB with NOPs to a boundary multiple of 8.
> */
> static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
> {
> @@ -1097,7 +1104,7 @@ static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
> u32 pad_count;
> int i;
>
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 0x7;
> for (i = 0; i < pad_count; i++)
> if (sdma && sdma->burst_nop && (i == 0))
> ib->ptr[ib->length_dw++] =

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx