On 2022-05-05 16:04, Alex Deucher wrote: > From: Likun Gao <Likun.Gao@xxxxxxx> > > Support memory to memory linear copy in PIO mode for LSDMA. > > Signed-off-by: Likun Gao <Likun.Gao@xxxxxxx> > Reviewed-by: Christian König <christian.koenig@xxxxxxx> > Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c | 26 ++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h | 5 +++ > drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c | 44 +++++++++++++++++++++++ > 3 files changed, 75 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c > index af00a66f8282..3f1c674afe41 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c > @@ -23,3 +23,29 @@ > > #include "amdgpu.h" > #include "amdgpu_lsdma.h" > + > +#define AMDGPU_LSDMA_MAX_SIZE 0x2000000ULL > + > +int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev, > + uint64_t src_addr, > + uint64_t dst_addr, > + uint64_t mem_size) > +{ > + int ret; > + > + if (mem_size == 0) > + return -EINVAL; > + > + while(mem_size > 0) { Kernel style requires a space after the "while" and before the "(". > + uint64_t current_copy_size = min(mem_size, AMDGPU_LSDMA_MAX_SIZE); > + > + ret = adev->lsdma.funcs->copy_mem(adev, src_addr, dst_addr, current_copy_size); > + if (ret) > + return ret; > + src_addr += current_copy_size; > + dst_addr += current_copy_size; > + mem_size -= current_copy_size; > + } > + > + return 0; > +} > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h > index 4df4da423181..be397666e4c1 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h > @@ -30,6 +30,11 @@ struct amdgpu_lsdma { > > struct amdgpu_lsdma_funcs > { > + int (*copy_mem)(struct amdgpu_device *adev, uint64_t src_addr, > + uint64_t dst_addr, uint64_t size); > }; > > +int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev, uint64_t src_addr, > + uint64_t dst_addr, uint64_t mem_size); > + > #endif > diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c > index b611ade53be2..0d2bdd04bd76 100644 > --- a/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c > @@ -29,5 +29,49 @@ > #include "lsdma/lsdma_6_0_0_offset.h" > #include "lsdma/lsdma_6_0_0_sh_mask.h" > > +static int lsdma_v6_0_copy_mem(struct amdgpu_device *adev, > + uint64_t src_addr, > + uint64_t dst_addr, > + uint64_t size) > +{ > + uint32_t usec_timeout = 5000; /* wait for 5ms */ > + uint32_t tmp, expect_val; > + int i; > + > + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_LO, lower_32_bits(src_addr)); > + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_HI, upper_32_bits(src_addr)); > + > + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr)); > + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr)); > + > + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0); > + > + tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND); > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size); > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0); > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0); > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0); > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0); > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0); > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 0); > + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp); > + > + expect_val = LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK; > + for (i = 0; i < usec_timeout; i++) { > + tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_STATUS); > + if ((tmp & expect_val) == expect_val) > + break; > + udelay(1); > + } Does the hardware specify a minimum delay after the write to the LSDMA_PIO_COMMAND, and before we check the LSDMA_PIO_STATUS? At the moment the code above checks the status immediately after writing to LSDMA_PIO_COMMAND, and the copy wouldn't be completed. If the poll timeout is 1 us, as the loop shows us, maybe the command completion is larger than that (the time after writing to LSDMA_PIO_COMMAND and before checking LSDMA_PIO_STATUS)? > + > + if (i >= usec_timeout) { > + dev_err(adev->dev, "LSDMA PIO failed to copy memory!\n"); > + return -ETIMEDOUT; > + } > + > + return 0; > +} > + > const struct amdgpu_lsdma_funcs lsdma_v6_0_funcs = { > + .copy_mem = lsdma_v6_0_copy_mem > }; Regards, -- Luben