On Fri, May 6, 2022 at 1:02 AM Luben Tuikov <luben.tuikov@xxxxxxx> wrote: > > > > On 2022-05-05 16:04, Alex Deucher wrote: > > From: Likun Gao <Likun.Gao@xxxxxxx> > > > > Support memory to memory linear copy in PIO mode for LSDMA. > > > > Signed-off-by: Likun Gao <Likun.Gao@xxxxxxx> > > Reviewed-by: Christian König <christian.koenig@xxxxxxx> > > Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c | 26 ++++++++++++++ > > drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h | 5 +++ > > drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c | 44 +++++++++++++++++++++++ > > 3 files changed, 75 insertions(+) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c > > index af00a66f8282..3f1c674afe41 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c > > @@ -23,3 +23,29 @@ > > > > #include "amdgpu.h" > > #include "amdgpu_lsdma.h" > > + > > +#define AMDGPU_LSDMA_MAX_SIZE 0x2000000ULL > > + > > +int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev, > > + uint64_t src_addr, > > + uint64_t dst_addr, > > + uint64_t mem_size) > > +{ > > + int ret; > > + > > + if (mem_size == 0) > > + return -EINVAL; > > + > > + while(mem_size > 0) { > > Kernel style requires a space after the "while" and before the "(". > > > + uint64_t current_copy_size = min(mem_size, AMDGPU_LSDMA_MAX_SIZE); > > + > > + ret = adev->lsdma.funcs->copy_mem(adev, src_addr, dst_addr, current_copy_size); > > + if (ret) > > + return ret; > > + src_addr += current_copy_size; > > + dst_addr += current_copy_size; > > + mem_size -= current_copy_size; > > + } > > + > > + return 0; > > +} > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h > > index 4df4da423181..be397666e4c1 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h > > @@ -30,6 +30,11 @@ struct amdgpu_lsdma { > > > > struct amdgpu_lsdma_funcs > > { > > + int (*copy_mem)(struct amdgpu_device *adev, uint64_t src_addr, > > + uint64_t dst_addr, uint64_t size); > > }; > > > > +int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev, uint64_t src_addr, > > + uint64_t dst_addr, uint64_t mem_size); > > + > > #endif > > diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c > > index b611ade53be2..0d2bdd04bd76 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c > > @@ -29,5 +29,49 @@ > > #include "lsdma/lsdma_6_0_0_offset.h" > > #include "lsdma/lsdma_6_0_0_sh_mask.h" > > > > +static int lsdma_v6_0_copy_mem(struct amdgpu_device *adev, > > + uint64_t src_addr, > > + uint64_t dst_addr, > > + uint64_t size) > > +{ > > + uint32_t usec_timeout = 5000; /* wait for 5ms */ > > + uint32_t tmp, expect_val; > > + int i; > > + > > + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_LO, lower_32_bits(src_addr)); > > + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_HI, upper_32_bits(src_addr)); > > + > > + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr)); > > + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr)); > > + > > + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0); > > + > > + tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND); > > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size); > > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0); > > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0); > > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0); > > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0); > > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0); > > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 0); > > + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp); > > + > > + expect_val = LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK; > > + for (i = 0; i < usec_timeout; i++) { > > + tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_STATUS); > > + if ((tmp & expect_val) == expect_val) > > + break; > > + udelay(1); > > + } > > Does the hardware specify a minimum delay after the write to the LSDMA_PIO_COMMAND, > and before we check the LSDMA_PIO_STATUS? I'm not sure, but it should be pretty minimal. > > At the moment the code above checks the status immediately after writing to > LSDMA_PIO_COMMAND, and the copy wouldn't be completed. > > If the poll timeout is 1 us, as the loop shows us, maybe the command completion > is larger than that (the time after writing to LSDMA_PIO_COMMAND and before > checking LSDMA_PIO_STATUS)? The time it takes for the copy will be dependent on the amount of data there is to copy. While the command is probably not complete on the first read of the status register, it may be required to make sure the previous write goes through. Alex > > > + > > + if (i >= usec_timeout) { > > + dev_err(adev->dev, "LSDMA PIO failed to copy memory!\n"); > > + return -ETIMEDOUT; > > + } > > + > > + return 0; > > +} > > + > > const struct amdgpu_lsdma_funcs lsdma_v6_0_funcs = { > > + .copy_mem = lsdma_v6_0_copy_mem > > }; > > Regards, > -- > Luben