On 2022-05-06 10:22, Alex Deucher wrote: > On Fri, May 6, 2022 at 1:02 AM Luben Tuikov <luben.tuikov@xxxxxxx> wrote: >> >> >> >> On 2022-05-05 16:04, Alex Deucher wrote: >>> From: Likun Gao <Likun.Gao@xxxxxxx> >>> >>> Support memory to memory linear copy in PIO mode for LSDMA. >>> >>> Signed-off-by: Likun Gao <Likun.Gao@xxxxxxx> >>> Reviewed-by: Christian König <christian.koenig@xxxxxxx> >>> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c | 26 ++++++++++++++ >>> drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h | 5 +++ >>> drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c | 44 +++++++++++++++++++++++ >>> 3 files changed, 75 insertions(+) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c >>> index af00a66f8282..3f1c674afe41 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c >>> @@ -23,3 +23,29 @@ >>> >>> #include "amdgpu.h" >>> #include "amdgpu_lsdma.h" >>> + >>> +#define AMDGPU_LSDMA_MAX_SIZE 0x2000000ULL >>> + >>> +int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev, >>> + uint64_t src_addr, >>> + uint64_t dst_addr, >>> + uint64_t mem_size) >>> +{ >>> + int ret; >>> + >>> + if (mem_size == 0) >>> + return -EINVAL; >>> + >>> + while(mem_size > 0) { >> >> Kernel style requires a space after the "while" and before the "(". >> >>> + uint64_t current_copy_size = min(mem_size, AMDGPU_LSDMA_MAX_SIZE); >>> + >>> + ret = adev->lsdma.funcs->copy_mem(adev, src_addr, dst_addr, current_copy_size); >>> + if (ret) >>> + return ret; >>> + src_addr += current_copy_size; >>> + dst_addr += current_copy_size; >>> + mem_size -= current_copy_size; >>> + } >>> + >>> + return 0; >>> +} >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h >>> index 4df4da423181..be397666e4c1 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h >>> @@ -30,6 +30,11 @@ struct amdgpu_lsdma { >>> >>> struct amdgpu_lsdma_funcs >>> { >>> + int (*copy_mem)(struct amdgpu_device *adev, uint64_t src_addr, >>> + uint64_t dst_addr, uint64_t size); >>> }; >>> >>> +int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev, uint64_t src_addr, >>> + uint64_t dst_addr, uint64_t mem_size); >>> + >>> #endif >>> diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c >>> index b611ade53be2..0d2bdd04bd76 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c >>> @@ -29,5 +29,49 @@ >>> #include "lsdma/lsdma_6_0_0_offset.h" >>> #include "lsdma/lsdma_6_0_0_sh_mask.h" >>> >>> +static int lsdma_v6_0_copy_mem(struct amdgpu_device *adev, >>> + uint64_t src_addr, >>> + uint64_t dst_addr, >>> + uint64_t size) >>> +{ >>> + uint32_t usec_timeout = 5000; /* wait for 5ms */ >>> + uint32_t tmp, expect_val; >>> + int i; >>> + >>> + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_LO, lower_32_bits(src_addr)); >>> + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_HI, upper_32_bits(src_addr)); >>> + >>> + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr)); >>> + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr)); >>> + >>> + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0); >>> + >>> + tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND); >>> + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size); >>> + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0); >>> + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0); >>> + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0); >>> + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0); >>> + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0); >>> + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 0); >>> + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp); >>> + >>> + expect_val = LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK; >>> + for (i = 0; i < usec_timeout; i++) { >>> + tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_STATUS); >>> + if ((tmp & expect_val) == expect_val) >>> + break; >>> + udelay(1); >>> + } >> >> Does the hardware specify a minimum delay after the write to the LSDMA_PIO_COMMAND, >> and before we check the LSDMA_PIO_STATUS? > > I'm not sure, but it should be pretty minimal. My point is that there should be a delay at least as large as the polling delay, after writing to the command register and before reading the status register. So that should be at least a 1 us. There should be a udelay(1) after writing the command to the LSDMA_PIO_COMMAND register, before the for () loop begins. Regards, Luben > >> >> At the moment the code above checks the status immediately after writing to >> LSDMA_PIO_COMMAND, and the copy wouldn't be completed. >> >> If the poll timeout is 1 us, as the loop shows us, maybe the command completion >> is larger than that (the time after writing to LSDMA_PIO_COMMAND and before >> checking LSDMA_PIO_STATUS)? > > The time it takes for the copy will be dependent on the amount of data > there is to copy. While the command is probably not complete on the > first read of the status register, it may be required to make sure the > previous write goes through. > > Alex > >> >>> + >>> + if (i >= usec_timeout) { >>> + dev_err(adev->dev, "LSDMA PIO failed to copy memory!\n"); >>> + return -ETIMEDOUT; >>> + } >>> + >>> + return 0; >>> +} >>> + >>> const struct amdgpu_lsdma_funcs lsdma_v6_0_funcs = { >>> + .copy_mem = lsdma_v6_0_copy_mem >>> }; >> >> Regards, >> -- >> Luben Regards, -- Luben