On 2022-05-05 16:04, Alex Deucher wrote: > From: Likun Gao <Likun.Gao@xxxxxxx> > > Support constant data filling in PIO mode for LSDMA. > > Signed-off-by: Likun Gao <Likun.Gao@xxxxxxx> > Reviewed-by: Christian König <christian.koenig@xxxxxxx> > Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c | 40 +++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h | 6 +++ > drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c | 62 +++++++++++++++++------ > 3 files changed, 92 insertions(+), 16 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c > index 3f1c674afe41..223c47d1cc1c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.c > @@ -26,6 +26,23 @@ > > #define AMDGPU_LSDMA_MAX_SIZE 0x2000000ULL > > +int amdgpu_lsdma_wait_for(struct amdgpu_device *adev, > + uint32_t reg_index, uint32_t reg_val, > + uint32_t mask) > +{ > + uint32_t val; > + int i; > + > + for (i = 0; i < adev->usec_timeout; i++) { > + val = RREG32(reg_index); > + if ((val & mask) == reg_val) > + return 0; > + udelay(1); > + } > + > + return -ETIME; > +} > + > int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev, > uint64_t src_addr, > uint64_t dst_addr, > @@ -49,3 +66,26 @@ int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev, > > return 0; > } > + > +int amdgpu_lsdma_fill_mem(struct amdgpu_device *adev, > + uint64_t dst_addr, > + uint32_t data, > + uint64_t mem_size) > +{ > + int ret; > + > + if (mem_size == 0) > + return -EINVAL; > + > + while(mem_size > 0) { checkpatch.pl complains here for style. 
> + uint64_t current_fill_size = min(mem_size, AMDGPU_LSDMA_MAX_SIZE); > + > + ret = adev->lsdma.funcs->fill_mem(adev, dst_addr, data, current_fill_size); > + if (ret) > + return ret; > + dst_addr += current_fill_size; > + mem_size -= current_fill_size; > + } > + > + return 0; > +} > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h > index be397666e4c1..9a29f18407b8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_lsdma.h > @@ -32,9 +32,15 @@ struct amdgpu_lsdma_funcs > { > int (*copy_mem)(struct amdgpu_device *adev, uint64_t src_addr, > uint64_t dst_addr, uint64_t size); > + int (*fill_mem)(struct amdgpu_device *adev, uint64_t dst_addr, > + uint32_t data, uint64_t size); > }; > > int amdgpu_lsdma_copy_mem(struct amdgpu_device *adev, uint64_t src_addr, > uint64_t dst_addr, uint64_t mem_size); > +int amdgpu_lsdma_fill_mem(struct amdgpu_device *adev, uint64_t dst_addr, > + uint32_t data, uint64_t mem_size); > +int amdgpu_lsdma_wait_for(struct amdgpu_device *adev, uint32_t reg_index, > + uint32_t reg_val, uint32_t mask); > > #endif > diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c > index 0d2bdd04bd76..b4adb94a080b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v6_0.c > @@ -29,14 +29,20 @@ > #include "lsdma/lsdma_6_0_0_offset.h" > #include "lsdma/lsdma_6_0_0_sh_mask.h" > > +static int lsdma_v6_0_wait_pio_status(struct amdgpu_device *adev) > +{ > + return amdgpu_lsdma_wait_for(adev, SOC15_REG_OFFSET(LSDMA, 0, regLSDMA_PIO_STATUS), > + LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK, > + LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK); > +} > + > static int lsdma_v6_0_copy_mem(struct amdgpu_device *adev, > uint64_t src_addr, > uint64_t dst_addr, > uint64_t size) > { > - uint32_t usec_timeout = 5000; /* wait for 5ms */ > - 
uint32_t tmp, expect_val; > - int i; > + int ret; > + uint32_t tmp; > > WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_LO, lower_32_bits(src_addr)); > WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_HI, upper_32_bits(src_addr)); > @@ -56,22 +62,46 @@ static int lsdma_v6_0_copy_mem(struct amdgpu_device *adev, > tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 0); > WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp); > > - expect_val = LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK; > - for (i = 0; i < usec_timeout; i++) { > - tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_STATUS); > - if ((tmp & expect_val) == expect_val) > - break; > - udelay(1); > - } > - > - if (i >= usec_timeout) { > + ret = lsdma_v6_0_wait_pio_status(adev); Similarly here. Shouldn't we wait a minimum command completion time before starting to (immediately) poll? (Perhaps not, but I've not seen the HW spec, and whether it specifies a minimum command waiting time before polling for completion (whose poll time interval would be different (smaller)).) 
> + if (ret) > dev_err(adev->dev, "LSDMA PIO failed to copy memory!\n"); > - return -ETIMEDOUT; > - } > > - return 0; > + return ret; > +} > + > +static int lsdma_v6_0_fill_mem(struct amdgpu_device *adev, > + uint64_t dst_addr, > + uint32_t data, > + uint64_t size) > +{ > + int ret; > + uint32_t tmp; > + > + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONSTFILL_DATA, data); > + > + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr)); > + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr)); > + > + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0); > + > + tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND); > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size); > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0); > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0); > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0); > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0); > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0); > + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 1); > + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp); > + > + ret = lsdma_v6_0_wait_pio_status(adev); > + if (ret) > + dev_err(adev->dev, "LSDMA PIO failed to fill memory!\n"); > + > + return ret; > } > > const struct amdgpu_lsdma_funcs lsdma_v6_0_funcs = { > - .copy_mem = lsdma_v6_0_copy_mem > + .copy_mem = lsdma_v6_0_copy_mem, > + .fill_mem = lsdma_v6_0_fill_mem > }; Regards, -- Luben