On Tue, Aug 6, 2024 at 1:08 PM Alex Deucher <alexdeucher@xxxxxxxxx> wrote: > > On Tue, Aug 6, 2024 at 12:00 PM David (Ming Qiang) Wu <David.Wu3@xxxxxxx> wrote: > > > > Add JPEG IB command parser to ensure registers > > in the command are within the JPEG IP block. > > > > Signed-off-by: David (Ming Qiang) Wu <David.Wu3@xxxxxxx> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 ++ > > drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 58 +++++++++++++++++++++++- > > drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h | 7 ++- > > drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c | 1 + > > drivers/gpu/drm/amd/amdgpu/soc15d.h | 6 +++ > > 5 files changed, 73 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > index 1e167d925b64..78b3c067fea7 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > @@ -1061,6 +1061,9 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p, > > r = amdgpu_ring_parse_cs(ring, p, job, ib); > > if (r) > > return r; > > + > > + if (ib->sa_bo) > > + ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); > > } else { > > ib->ptr = (uint32_t *)kptr; > > r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib); > > diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c > > index ad524ddc9760..6ccf61d7c13d 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c > > +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c > > @@ -782,7 +782,11 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, > > > > amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, > > 0, 0, PACKETJ_TYPE0)); > > - amdgpu_ring_write(ring, (vmid | (vmid << 4))); > > + > > + if (ring->funcs->parse_cs) > > + amdgpu_ring_write(ring, 0); > > + else > > + amdgpu_ring_write(ring, (vmid | (vmid << 4))); > > This part doesn't make sense to me. If we use vmid 0, we'll need to > also patch any addresses in the IB itself to use vmid 0 as well unless > there is a way to use a vmid 0 address for the IB base, but a > different vmid for the addresses in the IB. If it's the latter, then > we need to make sure that is set up and specify the vmid to use. David and I discussed this and this is handled in UVD_LMI_JPEG_VMID. With the below nits fixed, this patch is: Reviewed-by: Alex Deucher <alexander.deucher@xxxxxxx> > > > > > amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, > > 0, 0, PACKETJ_TYPE0)); > > @@ -1084,6 +1088,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { > > .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, > > .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, > > .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, > > + .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, > > .emit_frame_size = > > SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + > > SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + > > @@ -1248,3 +1253,54 @@ static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev) > > { > > adev->jpeg.ras = &jpeg_v4_0_3_ras; > > } > > + > > +/** > > + * jpeg_v4_0_3_dec_ring_parse_cs - command submission parser > > + * > > + * @parser: Command submission parser context > > + * @job: the job to parse > > + * @ib: the IB to parse > > + * > > + * Parse the command stream, return -EINVAL for invalid packet > > + * 0 otherwise > > + */ > > +int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, > > + struct amdgpu_job *job, > > + struct amdgpu_ib *ib) > > +{ > > + uint32_t i, reg, res, cond, type; > > + > > + for (i = 0; i < ib->length_dw ; i += 2) { > > + reg = CP_PACKETJ_GET_REG(ib->ptr[i]); > > + res = CP_PACKETJ_GET_RES(ib->ptr[i]); > > + cond = CP_PACKETJ_GET_COND(ib->ptr[i]); > > + type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); > > + > > + if(res) /* only support 0 at the moment */ > > space between if and (. > > > + return -EINVAL; > > + > > + switch (type) { > > + case PACKETJ_TYPE0: > > + if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { > > + DRM_ERROR("Invalid packet [0x%08x]!\n", ib->ptr[i]); > > use dev_err() so we can differentiate between multiple GPUs. > > > + return -EINVAL; > > + } > > + break; > > + case PACKETJ_TYPE3: > > + if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { > > + DRM_ERROR("Invalid packet [0x%08x]!\n", ib->ptr[i]); > > Same here. > > > + return -EINVAL; > > + } > > + break; > > + case PACKETJ_TYPE6: > > + if (ib->ptr[i] == CP_PACKETJ_NOP) > > + continue; > > + return -EINVAL; > > + default: > > + DRM_ERROR("Unknown packet type %d !\n", type); > > And here. > > > + return -EINVAL; > > + } > > + } > > + > > + return 0; > > +} > > diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h > > index 747a3e5f6856..71c54b294e15 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h > > +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h > > @@ -46,6 +46,9 @@ > > > > #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 > > > > +#define JPEG_REG_RANGE_START 0x4000 > > +#define JPEG_REG_RANGE_END 0x41c2 > > + > > extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block; > > > > void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, > > @@ -62,5 +65,7 @@ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring); > > void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); > > void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, > > uint32_t val, uint32_t mask); > > - > > +int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, > > + struct amdgpu_job *job, > > + struct amdgpu_ib *ib); > > #endif /* __JPEG_V4_0_3_H__ */ > > diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c > > index d694a276498a..f4daff90c770 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c > > @@ -646,6 +646,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { > > .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, > > .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, > > .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, > > + .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, > > .emit_frame_size = > > SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + > > SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + > > diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h > > index 2357ff39323f..e74e1983da53 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h > > +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h > > @@ -76,6 +76,12 @@ > > ((cond & 0xF) << 24) | \ > > ((type & 0xF) << 28)) > > > > +#define CP_PACKETJ_NOP 0x60000000 > > +#define CP_PACKETJ_GET_REG(x) ((x) & 0x3FFFF) > > +#define CP_PACKETJ_GET_RES(x) (((x) >> 18) & 0x3F) > > +#define CP_PACKETJ_GET_COND(x) (((x) >> 24) & 0xF) > > +#define CP_PACKETJ_GET_TYPE(x) (((x) >> 28) & 0xF) > > + > > /* Packet 3 types */ > > #define PACKET3_NOP 0x10 > > #define PACKET3_SET_BASE 0x11 > > -- > > 2.34.1 > >