RE: [PATCH v2 3/3] drm/amdgpu/jpeg: support for sriov cpx mode

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[AMD Official Use Only - General]

Reviewed-by: Leo Liu <leo.liu@xxxxxxx>

> -----Original Message-----
> From: Dhume, Samir <Samir.Dhume@xxxxxxx>
> Sent: Friday, March 15, 2024 3:51 PM
> To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
> Cc: Dhume, Samir <Samir.Dhume@xxxxxxx>; Lazar, Lijo
> <Lijo.Lazar@xxxxxxx>; Wan, Gavin <Gavin.Wan@xxxxxxx>; Liu, Leo
> <Leo.Liu@xxxxxxx>; Deucher, Alexander <Alexander.Deucher@xxxxxxx>
> Subject: [PATCH v2 3/3] drm/amdgpu/jpeg: support for sriov cpx mode
>
> In SRIOV CPX mode, each VF has 4 JPEG engines. The even-numbered VFs point
> to the JPEG0 block of the AID and the odd-numbered VFs point to the JPEG1 block.
>
>                     Even-numbered VFs     Odd-numbered VFs
>
> VCN doorbell 0      VCN Decode ring       VCN Decode ring
> VCN doorbell 1-3    Reserved              Reserved
> VCN doorbell 4      JPEG0-0 ring
> VCN doorbell 5      JPEG0-1 ring
> VCN doorbell 6      JPEG0-2 ring
> VCN doorbell 7      JPEG0-3 ring
> VCN doorbell 8                            JPEG1-0 ring
> VCN doorbell 9                            JPEG1-1 ring
> VCN doorbell 10                           JPEG1-2 ring
> VCN doorbell 11                           JPEG1-3 ring
>
> Changes involve
> 1. sriov cpx mode - 4 rings
> 2. sriov cpx mode for odd-numbered VFs - register correct src-ids (starting with
> JPEG4). Map src-id to correct instance in the interrupt handler.
>
> v2:
> 1. removed mmio access from interrupt handler. Use xcc_mask to detect cpx
> mode.
> 2. removed unnecessary sriov variables
>
> Signed-off-by: Samir Dhume <samir.dhume@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 60 +++++++++++++++++++++-
> --
>  1 file changed, 53 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
> b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
> index 32caeb37cef9..d95ca797412c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
> @@ -68,6 +68,11 @@ static int jpeg_v4_0_3_early_init(void *handle)
>
>       adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;
>
> +     /* check for sriov cpx mode */
> +     if (amdgpu_sriov_vf(adev))
> +             if (adev->gfx.xcc_mask == 0x1)
> +                     adev->jpeg.num_jpeg_rings = 4;
> +
>       jpeg_v4_0_3_set_dec_ring_funcs(adev);
>       jpeg_v4_0_3_set_irq_funcs(adev);
>       jpeg_v4_0_3_set_ras_funcs(adev);
> @@ -87,11 +92,25 @@ static int jpeg_v4_0_3_sw_init(void *handle)
>       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>       struct amdgpu_ring *ring;
>       int i, j, r, jpeg_inst;
> +     bool sriov_cpx_odd = false;
> +
> +     /* check for sriov cpx mode odd/even numbered vfs */
> +     if (amdgpu_sriov_vf(adev)) {
> +             if (adev->gfx.xcc_mask == 0x1) {
> +                     if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1)
> +                             sriov_cpx_odd = true;
> +             }
> +     }
>
>       for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
>               /* JPEG TRAP */
> -             r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
> +             if (!sriov_cpx_odd)
> +                     r = amdgpu_irq_add_id(adev,
> SOC15_IH_CLIENTID_VCN,
>                               amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst-
> >irq);
> +             else
> +                     r = amdgpu_irq_add_id(adev,
> SOC15_IH_CLIENTID_VCN,
> +                             amdgpu_ih_srcid_jpeg[j+4], &adev->jpeg.inst-
> >irq);
> +
>               if (r)
>                       return r;
>       }
> @@ -116,10 +135,14 @@ static int jpeg_v4_0_3_sw_init(void *handle)
>                                       (adev-
> >doorbell_index.vcn.vcn_ring0_1 << 1) +
>                                       1 + j + 9 * jpeg_inst;
>                       } else {
> -                             if (j < 4)
> +                             if ((j < 4) && (!sriov_cpx_odd))
>                                       ring->doorbell_index =
>                                               (adev-
> >doorbell_index.vcn.vcn_ring0_1 << 1) +
>                                               4 + j + 32 * jpeg_inst;
> +                             else if (sriov_cpx_odd)
> +                                     ring->doorbell_index =
> +                                             (adev-
> >doorbell_index.vcn.vcn_ring0_1 << 1) +
> +                                             12 + j + 32 * jpeg_inst;
>                               else
>                                       ring->doorbell_index =
>                                               (adev-
> >doorbell_index.vcn.vcn_ring0_1 << 1) + @@ -186,6 +209,7 @@ static int
> jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev)
>       uint32_t size, size_dw, item_offset;
>       uint32_t init_status;
>       int i, j, jpeg_inst;
> +     bool cpx_odd = false;
>
>       struct mmsch_v4_0_cmd_direct_write
>               direct_wt = { {0} };
> @@ -197,6 +221,12 @@ static int jpeg_v4_0_3_start_sriov(struct
> amdgpu_device *adev)
>       end.cmd_header.command_type =
>               MMSCH_COMMAND__END;
>
> +     /* check for cpx mode odd/even numbered vf */
> +     if (adev->gfx.xcc_mask == 0x1) {
> +             if (adev->gfx.funcs->get_xcc_id(adev, 0) & 0x1)
> +                     cpx_odd = true;
> +     }
> +
>       for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
>               jpeg_inst = GET_INST(JPEG, i);
>
> @@ -220,10 +250,14 @@ static int jpeg_v4_0_3_start_sriov(struct
> amdgpu_device *adev)
>                       tmp = SOC15_REG_OFFSET(JPEG, 0,
> regUVD_JRBC0_UVD_JRBC_RB_SIZE);
>                       MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring-
> >ring_size / 4);
>
> -                     if (j <= 3) {
> +                     if ((j <= 3) && (!cpx_odd)) {
>                               header.mjpegdec0[j].table_offset =
> item_offset;
>                               header.mjpegdec0[j].init_status = 0;
>                               header.mjpegdec0[j].table_size = table_size;
> +                     } else if (cpx_odd) {
> +                             header.mjpegdec1[j].table_offset =
> item_offset;
> +                             header.mjpegdec1[j].init_status = 0;
> +                             header.mjpegdec1[j].table_size = table_size;
>                       } else {
>                               header.mjpegdec1[j - 4].table_offset =
> item_offset;
>                               header.mjpegdec1[j - 4].init_status = 0; @@ -
> 1015,16 +1049,28 @@ static int jpeg_v4_0_3_process_interrupt(struct
> amdgpu_device *adev,
>               amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[3]);
>               break;
>       case VCN_4_0__SRCID__JPEG4_DECODE:
> -             amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[4]);
> +             if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
> +                     amdgpu_fence_process(&adev-
> >jpeg.inst[inst].ring_dec[0]);
> +             else
> +                     amdgpu_fence_process(&adev-
> >jpeg.inst[inst].ring_dec[4]);
>               break;
>       case VCN_4_0__SRCID__JPEG5_DECODE:
> -             amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[5]);
> +             if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
> +                     amdgpu_fence_process(&adev-
> >jpeg.inst[inst].ring_dec[1]);
> +             else
> +                     amdgpu_fence_process(&adev-
> >jpeg.inst[inst].ring_dec[5]);
>               break;
>       case VCN_4_0__SRCID__JPEG6_DECODE:
> -             amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[6]);
> +             if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
> +                     amdgpu_fence_process(&adev-
> >jpeg.inst[inst].ring_dec[2]);
> +             else
> +                     amdgpu_fence_process(&adev-
> >jpeg.inst[inst].ring_dec[6]);
>               break;
>       case VCN_4_0__SRCID__JPEG7_DECODE:
> -             amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[7]);
> +             if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
> +                     amdgpu_fence_process(&adev-
> >jpeg.inst[inst].ring_dec[3]);
> +             else
> +                     amdgpu_fence_process(&adev-
> >jpeg.inst[inst].ring_dec[7]);
>               break;
>       default:
>               DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d
> %d\n",
> --
> 2.34.1





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux