Re: [PATCH 02/11] drm/amdgpu: add ring flag for no user submissions

Alex Deucher <alexdeucher@xxxxxxxxx> · Mon, 17 Mar 2025 14:27:38 -0400

On Mon, Mar 17, 2025 at 1:23 PM Marek Olšák <maraeo@xxxxxxxxx> wrote:
>
> Userspace needs a query that a queue IP type is supported. "available_rings" is used for that right now, but if that's 0, something else must indicate IP support.
>
> amd_ip_info::num_queues should be non-zero even when user queues are supported. The exact number doesn't matter with user queues.

How will mesa determine whether kernel queues are supported?  Can mesa
look at amd_ip_info::num_queues and if it's 0, check some new INFO
query to determine if user queues are available?  If
amd_ip_info::num_queues is always non-0, then it would be assumed that
the kernel supports kernel queues, which it may not.

Alex

>
> Marek
>
> On Mon, Mar 17, 2025 at 3:09 AM Liang, Prike <Prike.Liang@xxxxxxx> wrote:
>>
>> [Public]
>>
>> We might still need to export each ring's number correctly; otherwise, the Mesa driver will consider there's no available ring supported from the driver and then further assert before submitting the user queue.
>>
>> If we want to keep the ring number being zero, the Mesa driver may need an attachment change to allow the command submitted to the zero-ring number if the user queue is enabled.
>>
>> Hi @Olsak, Marek Do you think it's fine to have the attachment patch for the userq support? Except for such changes, maybe we also need to clean up the IB-related part.
>>
>> Regards,
>>       Prike
>>
>> > -----Original Message-----
>> > From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Alex
>> > Deucher
>> > Sent: Thursday, March 13, 2025 10:41 PM
>> > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
>> > Cc: Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Khatri, Sunil
>> > <Sunil.Khatri@xxxxxxx>
>> > Subject: [PATCH 02/11] drm/amdgpu: add ring flag for no user submissions
>> >
>> > This would be set by IPs which only accept submissions from the kernel, not
>> > userspace, such as when kernel queues are disabled. Don't expose the rings to
>> > userspace and reject any submissions in the CS IOCTL.
>> >
>> > Reviewed-by: Sunil Khatri<sunil.khatri@xxxxxxx>
>> > Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
>> > ---
>> >  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  4 ++++
>> >  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c  | 30 ++++++++++++++++--------
>> > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 +-
>> >  3 files changed, 25 insertions(+), 11 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> > index 5df21529b3b13..5cc18034b75df 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> > @@ -349,6 +349,10 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser
>> > *p,
>> >       ring = amdgpu_job_ring(job);
>> >       ib = &job->ibs[job->num_ibs++];
>> >
>> > +     /* submissions to kernel queus are disabled */
>> > +     if (ring->no_user_submission)
>> > +             return -EINVAL;
>> > +
>> >       /* MM engine doesn't support user fences */
>> >       if (p->uf_bo && ring->funcs->no_user_fence)
>> >               return -EINVAL;
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> > index cd6eb7a3bc58a..3b7dfd56ccd0e 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> > @@ -408,7 +408,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
>> > *adev,
>> >       case AMDGPU_HW_IP_GFX:
>> >               type = AMD_IP_BLOCK_TYPE_GFX;
>> >               for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>> > -                     if (adev->gfx.gfx_ring[i].sched.ready)
>> > +                     if (adev->gfx.gfx_ring[i].sched.ready &&
>> > +                         !adev->gfx.gfx_ring[i].no_user_submission)
>> >                               ++num_rings;
>> >               ib_start_alignment = 32;
>> >               ib_size_alignment = 32;
>> > @@ -416,7 +417,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
>> > *adev,
>> >       case AMDGPU_HW_IP_COMPUTE:
>> >               type = AMD_IP_BLOCK_TYPE_GFX;
>> >               for (i = 0; i < adev->gfx.num_compute_rings; i++)
>> > -                     if (adev->gfx.compute_ring[i].sched.ready)
>> > +                     if (adev->gfx.compute_ring[i].sched.ready &&
>> > +                         !adev->gfx.compute_ring[i].no_user_submission)
>> >                               ++num_rings;
>> >               ib_start_alignment = 32;
>> >               ib_size_alignment = 32;
>> > @@ -424,7 +426,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
>> > *adev,
>> >       case AMDGPU_HW_IP_DMA:
>> >               type = AMD_IP_BLOCK_TYPE_SDMA;
>> >               for (i = 0; i < adev->sdma.num_instances; i++)
>> > -                     if (adev->sdma.instance[i].ring.sched.ready)
>> > +                     if (adev->sdma.instance[i].ring.sched.ready &&
>> > +                         !adev->gfx.gfx_ring[i].no_user_submission)
>> >                               ++num_rings;
>> >               ib_start_alignment = 256;
>> >               ib_size_alignment = 4;
>> > @@ -435,7 +438,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
>> > *adev,
>> >                       if (adev->uvd.harvest_config & (1 << i))
>> >                               continue;
>> >
>> > -                     if (adev->uvd.inst[i].ring.sched.ready)
>> > +                     if (adev->uvd.inst[i].ring.sched.ready &&
>> > +                         !adev->uvd.inst[i].ring.no_user_submission)
>> >                               ++num_rings;
>> >               }
>> >               ib_start_alignment = 256;
>> > @@ -444,7 +448,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
>> > *adev,
>> >       case AMDGPU_HW_IP_VCE:
>> >               type = AMD_IP_BLOCK_TYPE_VCE;
>> >               for (i = 0; i < adev->vce.num_rings; i++)
>> > -                     if (adev->vce.ring[i].sched.ready)
>> > +                     if (adev->vce.ring[i].sched.ready &&
>> > +                         !adev->vce.ring[i].no_user_submission)
>> >                               ++num_rings;
>> >               ib_start_alignment = 256;
>> >               ib_size_alignment = 4;
>> > @@ -456,7 +461,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
>> > *adev,
>> >                               continue;
>> >
>> >                       for (j = 0; j < adev->uvd.num_enc_rings; j++)
>> > -                             if (adev->uvd.inst[i].ring_enc[j].sched.ready)
>> > +                             if (adev->uvd.inst[i].ring_enc[j].sched.ready &&
>> > +                                 !adev->uvd.inst[i].ring_enc[j].no_user_submission)
>> >                                       ++num_rings;
>> >               }
>> >               ib_start_alignment = 256;
>> > @@ -468,7 +474,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
>> > *adev,
>> >                       if (adev->vcn.harvest_config & (1 << i))
>> >                               continue;
>> >
>> > -                     if (adev->vcn.inst[i].ring_dec.sched.ready)
>> > +                     if (adev->vcn.inst[i].ring_dec.sched.ready &&
>> > +                         !adev->vcn.inst[i].ring_dec.no_user_submission)
>> >                               ++num_rings;
>> >               }
>> >               ib_start_alignment = 256;
>> > @@ -481,7 +488,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
>> > *adev,
>> >                               continue;
>> >
>> >                       for (j = 0; j < adev->vcn.inst[i].num_enc_rings; j++)
>> > -                             if (adev->vcn.inst[i].ring_enc[j].sched.ready)
>> > +                             if (adev->vcn.inst[i].ring_enc[j].sched.ready &&
>> > +                                 !adev->vcn.inst[i].ring_enc[j].no_user_submission)
>> >                                       ++num_rings;
>> >               }
>> >               ib_start_alignment = 256;
>> > @@ -496,7 +504,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
>> > *adev,
>> >                               continue;
>> >
>> >                       for (j = 0; j < adev->jpeg.num_jpeg_rings; j++)
>> > -                             if (adev->jpeg.inst[i].ring_dec[j].sched.ready)
>> > +                             if (adev->jpeg.inst[i].ring_dec[j].sched.ready &&
>> > +                                 !adev->jpeg.inst[i].ring_dec[j].no_user_submission)
>> >                                       ++num_rings;
>> >               }
>> >               ib_start_alignment = 256;
>> > @@ -504,7 +513,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
>> > *adev,
>> >               break;
>> >       case AMDGPU_HW_IP_VPE:
>> >               type = AMD_IP_BLOCK_TYPE_VPE;
>> > -             if (adev->vpe.ring.sched.ready)
>> > +             if (adev->vpe.ring.sched.ready &&
>> > +                 !adev->vpe.ring.no_user_submission)
>> >                       ++num_rings;
>> >               ib_start_alignment = 256;
>> >               ib_size_alignment = 4;
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> > index b4fd1e17205e9..4a97afcb38b78 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> > @@ -297,6 +297,7 @@ struct amdgpu_ring {
>> >       struct dma_fence        *vmid_wait;
>> >       bool                    has_compute_vm_bug;
>> >       bool                    no_scheduler;
>> > +     bool                    no_user_submission;
>> >       int                     hw_prio;
>> >       unsigned                num_hw_submission;
>> >       atomic_t                *sched_score;
>> > @@ -310,7 +311,6 @@ struct amdgpu_ring {
>> >       unsigned int    entry_index;
>> >       /* store the cached rptr to restore after reset */
>> >       uint64_t cached_rptr;
>> > -
>> >  };
>> >
>> >  #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
>> > --
>> > 2.48.1
>>