Re: [PATCH v2 3/3] drm/amdgpu: Add support for querying the max ibs in a submission. (v2)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





Bas Nieuwenhuizen <bas@xxxxxxxxxxxxxxxxxxx> ezt írta (időpont: 2023. ápr. 11., Ke 10:25):
On Tue, Apr 11, 2023 at 10:10 AM Christian König
<christian.koenig@xxxxxxx> wrote:
>
> Am 09.04.23 um 20:59 schrieb Bas Nieuwenhuizen:
> > We need to introduce a new version of the info struct because
> > libdrm_amdgpu forgot any versioning info in amdgpu_query_hw_ip_info
> > so the mesa<->libdrm_amdgpu interface can't handle increasing the
> > size.
> >
> > This info would be used by radv to figure out when we need to
> > split a submission into multiple submissions. radv currently has
> > a limit of 192 which seems to work for most gfx submissions, but
> > is way too high for e.g. compute or sdma.
>
> Why do you need so many IBs in the first place?
>
> You can use sub-IBs since R600 and newer hw even supports a JUMP command
> to chain IBs together IIRC.

Couple of reasons:

1) We can't reliably use sub-IBs (both because on GFX6-7 there are
indirect draw commands that cannot be used in sub-IBs and because the
Vulkan API exposes sub-IBs, so we can have the primary IBs be sub-IBs
already)

Furthermore, only the GFX queue supports the "IB2" packet that is used to implement sub-IBs.

(The same packet hangs the compute queue and is documented as not working in the PAL source code. Other queues don't seem to have a packet for this purpose.)

2) We believed GFX6 may not support chaining. (Then again, it turns
out the GFX7+ packet may just work on GFX6?
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22406)

I was actually quite surprised when I found this out. Mesa developers seem to have believed that this is not possible on GFX6. I'd love to get some more context on this, did it always work or was it added in a FW update?

3) Chaining doesn't work if the IB may be in flight multiple times in
a different sequence.

Additionally, chaining also doesn't work on any queue other than GFX and compute. As far as we know, SDMA and the various encoder/decoder queues don't have a packet for chaining.

Christian, please let us know if we are wrong about this.


Best regards,
Timur




We try to chain when we can but (2) and (3) are cases where we
fallback to submitting multiple IBs.

>
> For the kernel UAPI you only need separate IBs if you have separate
> properties on them, E.g. preamble or submitting to a different engine.
>
> Everything else just needs additional CPU overhead in the IOCTL.
>
> Regards,
> Christian.
>
> >
> > Because the kernel handling is just fine we can just use the v2
> > everywhere and have the return_size do the versioning for us,
> > with userspace interpreting 0 as unknown.
> >
> > Userspace implementation:
> > https://gitlab.freedesktop.org/bnieuwenhuizen/drm/-/tree/ib-rejection
> > https://gitlab.freedesktop.org/bnieuwenhuizen/mesa/-/tree/ib-rejection
> >
> > v2: Use base member in the new struct.
> >
> > Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2498
> > Signed-off-by: Bas Nieuwenhuizen <bas@xxxxxxxxxxxxxxxxxxx>
> > Cc: David Airlie <airlied@xxxxxxxxx>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 31 ++++++++++++++-----------
> >   include/uapi/drm/amdgpu_drm.h           | 14 +++++++++++
> >   2 files changed, 31 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > index 89689b940493..5b575e1aef1a 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > @@ -360,7 +360,7 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
> >
> >   static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> >                            struct drm_amdgpu_info *info,
> > -                          struct drm_amdgpu_info_hw_ip *result)
> > +                          struct drm_amdgpu_info_hw_ip2 *result)
> >   {
> >       uint32_t ib_start_alignment = 0;
> >       uint32_t ib_size_alignment = 0;
> > @@ -431,6 +431,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> >               return -EINVAL;
> >       }
> >
> > +     result->max_ibs = UINT_MAX;
> >       for (i = 0; i < adev->num_rings; ++i) {
> >               /* Note that this uses that ring types alias the equivalent
> >                * HW IP exposes to userspace.
> > @@ -438,6 +439,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> >               if (adev->rings[i]->funcs->type == info->query_hw_ip.type &&
> >                   adev->rings[i]->sched.ready) {
> >                       ++num_rings;
> > +                     result->max_ibs = min(result->max_ibs,
> > +                                           adev->rings[i]->max_ibs);
> >               }
> >       }
> >
> > @@ -452,36 +455,36 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> >       num_rings = min(amdgpu_ctx_num_entities[info->query_hw_ip.type],
> >                       num_rings);
> >
> > -     result->hw_ip_version_major = adev->ip_blocks[i].version->major;
> > -     result->hw_ip_version_minor = adev->ip_blocks[i].version->minor;
> > +     result->base.hw_ip_version_major = adev->ip_blocks[i].version->major;
> > +     result->base.hw_ip_version_minor = adev->ip_blocks[i].version->minor;
> >
> >       if (adev->asic_type >= CHIP_VEGA10) {
> >               switch (type) {
> >               case AMD_IP_BLOCK_TYPE_GFX:
> > -                     result->ip_discovery_version = adev->ip_versions[GC_HWIP][0];
> > +                     result->base.ip_discovery_version = adev->ip_versions[GC_HWIP][0];
> >                       break;
> >               case AMD_IP_BLOCK_TYPE_SDMA:
> > -                     result->ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0];
> > +                     result->base.ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0];
> >                       break;
> >               case AMD_IP_BLOCK_TYPE_UVD:
> >               case AMD_IP_BLOCK_TYPE_VCN:
> >               case AMD_IP_BLOCK_TYPE_JPEG:
> > -                     result->ip_discovery_version = adev->ip_versions[UVD_HWIP][0];
> > +                     result->base.ip_discovery_version = adev->ip_versions[UVD_HWIP][0];
> >                       break;
> >               case AMD_IP_BLOCK_TYPE_VCE:
> > -                     result->ip_discovery_version = adev->ip_versions[VCE_HWIP][0];
> > +                     result->base.ip_discovery_version = adev->ip_versions[VCE_HWIP][0];
> >                       break;
> >               default:
> > -                     result->ip_discovery_version = 0;
> > +                     result->base.ip_discovery_version = 0;
> >                       break;
> >               }
> >       } else {
> > -             result->ip_discovery_version = 0;
> > +             result->base.ip_discovery_version = 0;
> >       }
> > -     result->capabilities_flags = 0;
> > -     result->available_rings = (1 << num_rings) - 1;
> > -     result->ib_start_alignment = ib_start_alignment;
> > -     result->ib_size_alignment = ib_size_alignment;
> > +     result->base.capabilities_flags = 0;
> > +     result->base.available_rings = (1 << num_rings) - 1;
> > +     result->base.ib_start_alignment = ib_start_alignment;
> > +     result->base.ib_size_alignment = ib_size_alignment;
> >       return 0;
> >   }
> >
> > @@ -536,7 +539,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> >               }
> >               return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
> >       case AMDGPU_INFO_HW_IP_INFO: {
> > -             struct drm_amdgpu_info_hw_ip ip = {};
> > +             struct drm_amdgpu_info_hw_ip2 ip = {};
> >               int ret;
> >
> >               ret = amdgpu_hw_ip_info(adev, info, &ip);
> > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > index b6eb90df5d05..6b9e35b6f747 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -1128,6 +1128,9 @@ struct drm_amdgpu_info_device {
> >       __u32 enabled_rb_pipes_mask_hi;
> >   };
> >
> > +/* The size of this struct cannot be increased for compatibility reasons, use
> > + * struct drm_amdgpu_info_hw_ip2 instead.
> > + */
> >   struct drm_amdgpu_info_hw_ip {
> >       /** Version of h/w IP */
> >       __u32  hw_ip_version_major;
> > @@ -1144,6 +1147,17 @@ struct drm_amdgpu_info_hw_ip {
> >       __u32  ip_discovery_version;
> >   };
> >
> > +struct drm_amdgpu_info_hw_ip2 {
> > +     /** Previous version of the struct */
> > +     struct drm_amdgpu_info_hw_ip  base;
> > +     /** The maximum number of IBs one can submit in a single submission
> > +      * ioctl. (When using gang submit: this is per IP type)
> > +      */
> > +     __u32  max_ibs;
> > +     /** padding to 64bit for arch differences */
> > +     __u32  pad;
> > +};
> > +
> >   struct drm_amdgpu_info_num_handles {
> >       /** Max handles as supported by firmware for UVD */
> >       __u32  uvd_max_handles;
>

[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux