On Tue, Apr 11, 2023 at 10:10 AM Christian König <christian.koenig@xxxxxxx> wrote: > > Am 09.04.23 um 20:59 schrieb Bas Nieuwenhuizen: > > We need to introduce a new version of the info struct because > > libdrm_amdgpu forgot any versioning info in amdgpu_query_hw_ip_info > > so the mesa<->libdrm_amdgpu interface can't handle increasing the > > size. > > > > This info would be used by radv to figure out when we need to > > split a submission into multiple submissions. radv currently has > > a limit of 192 which seems to work for most gfx submissions, but > > is way too high for e.g. compute or sdma. > > Why do you need so many IBs in the first place? > > You can use sub-IBs since R600 and newer hw even supports a JUMP command > to chain IBs together IIRC. Couple of reasons: 1) We can't reliably use sub-IBs (both because on GFX6-7 there are indirect draw commands that cannot be used in sub-IBs and because the Vulkan API exposes sub-IBs, so we can have the primary IBs be sub-IBs already) 2) We believed GFX6 may not support chaining. (Then again, it turns out the GFX7+ packet may just work on GFX6? https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22406) 3) Chaining doesn't work if the IB may be in flight multiple times in a different sequence. We try to chain when we can but (2) and (3) are cases where we fallback to submitting multiple IBs. > > For the kernel UAPI you only need separate IBs if you have separate > properties on them, E.g. preamble or submitting to a different engine. > > Everything else just needs additional CPU overhead in the IOCTL. > > Regards, > Christian. > > > > > Because the kernel handling is just fine we can just use the v2 > > everywhere and have the return_size do the versioning for us, > > with userspace interpreting 0 as unknown. > > > > Userspace implementation: > > https://gitlab.freedesktop.org/bnieuwenhuizen/drm/-/tree/ib-rejection > > https://gitlab.freedesktop.org/bnieuwenhuizen/mesa/-/tree/ib-rejection > > > > v2: Use base member in the new struct. > > > > Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2498 > > Signed-off-by: Bas Nieuwenhuizen <bas@xxxxxxxxxxxxxxxxxxx> > > Cc: David Airlie <airlied@xxxxxxxxx> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 31 ++++++++++++++----------- > > include/uapi/drm/amdgpu_drm.h | 14 +++++++++++ > > 2 files changed, 31 insertions(+), 14 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > > index 89689b940493..5b575e1aef1a 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > > @@ -360,7 +360,7 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, > > > > static int amdgpu_hw_ip_info(struct amdgpu_device *adev, > > struct drm_amdgpu_info *info, > > - struct drm_amdgpu_info_hw_ip *result) > > + struct drm_amdgpu_info_hw_ip2 *result) > > { > > uint32_t ib_start_alignment = 0; > > uint32_t ib_size_alignment = 0; > > @@ -431,6 +431,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, > > return -EINVAL; > > } > > > > + result->max_ibs = UINT_MAX; > > for (i = 0; i < adev->num_rings; ++i) { > > /* Note that this uses that ring types alias the equivalent > > * HW IP exposes to userspace. > > @@ -438,6 +439,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, > > if (adev->rings[i]->funcs->type == info->query_hw_ip.type && > > adev->rings[i]->sched.ready) { > > ++num_rings; > > + result->max_ibs = min(result->max_ibs, > > + adev->rings[i]->max_ibs); > > } > > } > > > > @@ -452,36 +455,36 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, > > num_rings = min(amdgpu_ctx_num_entities[info->query_hw_ip.type], > > num_rings); > > > > - result->hw_ip_version_major = adev->ip_blocks[i].version->major; > > - result->hw_ip_version_minor = adev->ip_blocks[i].version->minor; > > + result->base.hw_ip_version_major = adev->ip_blocks[i].version->major; > > + result->base.hw_ip_version_minor = adev->ip_blocks[i].version->minor; > > > > if (adev->asic_type >= CHIP_VEGA10) { > > switch (type) { > > case AMD_IP_BLOCK_TYPE_GFX: > > - result->ip_discovery_version = adev->ip_versions[GC_HWIP][0]; > > + result->base.ip_discovery_version = adev->ip_versions[GC_HWIP][0]; > > break; > > case AMD_IP_BLOCK_TYPE_SDMA: > > - result->ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0]; > > + result->base.ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0]; > > break; > > case AMD_IP_BLOCK_TYPE_UVD: > > case AMD_IP_BLOCK_TYPE_VCN: > > case AMD_IP_BLOCK_TYPE_JPEG: > > - result->ip_discovery_version = adev->ip_versions[UVD_HWIP][0]; > > + result->base.ip_discovery_version = adev->ip_versions[UVD_HWIP][0]; > > break; > > case AMD_IP_BLOCK_TYPE_VCE: > > - result->ip_discovery_version = adev->ip_versions[VCE_HWIP][0]; > > + result->base.ip_discovery_version = adev->ip_versions[VCE_HWIP][0]; > > break; > > default: > > - result->ip_discovery_version = 0; > > + result->base.ip_discovery_version = 0; > > break; > > } > > } else { > > - result->ip_discovery_version = 0; > > + result->base.ip_discovery_version = 0; > > } > > - result->capabilities_flags = 0; > > - result->available_rings = (1 << num_rings) - 1; > > - result->ib_start_alignment = ib_start_alignment; > > - result->ib_size_alignment = ib_size_alignment; > > + result->base.capabilities_flags = 0; > > + result->base.available_rings = (1 << num_rings) - 1; > > + result->base.ib_start_alignment = ib_start_alignment; > > + result->base.ib_size_alignment = ib_size_alignment; > > return 0; > > } > > > > @@ -536,7 +539,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) > > } > > return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0; > > case AMDGPU_INFO_HW_IP_INFO: { > > - struct drm_amdgpu_info_hw_ip ip = {}; > > + struct drm_amdgpu_info_hw_ip2 ip = {}; > > int ret; > > > > ret = amdgpu_hw_ip_info(adev, info, &ip); > > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h > > index b6eb90df5d05..6b9e35b6f747 100644 > > --- a/include/uapi/drm/amdgpu_drm.h > > +++ b/include/uapi/drm/amdgpu_drm.h > > @@ -1128,6 +1128,9 @@ struct drm_amdgpu_info_device { > > __u32 enabled_rb_pipes_mask_hi; > > }; > > > > +/* The size of this struct cannot be increased for compatibility reasons, use > > + * struct drm_amdgpu_info_hw_ip2 instead. > > + */ > > struct drm_amdgpu_info_hw_ip { > > /** Version of h/w IP */ > > __u32 hw_ip_version_major; > > @@ -1144,6 +1147,17 @@ struct drm_amdgpu_info_hw_ip { > > __u32 ip_discovery_version; > > }; > > > > +struct drm_amdgpu_info_hw_ip2 { > > + /** Previous version of the struct */ > > + struct drm_amdgpu_info_hw_ip base; > > + /** The maximum number of IBs one can submit in a single submission > > + * ioctl. (When using gang submit: this is per IP type) > > + */ > > + __u32 max_ibs; > > + /** padding to 64bit for arch differences */ > > + __u32 pad; > > +}; > > + > > struct drm_amdgpu_info_num_handles { > > /** Max handles as supported by firmware for UVD */ > > __u32 uvd_max_handles; >