On Thu, Mar 20, 2025 at 2:18 PM Danilo Krummrich <dakr@xxxxxxxxxx> wrote: > > Hi Mel, Hi, thanks for the review. > On Wed, Mar 12, 2025 at 05:36:14PM -0400, Mel Henning wrote: > > Userspace needs this information to set up zcull correctly. > > This is a very brief motivation for the commit, please also describe what the > commit does using imperative form. Sure, I guess I'll move some of the motivation that I wrote in the intro email for this series into this commit message. > > > > Signed-off-by: Mel Henning <mhenning@xxxxxxxxxxxxxxxxxx> > > --- > > .../drm/nouveau/include/nvkm/core/device.h | 6 ++ > > .../sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gr.h | 69 +++++++++++++++++++ > > drivers/gpu/drm/nouveau/nouveau_abi16.c | 15 ++++ > > drivers/gpu/drm/nouveau/nouveau_abi16.h | 1 + > > drivers/gpu/drm/nouveau/nouveau_drm.c | 1 + > > drivers/gpu/drm/nouveau/nvkm/engine/gr/r535.c | 34 +++++++++ > > include/uapi/drm/nouveau_drm.h | 19 +++++ > > 7 files changed, 145 insertions(+) > > > > diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h > > index 46afb877a296..d1742ff1cf6b 100644 > > --- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h > > +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h > > @@ -3,6 +3,9 @@ > > #define __NVKM_DEVICE_H__ > > #include <core/oclass.h> > > #include <core/intr.h> > > + > > +#include "uapi/drm/nouveau_drm.h" > > + > > enum nvkm_subdev_type; > > > > enum nvkm_device_type { > > @@ -72,6 +75,9 @@ struct nvkm_device { > > bool armed; > > bool legacy_done; > > } intr; > > + > > + bool has_zcull_info; > > + struct drm_nouveau_get_zcull_info zcull_info; > > This is bypassing the nvif layer entirely. I think you should store the contents > of struct NV2080_CTRL_GR_GET_ZCULL_INFO_PARAMS in struct r535_gr and have an > nvif interface to access the data. Sure. I think my main tripping point here is: do we re-declare the struct another time? It feels annoying to re-define it for each layer, and I don't suppose we can expose the CTRL2080 or drm structures directly in the NVIF layer. > > }; > > > > struct nvkm_subdev *nvkm_device_subdev(struct nvkm_device *, int type, int inst); > > diff --git a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gr.h b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gr.h > > index 59f8895bc5d7..01884b926c9c 100644 > > --- a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gr.h > > +++ b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gr.h > > @@ -26,6 +26,75 @@ > > * DEALINGS IN THE SOFTWARE. > > */ > > > > +/** > > + * NV2080_CTRL_CMD_GR_GET_ZCULL_INFO > > + * > > + * This command is used to query the RM for zcull information that the > > + * driver will need to allocate and manage the zcull regions. > > + * > > + * widthAlignPixels > > + * This parameter returns the width alignment restrictions in pixels > > + * used to adjust a surface for proper aliquot coverage (typically > > + * #TPC's * 16). > > + * > > + * heightAlignPixels > > + * This parameter returns the height alignment restrictions in pixels > > + * used to adjust a surface for proper aliquot coverage (typically 32). > > + * > > + * pixelSquaresByAliquots > > + * This parameter returns the pixel area covered by an aliquot > > + * (typically #Zcull_banks * 16 * 16). > > + * > > + * aliquotTotal > > + * This parameter returns the total aliquot pool available in HW. > > + * > > + * zcullRegionByteMultiplier > > + * This parameter returns multiplier used to convert aliquots in a region > > + * to the number of bytes required to save/restore them. > > + * > > + * zcullRegionHeaderSize > > + * This parameter returns the region header size which is required to be > > + * allocated and accounted for in any save/restore operation on a region. > > + * > > + * zcullSubregionHeaderSize > > + * This parameter returns the subregion header size which is required to be > > + * allocated and accounted for in any save/restore operation on a region. > > + * > > + * subregionCount > > + * This parameter returns the subregion count. > > + * > > + * subregionWidthAlignPixels > > + * This parameter returns the subregion width alignment restrictions in > > + * pixels used to adjust a surface for proper aliquot coverage > > + * (typically #TPC's * 16). > > + * > > + * subregionHeightAlignPixels > > + * This parameter returns the subregion height alignment restrictions in > > + * pixels used to adjust a surface for proper aliquot coverage > > + * (typically 62). > > + * > > + * The callee should compute the size of a zcull region as follows. > > + * (numBytes = aliquots * zcullRegionByteMultiplier + > > + * zcullRegionHeaderSize + zcullSubregionHeaderSize) > > + */ > > +#define NV2080_CTRL_CMD_GR_GET_ZCULL_INFO (0x20801206U) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_GR_INTERFACE_ID << 8) | NV2080_CTRL_GR_GET_ZCULL_INFO_PARAMS_MESSAGE_ID" */ > > + > > +#define NV2080_CTRL_GR_GET_ZCULL_INFO_PARAMS_SUBREGION_SUPPORTED > > +#define NV2080_CTRL_GR_GET_ZCULL_INFO_PARAMS_MESSAGE_ID (0x6U) > > + > > +typedef struct NV2080_CTRL_GR_GET_ZCULL_INFO_PARAMS { > > + NvU32 widthAlignPixels; > > + NvU32 heightAlignPixels; > > + NvU32 pixelSquaresByAliquots; > > + NvU32 aliquotTotal; > > + NvU32 zcullRegionByteMultiplier; > > + NvU32 zcullRegionHeaderSize; > > + NvU32 zcullSubregionHeaderSize; > > + NvU32 subregionCount; > > + NvU32 subregionWidthAlignPixels; > > + NvU32 subregionHeightAlignPixels; > > +} NV2080_CTRL_GR_GET_ZCULL_INFO_PARAMS; > > + > > typedef enum NV2080_CTRL_CMD_GR_CTXSW_PREEMPTION_BIND_BUFFERS { > > NV2080_CTRL_CMD_GR_CTXSW_PREEMPTION_BIND_BUFFERS_MAIN = 0, > > NV2080_CTRL_CMD_GR_CTXSW_PREEMPTION_BIND_BUFFERS_SPILL = 1, > > diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c > > index 2a0617e5fe2a..981abea97546 100644 > > --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c > > +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c > > @@ -333,6 +333,21 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) > > return 0; > > } > > > > +int > > +nouveau_abi16_ioctl_get_zcull_info(ABI16_IOCTL_ARGS) > > +{ > > + struct nouveau_drm *drm = nouveau_drm(dev); > > + struct nvkm_device *device = drm->nvkm; > > + struct drm_nouveau_get_zcull_info *out = data; > > + > > + if (device->has_zcull_info) { > > + *out = device->zcull_info; > > This needs copy_to_user(). > > > + return 0; > > + } else { > > + return -ENODEV; > > + } > > +} > > + > > int > > nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) > > { > > diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h > > index af6b4e1cefd2..134b3ab58719 100644 > > --- a/drivers/gpu/drm/nouveau/nouveau_abi16.h > > +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h > > @@ -6,6 +6,7 @@ > > struct drm_device *dev, void *data, struct drm_file *file_priv > > > > int nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS); > > +int nouveau_abi16_ioctl_get_zcull_info(ABI16_IOCTL_ARGS); > > int nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS); > > int nouveau_abi16_ioctl_channel_free(ABI16_IOCTL_ARGS); > > int nouveau_abi16_ioctl_grobj_alloc(ABI16_IOCTL_ARGS); > > diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c > > index 107f63f08bd9..4c4168b50e60 100644 > > --- a/drivers/gpu/drm/nouveau/nouveau_drm.c > > +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c > > @@ -1244,6 +1244,7 @@ nouveau_ioctls[] = { > > DRM_IOCTL_DEF_DRV(NOUVEAU_GROBJ_ALLOC, nouveau_abi16_ioctl_grobj_alloc, DRM_RENDER_ALLOW), > > DRM_IOCTL_DEF_DRV(NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_abi16_ioctl_notifierobj_alloc, DRM_RENDER_ALLOW), > > DRM_IOCTL_DEF_DRV(NOUVEAU_GPUOBJ_FREE, nouveau_abi16_ioctl_gpuobj_free, DRM_RENDER_ALLOW), > > + DRM_IOCTL_DEF_DRV(NOUVEAU_GET_ZCULL_INFO, nouveau_abi16_ioctl_get_zcull_info, DRM_RENDER_ALLOW), > > DRM_IOCTL_DEF_DRV(NOUVEAU_SVM_INIT, nouveau_svmm_init, DRM_RENDER_ALLOW), > > DRM_IOCTL_DEF_DRV(NOUVEAU_SVM_BIND, nouveau_svmm_bind, DRM_RENDER_ALLOW), > > DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_RENDER_ALLOW), > > diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/r535.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/r535.c > > index f4bed3eb1ec2..6669f2b1f492 100644 > > --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/r535.c > > +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/r535.c > > @@ -34,6 +34,7 @@ > > #include <nvrm/535.113.01/common/sdk/nvidia/inc/alloc/alloc_channel.h> > > #include <nvrm/535.113.01/common/sdk/nvidia/inc/ctrl/ctrl0080/ctrl0080fifo.h> > > #include <nvrm/535.113.01/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gpu.h> > > +#include <nvrm/535.113.01/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080gr.h> > > #include <nvrm/535.113.01/common/sdk/nvidia/inc/ctrl/ctrl2080/ctrl2080internal.h> > > #include <nvrm/535.113.01/nvidia/generated/g_kernel_channel_nvoc.h> > > > > @@ -244,6 +245,8 @@ static int > > r535_gr_oneinit(struct nvkm_gr *base) > > { > > NV2080_CTRL_INTERNAL_STATIC_GR_GET_CONTEXT_BUFFERS_INFO_PARAMS *info; > > + NV2080_CTRL_GR_GET_ZCULL_INFO_PARAMS *zcull_info; > > + u32 zcull_ctxsw_size, zcull_ctxsw_align; > > struct r535_gr *gr = container_of(base, typeof(*gr), base); > > struct nvkm_subdev *subdev = &gr->base.engine.subdev; > > struct nvkm_device *device = subdev->device; > > @@ -418,6 +421,11 @@ r535_gr_oneinit(struct nvkm_gr *base) > > } > > } > > > > + zcull_ctxsw_size = info->engineContextBuffersInfo[0] > > + .engine[NV0080_CTRL_FIFO_GET_ENGINE_CONTEXT_PROPERTIES_ENGINE_ID_GRAPHICS_ZCULL].size; > > + zcull_ctxsw_align = info->engineContextBuffersInfo[0] > > + .engine[NV0080_CTRL_FIFO_GET_ENGINE_CONTEXT_PROPERTIES_ENGINE_ID_GRAPHICS_ZCULL].alignment; > > + > > nvkm_gsp_rm_ctrl_done(&gsp->internal.device.subdevice, info); > > > > /* Promote golden context to RM. */ > > @@ -450,6 +458,32 @@ r535_gr_oneinit(struct nvkm_gr *base) > > } > > } > > > > + zcull_info = nvkm_gsp_rm_ctrl_rd(&gsp->internal.device.subdevice, > > + NV2080_CTRL_CMD_GR_GET_ZCULL_INFO, > > + sizeof(*zcull_info)); > > + if (WARN_ON(IS_ERR(zcull_info))) { > > + ret = PTR_ERR(zcull_info); > > + goto done; > > + } > > + > > + device->has_zcull_info = true; > > + > > + device->zcull_info.width_align_pixels = zcull_info->widthAlignPixels; > > + device->zcull_info.height_align_pixels = zcull_info->heightAlignPixels; > > + device->zcull_info.pixel_squares_by_aliquots = zcull_info->pixelSquaresByAliquots; > > + device->zcull_info.aliquot_total = zcull_info->aliquotTotal; > > + device->zcull_info.zcull_region_byte_multiplier = zcull_info->zcullRegionByteMultiplier; > > + device->zcull_info.zcull_region_header_size = zcull_info->zcullRegionHeaderSize; > > + device->zcull_info.zcull_subregion_header_size = zcull_info->zcullSubregionHeaderSize; > > + device->zcull_info.subregion_count = zcull_info->subregionCount; > > + device->zcull_info.subregion_width_align_pixels = zcull_info->subregionWidthAlignPixels; > > + device->zcull_info.subregion_height_align_pixels = zcull_info->subregionHeightAlignPixels; > > + > > + device->zcull_info.ctxsw_size = zcull_ctxsw_size; > > + device->zcull_info.ctxsw_align = zcull_ctxsw_align; > > + > > + nvkm_gsp_rm_ctrl_done(&gsp->internal.device.subdevice, zcull_info); > > Please move this to a separate function. > > > + > > done: > > nvkm_gsp_rm_free(&golden.chan); > > for (int i = gr->ctxbuf_nr - 1; i >= 0; i--) > > diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h > > index dd87f8f30793..33361784eb4e 100644 > > --- a/include/uapi/drm/nouveau_drm.h > > +++ b/include/uapi/drm/nouveau_drm.h > > Please do the uAPI change in a separate commit. > > > @@ -432,6 +432,22 @@ struct drm_nouveau_exec { > > __u64 push_ptr; > > }; > > > > +struct drm_nouveau_get_zcull_info { > > Please add some documentation to this structure. Okay. I guess I'll mostly copy the docs from the CTRL2080 structure. > > + __u32 width_align_pixels; > > + __u32 height_align_pixels; > > + __u32 pixel_squares_by_aliquots; > > + __u32 aliquot_total; > > + __u32 zcull_region_byte_multiplier; > > + __u32 zcull_region_header_size; > > + __u32 zcull_subregion_header_size; > > + __u32 subregion_count; > > + __u32 subregion_width_align_pixels; > > + __u32 subregion_height_align_pixels; > > + > > + __u32 ctxsw_size; > > + __u32 ctxsw_align; > > +}; > > What if this ever changes between hardware revisions or firmware versions? There was some previous discussion of that here: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12596#note_2796853