[AMD Official Use Only - AMD Internal Distribution Only] Hi Jiadong, -----Original Message----- From: Zhu, Jiadong <Jiadong.Zhu@xxxxxxx> Sent: Wednesday, October 23, 2024 11:10 AM To: Zhang, Jesse(Jie) <Jesse.Zhang@xxxxxxx>; Zhang, Jesse(Jie) <Jesse.Zhang@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Koenig, Christian <Christian.Koenig@xxxxxxx> Subject: RE: [PATCH 1/2] drm/amdgpu: add amdgpu_gfx_sched_mask and amdgpu_compute_sched_mask debugfs [AMD Official Use Only - AMD Internal Distribution Only] > -----Original Message----- > From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of > Zhang, > Jesse(Jie) > Sent: Tuesday, October 22, 2024 11:12 AM > To: Zhang, Jesse(Jie) <Jesse.Zhang@xxxxxxx>; > amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Koenig, Christian > <Christian.Koenig@xxxxxxx> > Subject: RE: [PATCH 1/2] drm/amdgpu: add amdgpu_gfx_sched_mask and > amdgpu_compute_sched_mask debugfs > > [AMD Official Use Only - AMD Internal Distribution Only] > > [AMD Official Use Only - AMD Internal Distribution Only] > > Ping on this series? > > > -----Original Message----- > From: Jesse.zhang@xxxxxxx <jesse.zhang@xxxxxxx> > Sent: Friday, October 18, 2024 10:31 AM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Koenig, Christian > <Christian.Koenig@xxxxxxx>; Zhang, Jesse(Jie) <Jesse.Zhang@xxxxxxx> > Subject: [PATCH 1/2] drm/amdgpu: add amdgpu_gfx_sched_mask and > amdgpu_compute_sched_mask debugfs > > compute/gfx may have multiple rings on some hardware. > In some cases, userspace wants to run jobs on a specific ring for > validation purposes. > This debugfs entry helps to disable or enable submitting jobs to a specific ring. > This entry is populated only if there are at least two or more cores > in the gfx/compute ip. 
> > Signed-off-by: Jesse Zhang <jesse.zhang@xxxxxxx> Suggested-by: Alex > Deucher <alexander.deucher@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 + > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 142 ++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 + > 3 files changed, 146 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > index 37d8657f0776..6e3f657cab9c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > @@ -2096,6 +2096,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) > amdgpu_debugfs_umsch_fwlog_init(adev, > &adev->umsch_mm); > > amdgpu_debugfs_jpeg_sched_mask_init(adev); > + amdgpu_debugfs_gfx_sched_mask_init(adev); > + amdgpu_debugfs_compute_sched_mask_init(adev); > > amdgpu_ras_debugfs_create_all(adev); > amdgpu_rap_debugfs_init(adev); diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > index b6acbe923b6b..29997c9f68b6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > @@ -1868,3 +1868,145 @@ void > amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) > } > mutex_unlock(&adev->enforce_isolation_mutex); > } > + > +/* > + * debugfs to enable/disable gfx job submission to a specific core. > + */ > +#if defined(CONFIG_DEBUG_FS) > +static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val) { > + struct amdgpu_device *adev = (struct amdgpu_device *)data; > + u32 i; > + u64 mask = 0; > + struct amdgpu_ring *ring; > + > + if (!adev) > + return -ENODEV; > + > + mask = (1 << adev->gfx.num_gfx_rings) - 1; > + if ((val & mask) == 0) > + return -EINVAL; Is this check used to prevent all the rings from being left unscheduled? Yes, we must keep at least one ring available. 
Regards Jesse Thanks, Jiadong > + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { > + ring = &adev->gfx.gfx_ring[i]; > + if (val & (1 << i)) > + ring->sched.ready = true; > + else > + ring->sched.ready = false; > + } > + /* publish sched.ready flag update effective immediately across smp */ > + smp_rmb(); > + return 0; > +} > + > +static int amdgpu_debugfs_gfx_sched_mask_get(void *data, u64 *val) { > + struct amdgpu_device *adev = (struct amdgpu_device *)data; > + u32 i; > + u64 mask = 0; > + struct amdgpu_ring *ring; > + > + if (!adev) > + return -ENODEV; > + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { > + ring = &adev->gfx.gfx_ring[i]; > + if (ring->sched.ready) > + mask |= 1 << i; > + } > + > + *val = mask; > + return 0; > +} > + > +DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gfx_sched_mask_fops, > + amdgpu_debugfs_gfx_sched_mask_get, > + amdgpu_debugfs_gfx_sched_mask_set, "%llx\n"); > + > +#endif > + > +void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev) { > +#if defined(CONFIG_DEBUG_FS) > + struct drm_minor *minor = adev_to_drm(adev)->primary; > + struct dentry *root = minor->debugfs_root; > + char name[32]; > + > + if (!(adev->gfx.num_gfx_rings > 1)) > + return; > + sprintf(name, "amdgpu_gfx_sched_mask"); > + debugfs_create_file(name, 0600, root, adev, > + &amdgpu_debugfs_gfx_sched_mask_fops); > +#endif > +} > + > +/* > + * debugfs to enable/disable compute job submission to a specific core. 
> + */ > +#if defined(CONFIG_DEBUG_FS) > +static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val) { > + struct amdgpu_device *adev = (struct amdgpu_device *)data; > + u32 i; > + u64 mask = 0; > + struct amdgpu_ring *ring; > + > + if (!adev) > + return -ENODEV; > + > + mask = (1 << adev->gfx.num_compute_rings) - 1; > + if ((val & mask) == 0) > + return -EINVAL; > + > + for (i = 0; i < adev->gfx.num_compute_rings; ++i) { > + ring = &adev->gfx.compute_ring[i]; > + if (val & (1 << i)) > + ring->sched.ready = true; > + else > + ring->sched.ready = false; > + } > + > + /* publish sched.ready flag update effective immediately across smp */ > + smp_rmb(); > + return 0; > +} > + > +static int amdgpu_debugfs_compute_sched_mask_get(void *data, u64 > +*val) { > + struct amdgpu_device *adev = (struct amdgpu_device *)data; > + u32 i; > + u64 mask = 0; > + struct amdgpu_ring *ring; > + > + if (!adev) > + return -ENODEV; > + for (i = 0; i < adev->gfx.num_compute_rings; ++i) { > + ring = &adev->gfx.compute_ring[i]; > + if (ring->sched.ready) > + mask |= 1 << i; > + } > + > + *val = mask; > + return 0; > +} > + > +DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_compute_sched_mask_fop > s, > + amdgpu_debugfs_compute_sched_mask_get, > + amdgpu_debugfs_compute_sched_mask_set, > +"%llx\n"); > + > +#endif > + > +void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device > +*adev) { #if defined(CONFIG_DEBUG_FS) > + struct drm_minor *minor = adev_to_drm(adev)->primary; > + struct dentry *root = minor->debugfs_root; > + char name[32]; > + > + if (!(adev->gfx.num_compute_rings > 1)) > + return; > + sprintf(name, "amdgpu_compute_sched_mask"); > + debugfs_create_file(name, 0600, root, adev, > + &amdgpu_debugfs_compute_sched_mask_fops); > +#endif > +} > + > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > index f710178a21bc..9275c02c94c6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > @@ -582,6 +582,8 @@ void amdgpu_gfx_sysfs_isolation_shader_fini(struct > amdgpu_device *adev); void > amdgpu_gfx_enforce_isolation_handler(struct > work_struct *work); void > amdgpu_gfx_enforce_isolation_ring_begin_use(struct > amdgpu_ring *ring); void > amdgpu_gfx_enforce_isolation_ring_end_use(struct > amdgpu_ring *ring); > +void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev); > +void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device > +*adev); > > static inline const char *amdgpu_gfx_compute_mode_desc(int mode) { > -- > 2.25.1