On Wed, May 01, 2024 at 07:50:43PM +0100, Adrián Larumbe wrote: > Up to this day, all fdinfo-based GPU profilers must traverse the entire > /proc directory structure to find open DRM clients with fdinfo file > descriptors. This is inefficient and time-consuming. > > This patch adds a new DRM ioctl that allows users to obtain a list of PIDs > for clients who have opened the DRM device. Output from the ioctl isn't > human-readable, and it's meant to be retrieved only by GPU profilers like > gputop and nvtop. > > Cc: Rob Clark <robdclark@xxxxxxxxx> > Cc: Tvrtko Ursulin <tursulin@xxxxxxxxxxx> > Signed-off-by: Adrián Larumbe <adrian.larumbe@xxxxxxxxxxxxx> > --- > drivers/gpu/drm/drm_internal.h | 1 + > drivers/gpu/drm/drm_ioctl.c | 89 ++++++++++++++++++++++++++++++++++ > include/uapi/drm/drm.h | 7 +++ > 3 files changed, 97 insertions(+) > > diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h > index 690505a1f7a5..6f78954cae16 100644 > --- a/drivers/gpu/drm/drm_internal.h > +++ b/drivers/gpu/drm/drm_internal.h > @@ -243,6 +243,7 @@ static inline void drm_debugfs_encoder_remove(struct drm_encoder *encoder) > drm_ioctl_t drm_version; > drm_ioctl_t drm_getunique; > drm_ioctl_t drm_getclient; > +drm_ioctl_t drm_getclients; > > /* drm_syncobj.c */ > void drm_syncobj_open(struct drm_file *file_private); > diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c > index e368fc084c77..da7057376581 100644 > --- a/drivers/gpu/drm/drm_ioctl.c > +++ b/drivers/gpu/drm/drm_ioctl.c > @@ -207,6 +207,93 @@ int drm_getclient(struct drm_device *dev, void *data, > } > } > > +/* > + * Get list of client PIDs who have opened a DRM file > + * > + * \param dev DRM device we are querying > + * \param data IOCTL command input. > + * \param file_priv DRM file private. > + * > + * \return zero on success or a negative number on failure. > + * > + * Traverses list of open clients for the given DRM device, and > + * copies them into userpace as an array of PIDs > + */ > +int drm_getclients(struct drm_device *dev, void *data, > + struct drm_file *file_priv) > + > +{ > + struct drm_get_clients *get_clients = data; > + ssize_t size = get_clients->len; > + char __user *pid_buf; > + ssize_t offset = 0; > + int ret = 0; > + > + /* > + * We do not want to show clients of display only devices so > + * as to avoid confusing UM GPU profilers > + */ > + if (!dev->render) { > + get_clients->len = 0; > + return 0; > + } > + > + /* > + * An input size of zero means UM wants to know the size of the PID buffer > + * We round it up to the nearest multiple of the page size so that we can have > + * some spare headroom in case more clients came in between successive calls > + * of this ioctl, and also to simplify parsing of the PIDs buffer, because > + * sizeof(pid_t) will hopefully always divide PAGE_SIZE > + */ > + if (size == 0) { > + get_clients->len = > + roundup(atomic_read(&dev->open_count) * sizeof(pid_t), PAGE_SIZE); > + return 0; > + } > + > + pid_buf = (char *)(void *)get_clients->user_data; > + > + if (!pid_buf) > + return -EINVAL; > + > + mutex_lock(&dev->filelist_mutex); > + list_for_each_entry_reverse(file_priv, &dev->filelist, lhead) { > + pid_t pid_num; > + > + if ((size - offset) < sizeof(pid_t)) > + break; > + > + rcu_read_lock(); > + pid_num = pid_vnr(rcu_dereference(file_priv->pid)); > + rcu_read_unlock(); > + > + /* We do not want to return the profiler's PID */ > + if (pid_vnr(task_tgid(current)) == pid_num) > + continue; > + > + ret = copy_to_user(pid_buf + offset, &pid_num, sizeof(pid_t)); > + if (ret) > + break; > + > + offset += sizeof(pid_t); > + } > + mutex_unlock(&dev->filelist_mutex); > + > + if (ret) > + return -EFAULT; > + > + if ((size - offset) >= sizeof(pid_t)) { > + pid_t pid_zero = 0; > + > + ret = copy_to_user(pid_buf + offset, > + &pid_zero, sizeof(pid_t)); > + if (ret) > + return -EFAULT; > + } > + > + return 0; > +} > + > /* > * Get statistics information. > * > @@ -672,6 +759,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { > DRM_IOCTL_DEF(DRM_IOCTL_MODE_LIST_LESSEES, drm_mode_list_lessees_ioctl, DRM_MASTER), > DRM_IOCTL_DEF(DRM_IOCTL_MODE_GET_LEASE, drm_mode_get_lease_ioctl, DRM_MASTER), > DRM_IOCTL_DEF(DRM_IOCTL_MODE_REVOKE_LEASE, drm_mode_revoke_lease_ioctl, DRM_MASTER), > + > + DRM_IOCTL_DEF(DRM_IOCTL_GET_CLIENTS, drm_getclients, DRM_RENDER_ALLOW), Uh RENDER_ALLOW sounds like a very bad idea for this, flat out leaks information that really paranoid people don't want to have leaked. Natural would be to limit to ptrace ability, but ptrace is for processes and fd aren't tied to that. So I'm not sure ... I think a separate file (whether in procfs or a special chardev) where you can set access rights that suit feels a lot better for this. Plus putting it into procfs would also give the natural property that you can only read it if you have access to procfs anyway, so imo that feels like the best place for this ... And yes that means some lkml bikeshedding with procfs folks, but I do think that's good here since we're likely not the only ones who need a bit faster procfs trawling for some special use-cases. So consistency across subsystems would be nice to at least try to aim for. -Sima > }; > > #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE(drm_ioctls) > diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h > index 16122819edfe..c47aa9de51ab 100644 > --- a/include/uapi/drm/drm.h > +++ b/include/uapi/drm/drm.h > @@ -1024,6 +1024,11 @@ struct drm_crtc_queue_sequence { > __u64 user_data; /* user data passed to event */ > }; > > +struct drm_get_clients { > + __u64 user_data; > + __kernel_size_t len; > +}; > + > #if defined(__cplusplus) > } > #endif > @@ -1236,6 +1241,8 @@ extern "C" { > #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer) > #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array) > > +#define DRM_IOCTL_GET_CLIENTS DRM_IOWR(0xD1, struct drm_get_clients) > + > /** > * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata. > * > -- > 2.44.0 > -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch