Re: [PATCH v5 2/4] drm/panthor: add DRM fdinfo support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue,  3 Sep 2024 21:25:36 +0100
Adrián Larumbe <adrian.larumbe@xxxxxxxxxxxxx> wrote:

> Drawing from the FW-calculated values in the previous commit, we can
> increase the numbers for an open file by collecting them from finished jobs
> when updating their group synchronisation objects.
> 
> Display of fdinfo key-value pairs is governed by a flag that is by default
> disabled in the present commit, and supporting manual toggle of it will be
> the matter of a later commit.
> 
> Signed-off-by: Adrián Larumbe <adrian.larumbe@xxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/panthor/panthor_devfreq.c | 18 ++++++++-
>  drivers/gpu/drm/panthor/panthor_device.h  | 14 +++++++
>  drivers/gpu/drm/panthor/panthor_drv.c     | 35 ++++++++++++++++++
>  drivers/gpu/drm/panthor/panthor_sched.c   | 45 +++++++++++++++++++++++
>  4 files changed, 111 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.c b/drivers/gpu/drm/panthor/panthor_devfreq.c
> index c6d3c327cc24..9d0f891b9b53 100644
> --- a/drivers/gpu/drm/panthor/panthor_devfreq.c
> +++ b/drivers/gpu/drm/panthor/panthor_devfreq.c
> @@ -62,14 +62,20 @@ static void panthor_devfreq_update_utilization(struct panthor_devfreq *pdevfreq)
>  static int panthor_devfreq_target(struct device *dev, unsigned long *freq,
>  				  u32 flags)
>  {
> +	struct panthor_device *ptdev = dev_get_drvdata(dev);
>  	struct dev_pm_opp *opp;
> +	int err;
>  
>  	opp = devfreq_recommended_opp(dev, freq, flags);
>  	if (IS_ERR(opp))
>  		return PTR_ERR(opp);
>  	dev_pm_opp_put(opp);
>  
> -	return dev_pm_opp_set_rate(dev, *freq);
> +	err = dev_pm_opp_set_rate(dev, *freq);
> +	if (!err)
> +		ptdev->current_frequency = *freq;
> +
> +	return err;
>  }
>  
>  static void panthor_devfreq_reset(struct panthor_devfreq *pdevfreq)
> @@ -130,6 +136,7 @@ int panthor_devfreq_init(struct panthor_device *ptdev)
>  	struct panthor_devfreq *pdevfreq;
>  	struct dev_pm_opp *opp;
>  	unsigned long cur_freq;
> +	unsigned long freq = ULONG_MAX;
>  	int ret;
>  
>  	pdevfreq = drmm_kzalloc(&ptdev->base, sizeof(*ptdev->devfreq), GFP_KERNEL);
> @@ -161,6 +168,7 @@ int panthor_devfreq_init(struct panthor_device *ptdev)
>  		return PTR_ERR(opp);
>  
>  	panthor_devfreq_profile.initial_freq = cur_freq;
> +	ptdev->current_frequency = cur_freq;
>  
>  	/* Regulator coupling only takes care of synchronizing/balancing voltage
>  	 * updates, but the coupled regulator needs to be enabled manually.
> @@ -204,6 +212,14 @@ int panthor_devfreq_init(struct panthor_device *ptdev)
>  
>  	dev_pm_opp_put(opp);
>  
> +	/* Find the fastest defined rate  */
> +	opp = dev_pm_opp_find_freq_floor(dev, &freq);
> +	if (IS_ERR(opp))
> +		return PTR_ERR(opp);
> +	ptdev->fast_rate = freq;
> +
> +	dev_pm_opp_put(opp);
> +
>  	/*
>  	 * Setup default thresholds for the simple_ondemand governor.
>  	 * The values are chosen based on experiments.
> diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
> index a48e30d0af30..0e68f5a70d20 100644
> --- a/drivers/gpu/drm/panthor/panthor_device.h
> +++ b/drivers/gpu/drm/panthor/panthor_device.h
> @@ -184,6 +184,17 @@ struct panthor_device {
>  
>  	/** @profile_mask: User-set profiling flags for job accounting. */
>  	u32 profile_mask;
> +
> +	/** @current_frequency: Device clock frequency at present. Set by DVFS*/
> +	unsigned long current_frequency;
> +
> +	/** @fast_rate: Maximum device clock frequency. Set by DVFS */
> +	unsigned long fast_rate;
> +};

Can we move the current_frequency/fast_rate retrieval in a separate
patch?

> +
> +struct panthor_gpu_usage {
> +	u64 time;
> +	u64 cycles;
>  };
>  
>  /**
> @@ -198,6 +209,9 @@ struct panthor_file {
>  
>  	/** @groups: Scheduling group pool attached to this file. */
>  	struct panthor_group_pool *groups;
> +
> +	/** @stats: cycle and timestamp measures for job execution. */
> +	struct panthor_gpu_usage stats;
>  };
>  
>  int panthor_device_init(struct panthor_device *ptdev);
> diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
> index b5e7b919f241..e18838754963 100644
> --- a/drivers/gpu/drm/panthor/panthor_drv.c
> +++ b/drivers/gpu/drm/panthor/panthor_drv.c
> @@ -3,12 +3,17 @@
>  /* Copyright 2019 Linaro, Ltd., Rob Herring <robh@xxxxxxxxxx> */
>  /* Copyright 2019 Collabora ltd. */
>  
> +#ifdef CONFIG_ARM_ARCH_TIMER
> +#include <asm/arch_timer.h>
> +#endif
> +
>  #include <linux/list.h>
>  #include <linux/module.h>
>  #include <linux/of_platform.h>
>  #include <linux/pagemap.h>
>  #include <linux/platform_device.h>
>  #include <linux/pm_runtime.h>
> +#include <linux/time64.h>
>  
>  #include <drm/drm_debugfs.h>
>  #include <drm/drm_drv.h>
> @@ -1351,6 +1356,34 @@ static int panthor_mmap(struct file *filp, struct vm_area_struct *vma)
>  	return ret;
>  }
>  
> +static void panthor_gpu_show_fdinfo(struct panthor_device *ptdev,
> +				    struct panthor_file *pfile,
> +				    struct drm_printer *p)
> +{
> +	if (ptdev->profile_mask & PANTHOR_DEVICE_PROFILING_TIMESTAMP) {
> +#ifdef CONFIG_ARM_ARCH_TIMER
> +		drm_printf(p, "drm-engine-panthor:\t%llu ns\n",
> +			   DIV_ROUND_UP_ULL((pfile->stats.time * NSEC_PER_SEC),
> +					    arch_timer_get_cntfrq()));
> +#endif
> +	}
> +	if (ptdev->profile_mask & PANTHOR_DEVICE_PROFILING_CYCLES)
> +		drm_printf(p, "drm-cycles-panthor:\t%llu\n", pfile->stats.cycles);

Don't know if that's an issue, but another thread might be updating the
stats while show_fdinfo() is run, which means the data you return might
be coming from two different sampling points.

> +
> +	drm_printf(p, "drm-maxfreq-panthor:\t%lu Hz\n", ptdev->fast_rate);
> +	drm_printf(p, "drm-curfreq-panthor:\t%lu Hz\n", ptdev->current_frequency);
> +}
> +
> +static void panthor_show_fdinfo(struct drm_printer *p, struct drm_file *file)
> +{
> +	struct drm_device *dev = file->minor->dev;
> +	struct panthor_device *ptdev = container_of(dev, struct panthor_device, base);
> +
> +	panthor_gpu_show_fdinfo(ptdev, file->driver_priv, p);
> +
> +	drm_show_memory_stats(p, file);
> +}
> +
>  static const struct file_operations panthor_drm_driver_fops = {
>  	.open = drm_open,
>  	.release = drm_release,
> @@ -1360,6 +1393,7 @@ static const struct file_operations panthor_drm_driver_fops = {
>  	.read = drm_read,
>  	.llseek = noop_llseek,
>  	.mmap = panthor_mmap,
> +	.show_fdinfo = drm_show_fdinfo,
>  };
>  
>  #ifdef CONFIG_DEBUG_FS
> @@ -1378,6 +1412,7 @@ static const struct drm_driver panthor_drm_driver = {
>  			   DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
>  	.open = panthor_open,
>  	.postclose = panthor_postclose,
> +	.show_fdinfo = panthor_show_fdinfo,
>  	.ioctls = panthor_drm_driver_ioctls,
>  	.num_ioctls = ARRAY_SIZE(panthor_drm_driver_ioctls),
>  	.fops = &panthor_drm_driver_fops,
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
> index b087648bf59a..e69ab5175ae8 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.c
> +++ b/drivers/gpu/drm/panthor/panthor_sched.c
> @@ -619,6 +619,18 @@ struct panthor_group {
>  	 */
>  	struct panthor_kernel_bo *syncobjs;
>  
> +	/** @fdinfo: Per-file total cycle and timestamp values reference. */
> +	struct {
> +		/** @data: Pointer to actual per-file sample data. */
> +		struct panthor_gpu_usage *data;
> +
> +		/**
> +		 * @lock: Mutex to govern concurrent access from drm file's fdinfo callback
> +		 * and job post-completion processing function
> +		 */
> +		struct mutex lock;
> +	} fdinfo;
> +
>  	/** @state: Group state. */
>  	enum panthor_group_state state;
>  
> @@ -886,6 +898,8 @@ static void group_release_work(struct work_struct *work)
>  						   release_work);
>  	u32 i;
>  
> +	mutex_destroy(&group->fdinfo.lock);
> +
>  	for (i = 0; i < group->queue_count; i++)
>  		group_free_queue(group, group->queues[i]);
>  
> @@ -2808,6 +2822,28 @@ void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed)
>  	}
>  }
>  
> +static void update_fdinfo_stats(struct panthor_job *job)
> +{
> +	struct panthor_group *group = job->group;
> +	struct panthor_queue *queue = group->queues[job->queue_idx];
> +	struct panthor_gpu_usage *fdinfo;
> +	struct panthor_job_profiling_data *times;
> +
> +	times = (struct panthor_job_profiling_data *)
> +		((unsigned long) queue->profiling_info.slots->kmap +
> +		 (job->profiling_slot * sizeof(struct panthor_job_profiling_data)));
> +
> +	mutex_lock(&group->fdinfo.lock);
> +	if ((group->fdinfo.data)) {
> +		fdinfo = group->fdinfo.data;
> +		if (job->profile_mask & PANTHOR_DEVICE_PROFILING_CYCLES)
> +			fdinfo->cycles += times->cycles.after - times->cycles.before;
> +		if (job->profile_mask & PANTHOR_DEVICE_PROFILING_TIMESTAMP)
> +			fdinfo->time += times->time.after - times->time.before;
> +	}
> +	mutex_unlock(&group->fdinfo.lock);
> +}
> +
>  static void group_sync_upd_work(struct work_struct *work)
>  {
>  	struct panthor_group *group =
> @@ -2840,6 +2876,8 @@ static void group_sync_upd_work(struct work_struct *work)
>  	dma_fence_end_signalling(cookie);
>  
>  	list_for_each_entry_safe(job, job_tmp, &done_jobs, node) {
> +		if (job->profile_mask)
> +			update_fdinfo_stats(job);
>  		list_del_init(&job->node);
>  		panthor_job_put(&job->base);
>  	}
> @@ -3430,6 +3468,9 @@ int panthor_group_create(struct panthor_file *pfile,
>  	}
>  	mutex_unlock(&sched->reset.lock);
>  
> +	group->fdinfo.data = &pfile->stats;
> +	mutex_init(&group->fdinfo.lock);
> +
>  	return gid;
>  
>  err_put_group:
> @@ -3469,6 +3510,10 @@ int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle)
>  	mutex_unlock(&sched->lock);
>  	mutex_unlock(&sched->reset.lock);
>  
> +	mutex_lock(&group->fdinfo.lock);
> +	group->fdinfo.data = NULL;
> +	mutex_unlock(&group->fdinfo.lock);
> +
>  	group_put(group);
>  	return 0;
>  }






[Index of Archives]     [Linux Input]     [Video for Linux]     [Gstreamer Embedded]     [Mplayer Users]     [Linux USB Devel]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [Yosemite Backpacking]

  Powered by Linux