On Mon, Aug 11, 2014 at 5:08 AM, Christian König <deathsimple@xxxxxxxxxxx> wrote:
> On 07.08.2014 at 21:43, Alex Deucher wrote:
>
>> On Thu, Aug 7, 2014 at 11:32 AM, Christian König
>> <deathsimple@xxxxxxxxxxx> wrote:
>>>
>>> On 07.08.2014 at 16:32, Alex Deucher wrote:
>>>
>>>> On Thu, Aug 7, 2014 at 7:33 AM, Christian König
>>>> <deathsimple@xxxxxxxxxxx> wrote:
>>>>>
>>>>> From: Marco A Benatto <marco.antonio.780@xxxxxxxxx>
>>>>>
>>>>> Add Frames Per Second estimation logic for UVD handles
>>>>> while they are in use. The estimation is done per handle
>>>>> and will help with the DPM profile calculation.
>>>>>
>>>>> v2 (chk): fix timestamp type, move functions around and
>>>>> clean up the code a bit.
>>>>
>>>> Will this really help much? I thought the problem was mainly due to
>>>> sclk and mclk for post processing.
>>>
>>> It should at least handle the UVD side for upclocking when you get a lot
>>> of streams / fps. And at least on my NI the patch seems to do exactly that.
>>>
>>> Switching sclk and mclk for post processing is a different task, and I
>>> actually have no idea what to do with them.
>>
>> At this point we always choose the plain UVD state anyway, so this
>> patch would only take effect if we re-enabled the dynamic UVD state
>> selection.
>
> Hui? I thought we already re-enabled the dynamic UVD state selection, but
> double-checking this I found it disabled again.
>
> What was the problem with that? Looks like I somehow missed the discussion
> around it.

We did, but after doing so a number of people complained about a
regression on IRC, because when apps like XBMC enabled post processing,
performance went down.

Alex

> Christian.
>
>> For the post processing, we probably need a hint we can
>> pass to the driver in the CS ioctl to denote what state we need.
>> Although if we did that, this code would largely be moot. That said,
>> newer asics support dynamic UVD clocks, so we really only need
>> something like that for older asics and I guess VCE.
>>
>> Alex
>>
>>> Christian.
>>>
>>>
>>>> Alex
>>>>
>>>>> Signed-off-by: Marco A Benatto <marco.antonio.780@xxxxxxxxx>
>>>>> Signed-off-by: Christian König <christian.koenig@xxxxxxx>
>>>>> ---
>>>>>  drivers/gpu/drm/radeon/radeon.h     | 10 ++++++
>>>>>  drivers/gpu/drm/radeon/radeon_uvd.c | 64 +++++++++++++++++++++++++++++++++----
>>>>>  2 files changed, 68 insertions(+), 6 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
>>>>> index 9e1732e..e92f6cb 100644
>>>>> --- a/drivers/gpu/drm/radeon/radeon.h
>>>>> +++ b/drivers/gpu/drm/radeon/radeon.h
>>>>> @@ -1617,6 +1617,15 @@ int radeon_pm_get_type_index(struct radeon_device *rdev,
>>>>>  #define RADEON_UVD_STACK_SIZE   (1024*1024)
>>>>>  #define RADEON_UVD_HEAP_SIZE    (1024*1024)
>>>>>
>>>>> +#define RADEON_UVD_FPS_EVENTS_MAX 8
>>>>> +#define RADEON_UVD_DEFAULT_FPS 60
>>>>> +
>>>>> +struct radeon_uvd_fps {
>>>>> +        uint64_t        timestamp;
>>>>> +        uint8_t         event_index;
>>>>> +        uint8_t         events[RADEON_UVD_FPS_EVENTS_MAX];
>>>>> +};
>>>>> +
>>>>>  struct radeon_uvd {
>>>>>          struct radeon_bo        *vcpu_bo;
>>>>>          void                    *cpu_addr;
>>>>> @@ -1626,6 +1635,7 @@ struct radeon_uvd {
>>>>>          struct drm_file         *filp[RADEON_MAX_UVD_HANDLES];
>>>>>          unsigned                img_size[RADEON_MAX_UVD_HANDLES];
>>>>>          struct delayed_work     idle_work;
>>>>> +        struct radeon_uvd_fps   fps_info[RADEON_MAX_UVD_HANDLES];
>>>>>  };
>>>>>
>>>>>  int radeon_uvd_init(struct radeon_device *rdev);
>>>>> diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
>>>>> index 6bf55ec..ef5667a 100644
>>>>> --- a/drivers/gpu/drm/radeon/radeon_uvd.c
>>>>> +++ b/drivers/gpu/drm/radeon/radeon_uvd.c
>>>>> @@ -237,6 +237,51 @@ void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo)
>>>>>          rbo->placement.lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
>>>>>  }
>>>>>
>>>>> +static void radeon_uvd_fps_clear_events(struct radeon_device *rdev, int idx)
>>>>> +{
>>>>> +        struct radeon_uvd_fps *fps = &rdev->uvd.fps_info[idx];
>>>>> +        unsigned i;
>>>>> +
>>>>> +        fps->timestamp = jiffies_64;
>>>>> +        fps->event_index = 0;
>>>>> +        for (i = 0; i < RADEON_UVD_FPS_EVENTS_MAX; i++)
>>>>> +                fps->events[i] = 0;
>>>>> +}
>>>>> +
>>>>> +static void radeon_uvd_fps_note_event(struct radeon_device *rdev, int idx)
>>>>> +{
>>>>> +        struct radeon_uvd_fps *fps = &rdev->uvd.fps_info[idx];
>>>>> +        uint64_t timestamp = jiffies_64;
>>>>> +        unsigned rate = 0;
>>>>> +
>>>>> +        uint8_t index = fps->event_index++;
>>>>> +        fps->event_index %= RADEON_UVD_FPS_EVENTS_MAX;
>>>>> +
>>>>> +        rate = div64_u64(HZ, max(timestamp - fps->timestamp, 1ULL));
>>>>> +
>>>>> +        fps->timestamp = timestamp;
>>>>> +        fps->events[index] = min(rate, 120u);
>>>>> +}
>>>>> +
>>>>> +static unsigned radeon_uvd_estimate_fps(struct radeon_device *rdev, int idx)
>>>>> +{
>>>>> +        struct radeon_uvd_fps *fps = &rdev->uvd.fps_info[idx];
>>>>> +        unsigned i, valid = 0, count = 0;
>>>>> +
>>>>> +        for (i = 0; i < RADEON_UVD_FPS_EVENTS_MAX; i++) {
>>>>> +                /* We should ignore zero values */
>>>>> +                if (fps->events[i] != 0) {
>>>>> +                        count += fps->events[i];
>>>>> +                        valid++;
>>>>> +                }
>>>>> +        }
>>>>> +
>>>>> +        if (valid > 0)
>>>>> +                return count / valid;
>>>>> +        else
>>>>> +                return RADEON_UVD_DEFAULT_FPS;
>>>>> +}
>>>>> +
>>>>>  void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
>>>>>  {
>>>>>          int i, r;
>>>>> @@ -419,8 +464,10 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
>>>>>
>>>>>          /* create or decode, validate the handle */
>>>>>          for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
>>>>> -                if (atomic_read(&p->rdev->uvd.handles[i]) == handle)
>>>>> +                if (atomic_read(&p->rdev->uvd.handles[i]) == handle) {
>>>>> +                        radeon_uvd_fps_note_event(p->rdev, i);
>>>>>                          return 0;
>>>>> +                }
>>>>>          }
>>>>>
>>>>>          /* handle not found try to alloc a new one */
>>>>> @@ -428,6 +475,7 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
>>>>>                  if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
>>>>>                          p->rdev->uvd.filp[i] = p->filp;
>>>>>                          p->rdev->uvd.img_size[i] = img_size;
>>>>> +                        radeon_uvd_fps_clear_events(p->rdev, i);
>>>>>                          return 0;
>>>>>                  }
>>>>>          }
>>>>> @@ -763,7 +811,7 @@ int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
>>>>>  static void radeon_uvd_count_handles(struct radeon_device *rdev,
>>>>>                                       unsigned *sd, unsigned *hd)
>>>>>  {
>>>>> -        unsigned i;
>>>>> +        unsigned i, fps_rate = 0;
>>>>>
>>>>>          *sd = 0;
>>>>>          *hd = 0;
>>>>> @@ -772,10 +820,13 @@ static void radeon_uvd_count_handles(struct radeon_device *rdev,
>>>>>                  if (!atomic_read(&rdev->uvd.handles[i]))
>>>>>                          continue;
>>>>>
>>>>> -                if (rdev->uvd.img_size[i] >= 720*576)
>>>>> -                        ++(*hd);
>>>>> -                else
>>>>> -                        ++(*sd);
>>>>> +                fps_rate = radeon_uvd_estimate_fps(rdev, i);
>>>>> +
>>>>> +                if (rdev->uvd.img_size[i] >= 720*576) {
>>>>> +                        (*hd) += fps_rate > 30 ? 1 : 2;
>>>>> +                } else {
>>>>> +                        (*sd) += fps_rate > 30 ? 1 : 2;
>>>>> +                }
>>>>>          }
>>>>>  }
>>>>>
>>>>> @@ -805,6 +856,7 @@ void radeon_uvd_note_usage(struct radeon_device *rdev)
>>>>>          set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work,
>>>>>                                              msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
>>>>>
>>>>> +
>>>>>          if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
>>>>>                  unsigned hd = 0, sd = 0;
>>>>>                  radeon_uvd_count_handles(rdev, &sd, &hd);
>>>>> --
>>>>> 1.9.1
>>>>>
>
_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/dri-devel
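
For readers skimming the thread, the estimation scheme in the patch boils down to a small sliding window per UVD handle: every decoded frame stores an instantaneous rate (HZ divided by the jiffies since the previous frame, clamped to 120), and the estimate is the average of the non-zero slots, falling back to 60 fps when no events have been recorded yet. Below is a rough, self-contained userspace sketch of that logic, not the kernel code itself; plain millisecond timestamps stand in for jiffies_64/HZ, and all names (fps_window, fps_note_event, ...) are illustrative only.

/* Rough userspace model of the per-handle FPS estimation in the patch.
 * Millisecond timestamps stand in for jiffies_64/HZ; names are made up.
 */
#include <stdint.h>
#include <stdio.h>

#define FPS_EVENTS_MAX 8   /* mirrors RADEON_UVD_FPS_EVENTS_MAX */
#define DEFAULT_FPS    60  /* mirrors RADEON_UVD_DEFAULT_FPS */

struct fps_window {
        uint64_t last_ms;                /* timestamp of the previous event */
        uint8_t  index;                  /* next slot in the ring buffer */
        uint8_t  events[FPS_EVENTS_MAX]; /* recent instantaneous rates */
};

static void fps_clear(struct fps_window *w, uint64_t now_ms)
{
        unsigned i;

        w->last_ms = now_ms;
        w->index = 0;
        for (i = 0; i < FPS_EVENTS_MAX; i++)
                w->events[i] = 0;
}

/* Record one decode event: store 1000 / delta, clamped to 120 fps. */
static void fps_note_event(struct fps_window *w, uint64_t now_ms)
{
        uint64_t delta = now_ms - w->last_ms;
        unsigned rate;

        if (delta == 0)
                delta = 1;               /* avoid division by zero */
        rate = (unsigned)(1000 / delta);
        if (rate > 120)
                rate = 120;

        w->events[w->index] = (uint8_t)rate;
        w->index = (w->index + 1) % FPS_EVENTS_MAX;
        w->last_ms = now_ms;
}

/* Average the non-zero samples; fall back to the default if none yet. */
static unsigned fps_estimate(const struct fps_window *w)
{
        unsigned i, valid = 0, sum = 0;

        for (i = 0; i < FPS_EVENTS_MAX; i++) {
                if (w->events[i] != 0) {
                        sum += w->events[i];
                        valid++;
                }
        }
        return valid ? sum / valid : DEFAULT_FPS;
}

int main(void)
{
        struct fps_window w;
        uint64_t t = 0;
        int i;

        fps_clear(&w, t);
        printf("no events yet: %u fps (default)\n", fps_estimate(&w));

        /* Simulate a ~30 fps stream: one decode event every 33 ms. */
        for (i = 0; i < 10; i++) {
                t += 33;
                fps_note_event(&w, t);
        }
        printf("steady ~33 ms spacing: %u fps\n", fps_estimate(&w));
        return 0;
}

Slots that have never been written stay zero and are skipped by the average (the "ignore zero values" check in the patch), so a freshly created handle reports the 60 fps default until real events arrive. In the patch itself the resulting per-handle estimate then weights the sd/hd counts that radeon_uvd_count_handles feeds into the DPM state selection.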