From: Sourab Gupta <sourab.gupta@xxxxxxxxx>

The asynchronous OA counter snapshot collection mode requires
MI_REPORT_PERF_COUNT commands to be inserted into the ringbuffer.
Therefore, during the stop event call, we need to wait for the GPU to
finish processing the last request for which an MI_RPC command was
inserted. We must ensure this processing has completed before the
event_destroy callback runs, since that callback deallocates the buffer.

Signed-off-by: Sourab Gupta <sourab.gupta@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h     |  2 +
 drivers/gpu/drm/i915/i915_oa_perf.c | 95 ++++++++++++++++++++++++++++++-------
 2 files changed, 81 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d738f7a..5453842 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1979,6 +1979,8 @@ struct drm_i915_private {
 		u8 *snapshot;
 	} oa_async_buffer;
 	struct work_struct work_timer;
+	struct work_struct work_event_stop;
+	struct completion complete;
 } oa_pmu;
 #endif
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index 3bf4c47..5d63dab 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -118,6 +118,9 @@ void forward_oa_async_snapshots_work(struct work_struct *__work)
 	int ret, head, tail, num_nodes;
 	struct drm_i915_gem_request *req;
 
+	if (dev_priv->oa_pmu.event_active == false)
+		return;
+
 	first_node = (struct drm_i915_oa_async_node *)
 			((char *)hdr + hdr->data_offset);
 	num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
@@ -298,6 +301,7 @@ static void flush_oa_snapshots(struct drm_i915_private *dev_priv,
 
 static void oa_async_buffer_destroy(struct drm_i915_private *i915)
 {
+	wait_for_completion(&i915->oa_pmu.complete);
 	mutex_lock(&i915->dev->struct_mutex);
 
 	vunmap(i915->oa_pmu.oa_async_buffer.addr);
@@ -854,6 +858,63 @@ static void config_oa_regs(struct drm_i915_private *dev_priv,
 	}
 }
 
+
+void i915_oa_async_stop_work_fn(struct work_struct *__work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(__work, typeof(*dev_priv),
+			oa_pmu.work_event_stop);
+	struct perf_event *event = dev_priv->oa_pmu.exclusive_event;
+	struct drm_i915_oa_async_queue_header *hdr =
+		(struct drm_i915_oa_async_queue_header *)
+		dev_priv->oa_pmu.oa_async_buffer.addr;
+	struct drm_i915_oa_async_node *first_node, *node;
+	struct drm_i915_gem_request *req;
+	int ret, head, tail, num_nodes;
+
+	first_node = (struct drm_i915_oa_async_node *)
+			((char *)hdr + hdr->data_offset);
+	num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
+			sizeof(*node);
+
+
+	ret = i915_mutex_lock_interruptible(dev_priv->dev);
+	if (ret)
+		return;
+
+	dev_priv->oa_pmu.event_active = false;
+
+	i915_oa_async_wait_gpu(dev_priv);
+
+	update_oacontrol(dev_priv);
+	mmiowb();
+
+	/* Ensure that all requests are completed */
+	tail = hdr->node_count;
+	head = dev_priv->oa_pmu.oa_async_buffer.head;
+	while ((head % num_nodes) != (tail % num_nodes)) {
+		node = &first_node[head % num_nodes];
+		req = node->node_info.req;
+		if (req && !i915_gem_request_completed(req, true))
+			WARN_ON(1);
+		head++;
+	}
+
+	if (event->attr.sample_period) {
+		hrtimer_cancel(&dev_priv->oa_pmu.timer);
+		flush_oa_snapshots(dev_priv, false);
+	}
+	cancel_work_sync(&dev_priv->oa_pmu.work_timer);
+
+	dev_priv->oa_pmu.oa_async_buffer.tail = 0;
+	dev_priv->oa_pmu.oa_async_buffer.head = 0;
+
+	mutex_unlock(&dev_priv->dev->struct_mutex);
+
+	event->hw.state = PERF_HES_STOPPED;
+	complete(&dev_priv->oa_pmu.complete);
+}
+
 static void
 i915_oa_event_start(struct perf_event *event, int flags)
 {
 	struct drm_i915_private *dev_priv =
@@ -939,25 +1000,23 @@ static void i915_oa_event_stop(struct perf_event *event, int flags)
 		container_of(event->pmu, typeof(*dev_priv), oa_pmu.pmu);
 	unsigned long lock_flags;
 
-	spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
-
-	dev_priv->oa_pmu.event_active = false;
-	update_oacontrol(dev_priv);
-
-	mmiowb();
-	spin_unlock_irqrestore(&dev_priv->oa_pmu.lock, lock_flags);
+	if (dev_priv->oa_pmu.async_sample_mode)
+		schedule_work(&dev_priv->oa_pmu.work_event_stop);
+	else {
+		spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
+		dev_priv->oa_pmu.event_active = false;
+		update_oacontrol(dev_priv);
 
-	if (event->attr.sample_period) {
-		hrtimer_cancel(&dev_priv->oa_pmu.timer);
-		flush_oa_snapshots(dev_priv, false);
-	}
+		mmiowb();
+		spin_unlock_irqrestore(&dev_priv->oa_pmu.lock, lock_flags);
+		if (event->attr.sample_period) {
+			hrtimer_cancel(&dev_priv->oa_pmu.timer);
+			flush_oa_snapshots(dev_priv, false);
+		}
 
-	if (dev_priv->oa_pmu.async_sample_mode) {
-		dev_priv->oa_pmu.oa_async_buffer.tail = 0;
-		dev_priv->oa_pmu.oa_async_buffer.head = 0;
+		event->hw.state = PERF_HES_STOPPED;
 	}
-	event->hw.state = PERF_HES_STOPPED;
 }
 
 static int i915_oa_event_add(struct perf_event *event, int flags)
@@ -1092,6 +1151,8 @@ void i915_oa_pmu_register(struct drm_device *dev)
 	i915->oa_pmu.timer.function = hrtimer_sample;
 
 	INIT_WORK(&i915->oa_pmu.work_timer, forward_oa_async_snapshots_work);
+	INIT_WORK(&i915->oa_pmu.work_event_stop, i915_oa_async_stop_work_fn);
+	init_completion(&i915->oa_pmu.complete);
 
 	spin_lock_init(&i915->oa_pmu.lock);
 
@@ -1122,8 +1183,10 @@ void i915_oa_pmu_unregister(struct drm_device *dev)
 	if (i915->oa_pmu.pmu.event_init == NULL)
 		return;
 
-	if (i915->oa_pmu.async_sample_mode)
+	if (i915->oa_pmu.async_sample_mode) {
 		cancel_work_sync(&i915->oa_pmu.work_timer);
+		cancel_work_sync(&i915->oa_pmu.work_event_stop);
+	}
 
 	unregister_sysctl_table(i915->oa_pmu.sysctl_header);
-- 
1.8.5.1
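
For reference, the work/completion handoff this patch relies on reduces to the
following self-contained sketch. The demo_* names are hypothetical and only
illustrate the pattern, not the i915 code itself: the stop path defers the
slow GPU wait to a worker, and the destroy path blocks on a completion so the
buffer cannot be freed while the worker may still be using it.

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/completion.h>
#include <linux/slab.h>

struct demo_pmu {
	struct work_struct stop_work;	/* deferred "event stop" */
	struct completion stop_done;	/* signalled when stop_work finishes */
	void *snapshot_buffer;		/* stands in for the OA async buffer */
};

static struct demo_pmu demo;

static void demo_stop_work_fn(struct work_struct *work)
{
	struct demo_pmu *pmu = container_of(work, struct demo_pmu, stop_work);

	/* The real driver waits here for the last MI_RPC request to retire. */
	complete(&pmu->stop_done);
}

static void demo_event_stop(struct demo_pmu *pmu)
{
	/* Do not block on the GPU in this path; defer the wait to a worker. */
	schedule_work(&pmu->stop_work);
}

static void demo_event_destroy(struct demo_pmu *pmu)
{
	/* Do not free the buffer until the deferred stop has completed. */
	wait_for_completion(&pmu->stop_done);
	kfree(pmu->snapshot_buffer);
	pmu->snapshot_buffer = NULL;
}

static int __init demo_init(void)
{
	demo.snapshot_buffer = kzalloc(4096, GFP_KERNEL);
	if (!demo.snapshot_buffer)
		return -ENOMEM;

	INIT_WORK(&demo.stop_work, demo_stop_work_fn);
	init_completion(&demo.stop_done);

	/* Exercise the handoff once: stop, then destroy. */
	demo_event_stop(&demo);
	demo_event_destroy(&demo);
	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

The ordering guarantee is the same as in the patch above: i915_oa_event_stop()
only schedules work_event_stop, and the buffer teardown waits on
oa_pmu.complete (signalled at the end of i915_oa_async_stop_work_fn) before
vunmap'ing the buffer, so the buffer cannot disappear underneath a worker that
is still forwarding snapshots.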