Re: [PATCH 06/11] drm/i915: Framework for capturing command stream based OA reports

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Sourab,

As Sergio Martinez has started experimenting with this in gputop and reported seeing lots of ENOSPC errors being reported when reading I had a look into this and saw a few issues with how we check that there's data available to read in command stream mode, and a I think there's a possibility of incorrectly sorting the samples sometimes...

On Tue, Feb 16, 2016 at 5:27 AM, <sourab.gupta@xxxxxxxxx> wrote:
From: Sourab Gupta <sourab.gupta@xxxxxxxxx>


-static bool i915_oa_can_read(struct i915_perf_stream *stream)
+static bool append_oa_rcs_sample(struct i915_perf_stream *stream,
+                                struct i915_perf_read_state *read_state,
+                                struct i915_perf_cs_data_node *node)
+{
+       struct drm_i915_private *dev_priv = stream->dev_priv;
+       struct oa_sample_data data = { 0 };
+       const u8 *report = dev_priv->perf.command_stream_buf.addr +
+                               node->offset;
+       u32 sample_flags = stream->sample_flags;
+       u32 report_ts;
+
+       /*
+        * Forward the periodic OA samples which have the timestamp lower
+        * than timestamp of this sample, before forwarding this sample.
+        * This ensures samples read by user are order acc. to their timestamps
+        */
+       report_ts = *(u32 *)(report + 4);
+       dev_priv->perf.oa.ops.read(stream, read_state, report_ts);
+
+       if (sample_flags & SAMPLE_OA_SOURCE_INFO)
+               data.source = I915_PERF_OA_EVENT_SOURCE_RCS;
+
+       if (sample_flags & SAMPLE_CTX_ID)
+               data.ctx_id = node->ctx_id;
+
+       if (sample_flags & SAMPLE_OA_REPORT)
+               data.report = report;
+
+       append_oa_sample(stream, read_state, &data);
+
+       return true;
+}
+
+static void oa_rcs_append_reports(struct i915_perf_stream *stream,
+                                 struct i915_perf_read_state *read_state)
+{
+       struct drm_i915_private *dev_priv = stream->dev_priv;
+       struct i915_perf_cs_data_node *entry, *next;
+
+       list_for_each_entry_safe(entry, next,
+                                &dev_priv->perf.node_list, link) {
+               if (!i915_gem_request_completed(entry->request, true))
+                       break;
+
+               if (!append_oa_rcs_sample(stream, read_state, entry))
+                       break;
+
+               spin_lock(&dev_priv->perf.node_list_lock);
+               list_del(&entry->link);
+               spin_unlock(&dev_priv->perf.node_list_lock);
+
+               i915_gem_request_unreference__unlocked(entry->request);
+               kfree(entry);
+       }
+
+       /* Flush any remaining periodic reports */
+       dev_priv->perf.oa.ops.read(stream, read_state, U32_MAX);
 
I don't think we can flush all remaining periodic reports here - at least not if we have any in-flight MI_RPC commands - in case the next request to complete might have reports with earlier timestamps than some of these periodic reports.

Even if we have periodic reports available I think we need to throttle forwarding them based on the command stream requests completing.

This is something that userspace should understand when it explicitly decides to use command stream mode in conjunction with periodic sampling.
 
+}
+
+static bool command_stream_buf_is_empty(struct i915_perf_stream *stream)
 {
        struct drm_i915_private *dev_priv = stream->dev_priv;

-       return !dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv);
+       if (stream->cs_mode)
+               return list_empty(&dev_priv->perf.node_list);
+       else
+               return true;
 }

I think this list_empty() check needs a lock around it, as it's called from stream_have_data__unlocked().

Currently only checking list_empty() can lead to some false positives that may confuse the current i915_perf_read() (the false positives come from not checking that at least one entry->request has also completed).

False positives here will affect the error reporting in i915_perf_read_locked(). The first thing i915_perf_read_locked() does is check for available data so it can return -EAGAIN for non-blocking file descriptors or block, waiting for data, for blocking fds. Once we believe there is data then if stream->read() returns zero then that's translated to a -ENOSPC error because we 'know' there's data but weren't able to copy anything to the user's buffer because it wasn't large enough to hold a complete record.
 

-static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
+static bool stream_have_data__unlocked(struct i915_perf_stream *stream)
 {
        struct drm_i915_private *dev_priv = stream->dev_priv;

@@ -397,8 +739,29 @@ static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
         * can't be destroyed until completion (such as a read()) that ensures
         * the device + OA buffer can't disappear
         */
+       return !(dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv) &&
+                command_stream_buf_is_empty(stream));
+}

In command stream mode, due to how we need to merge sort reports I think we need a more involved set of checks. We need to check that at least one entry->request in the node_list has completed, or else if the node_list is empty then we can check if !oa_buffer_is_empty().

There's going to be a thorny race to consider though: if this code ends up reporting that there is data to read because the node_list is empty and there are periodic samples, but by the time we call into stream->read() there's a new node_list entry that's not completed that will end up returning 0 which could again result in an incorrect ENOSPC error.

We should consider changing stream->read() so it can return -ENOSPC itself if appropriate and when it returns 0 we would instead either return -EAGAIN or loop back to waiting for blocking fds.

B.t.w for reference the stream->read() interface is a little different in the series I sent out recently for Haswell vs the gen8+ oa-next branch (which I haven't rebased yet) because I had to make some last minute changes to consider -EFAULT errors if there was a copy_to_user() error while reading. If you look at this issue, it could be good to also compare with these related changes I made recently.
 
+
+static bool i915_oa_can_read(struct i915_perf_stream *stream)
+{
+
+       return stream_have_data__unlocked(stream);
+}


Kind regards,
- Robert
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux