Quoting Tvrtko Ursulin (2019-05-08 13:10:38) > From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > > After the removal of engine global seqnos and the corresponding > intel_engine_notify tracepoints the script needs to be adjusted to cope > with the new state of things. > > To keep working it switches over using the dma_fence:dma_fence_signaled: > tracepoint and keeps one extra internal map to connect the ctx-seqno pairs > with engines. > > It also needs to key the completion events on the full engine/ctx/seqno > tokens, and adjust correspondingly the timeline sorting logic. > > v2: > * Do not use late notifications (received after context complete) when > splitting up coalesced requests. They are now much more likely and can > not be used. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > --- > scripts/trace.pl | 82 ++++++++++++++++++++++++------------------------ > 1 file changed, 41 insertions(+), 41 deletions(-) > > diff --git a/scripts/trace.pl b/scripts/trace.pl > index 18f9f3b18396..95dc3a645e8e 100755 > --- a/scripts/trace.pl > +++ b/scripts/trace.pl > @@ -27,7 +27,8 @@ use warnings; > use 5.010; > > my $gid = 0; > -my (%db, %queue, %submit, %notify, %rings, %ctxdb, %ringmap, %reqwait, %ctxtimelines); > +my (%db, %queue, %submit, %notify, %rings, %ctxdb, %ringmap, %reqwait, > + %ctxtimelines, %ctxengines); > my @freqs; So what's ctxengines? Or rings for that matter? I take it ctxengines is really the last engine which we saw this context execute on? > > my $max_items = 3000; > @@ -66,7 +67,7 @@ Notes: > i915:i915_request_submit, \ > i915:i915_request_in, \ > i915:i915_request_out, \ > - i915:intel_engine_notify, \ > + dma_fence:dma_fence_signaled, \ > i915:i915_request_wait_begin, \ > i915:i915_request_wait_end \ > [command-to-be-profiled] > @@ -161,7 +162,7 @@ sub arg_trace > 'i915:i915_request_submit', > 'i915:i915_request_in', > 'i915:i915_request_out', > - 'i915:intel_engine_notify', > + 'dma_fence:dma_fence_signaled', > 'i915:i915_request_wait_begin', > 'i915:i915_request_wait_end' ); > > @@ -312,13 +313,6 @@ sub db_key > return $ring . '/' . $ctx . '/' . $seqno; > } > > -sub global_key > -{ > - my ($ring, $seqno) = @_; > - > - return $ring . '/' . $seqno; > -} > - > sub sanitize_ctx > { > my ($ctx, $ring) = @_; > @@ -419,6 +413,8 @@ while (<>) { > $req{'ring'} = $ring; > $req{'seqno'} = $seqno; > $req{'ctx'} = $ctx; > + die if exists $ctxengines{$ctx} and $ctxengines{$ctx} ne $ring; > + $ctxengines{$ctx} = $ring; > $ctxtimelines{$ctx . '/' . $ring} = 1; > $req{'name'} = $ctx . '/' . $seqno; > $req{'global'} = $tp{'global'}; > @@ -429,16 +425,29 @@ while (<>) { > $ringmap{$rings{$ring}} = $ring; > $db{$key} = \%req; > } elsif ($tp_name eq 'i915:i915_request_out:') { > - my $gkey = global_key($ring, $tp{'global'}); > + my $gkey; > + # Must be paired with a previous i915_request_in > + die unless exists $ctxengines{$ctx}; I'd suggest next unless, because there's always a change the capture is started part way though someone's workload. > + $gkey = db_key($ctxengines{$ctx}, $ctx, $seqno); > + > + if ($tp{'completed?'}) { > + die unless exists $db{$key}; > + die unless exists $db{$key}->{'start'}; > + die if exists $db{$key}->{'end'}; > + > + $db{$key}->{'end'} = $time; > + $db{$key}->{'notify'} = $notify{$gkey} > + if exists $notify{$gkey}; Hmm. With preempt-to-busy, a request can complete when we are no longer tracking it (it completes before we preempt it). They will still get the schedule-out tracepoint, but marked as incomplete, and there will be a signaled tp later before we try and resubmit. > + } else { > + delete $db{$key}; > + } > + } elsif ($tp_name eq 'dma_fence:dma_fence_signaled:') { > + my $gkey; > > - die unless exists $db{$key}; > - die unless exists $db{$key}->{'start'}; > - die if exists $db{$key}->{'end'}; > + die unless exists $ctxengines{$tp{'context'}}; > > - $db{$key}->{'end'} = $time; > - $db{$key}->{'notify'} = $notify{$gkey} if exists $notify{$gkey}; > - } elsif ($tp_name eq 'i915:intel_engine_notify:') { > - my $gkey = global_key($ring, $seqno); > + $gkey = db_key($ctxengines{$tp{'context'}}, $tp{'context'}, $tp{'seqno'}); > > $notify{$gkey} = $time unless exists $notify{$gkey}; > } elsif ($tp_name eq 'i915:intel_gpu_freq_change:') { > @@ -452,7 +461,7 @@ while (<>) { > # find the largest seqno to be used for timeline sorting purposes. > my $max_seqno = 0; > foreach my $key (keys %db) { > - my $gkey = global_key($db{$key}->{'ring'}, $db{$key}->{'global'}); > + my $gkey = db_key($db{$key}->{'ring'}, $db{$key}->{'ctx'}, $db{$key}->{'seqno'}); > > die unless exists $db{$key}->{'start'}; > > @@ -478,14 +487,13 @@ my $key_count = scalar(keys %db); > > my %engine_timelines; > > -sub sortEngine { > - my $as = $db{$a}->{'global'}; > - my $bs = $db{$b}->{'global'}; > +sub sortStart { > + my $as = $db{$a}->{'start'}; > + my $bs = $db{$b}->{'start'}; > my $val; > > $val = $as <=> $bs; > - > - die if $val == 0; > + $val = $a cmp $b if $val == 0; > > return $val; > } > @@ -497,9 +505,7 @@ sub get_engine_timeline { > return $engine_timelines{$ring} if exists $engine_timelines{$ring}; > > @timeline = grep { $db{$_}->{'ring'} eq $ring } keys %db; > - # FIXME seqno restart > - @timeline = sort sortEngine @timeline; > - > + @timeline = sort sortStart @timeline; > $engine_timelines{$ring} = \@timeline; > > return \@timeline; > @@ -561,20 +567,10 @@ foreach my $gid (sort keys %rings) { > $db{$key}->{'no-notify'} = 1; > } > $db{$key}->{'end'} = $end; > + $db{$key}->{'notify'} = $end if $db{$key}->{'notify'} > $end; > } > } > > -sub sortStart { > - my $as = $db{$a}->{'start'}; > - my $bs = $db{$b}->{'start'}; > - my $val; > - > - $val = $as <=> $bs; > - $val = $a cmp $b if $val == 0; > - > - return $val; > -} > - > my $re_sort = 1; > my @sorted_keys; > > @@ -670,9 +666,13 @@ if ($correct_durations) { > next unless exists $db{$key}->{'no-end'}; > last if $pos == $#{$timeline}; > > - # Shift following request to start after the current one > + # Shift following request to start after the current > + # one, but only if that wouldn't make it zero duration, > + # which would indicate notify arrived after context > + # complete. > $next_key = ${$timeline}[$pos + 1]; > - if (exists $db{$key}->{'notify'}) { > + if (exists $db{$key}->{'notify'} and > + $db{$key}->{'notify'} < $db{$key}->{'end'}) { > $db{$next_key}->{'engine-start'} = $db{$next_key}->{'start'}; > $db{$next_key}->{'start'} = $db{$key}->{'notify'}; > $re_sort = 1; > @@ -750,9 +750,9 @@ foreach my $gid (sort keys %rings) { > # Extract all GPU busy intervals and sort them. > foreach my $key (@sorted_keys) { > next unless $db{$key}->{'ring'} eq $ring; > + die if $db{$key}->{'start'} > $db{$key}->{'end'}; Heh, we're out of luck if we want to trace across seqno wraparound. It makes enough sense, Reviewed-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx