On Tue, Oct 25, 2016 at 12:19:29AM +0100, Robert Bragg wrote:
> +static int claim_specific_ctx(struct i915_perf_stream *stream)
> +{
> +	struct drm_i915_private *dev_priv = stream->dev_priv;
> +	struct i915_vma *vma;
> +	int ret;
> +
> +	ret = i915_mutex_lock_interruptible(&dev_priv->drm);

Looking forward to the day these don't need struct_mutex.

> +	if (ret)
> +		return ret;
> +
> +	/* So that we don't have to worry about updating the context ID
> +	 * in OACONTROL on the fly we make sure to pin the context
> +	 * upfront for the lifetime of the stream...
> +	 */
> +	vma = stream->ctx->engine[RCS].state;

There's a caveat here that suggests I had better wrap this up into its
own function. (We need to flush dirty cachelines to memory on first
binding of the context.)

> +	ret = i915_vma_pin(vma, 0, stream->ctx->ggtt_alignment,
> +			   PIN_GLOBAL | PIN_HIGH);
> +	if (ret)
> +		return ret;

Oops.
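That is, this error path returns with struct_mutex still held. A minimal
fix (untested sketch, reusing the names from the patch) is to funnel the
failure through the unlock:

	ret = i915_vma_pin(vma, 0, stream->ctx->ggtt_alignment,
			   PIN_GLOBAL | PIN_HIGH);
	if (ret)
		goto unlock; /* don't leak struct_mutex on error */

	dev_priv->perf.oa.specific_ctx_id = i915_ggtt_offset(vma);

unlock:
	mutex_unlock(&dev_priv->drm.struct_mutex);
	return ret;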
> +
> +	dev_priv->perf.oa.specific_ctx_id = i915_ggtt_offset(vma);
> +
> +	mutex_unlock(&dev_priv->drm.struct_mutex);
> +
> +	return 0;
> +}

> +static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
> +{
> +	struct drm_i915_gem_object *bo;
> +	struct i915_vma *vma;
> +	int ret;
> +
> +	BUG_ON(dev_priv->perf.oa.oa_buffer.vma);
> +
> +	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
> +	if (ret)
> +		return ret;
> +
> +	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
> +	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
> +
> +	bo = i915_gem_object_create(&dev_priv->drm, OA_BUFFER_SIZE);
> +	if (IS_ERR(bo)) {
> +		DRM_ERROR("Failed to allocate OA buffer\n");
> +		ret = PTR_ERR(bo);
> +		goto unlock;
> +	}
> +
> +	ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
> +	if (ret)
> +		goto err_unref;
> +
> +	/* PreHSW required 512K alignment, HSW requires 16M */
> +	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, PIN_MAPPABLE);

Does this need mappable aperture space for OA? You aren't accessing it
via the aperture, but is the hw limited to it?

> +	if (IS_ERR(vma)) {
> +		ret = PTR_ERR(vma);
> +		goto err_unref;
> +	}
> +	dev_priv->perf.oa.oa_buffer.vma = vma;
> +
> +	dev_priv->perf.oa.oa_buffer.vaddr =
> +		i915_gem_object_pin_map(bo, I915_MAP_WB);
> +	if (IS_ERR(dev_priv->perf.oa.oa_buffer.vaddr)) {
> +		ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.vaddr);
> +		goto err_unpin;
> +	}
> +
> +	dev_priv->perf.oa.ops.init_oa_buffer(dev_priv);
> +
> +	DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p",
> +			 i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma),
> +			 dev_priv->perf.oa.oa_buffer.vaddr);
> +
> +	goto unlock;
> +
> +err_unpin:
> +	__i915_vma_unpin(vma);
> +
> +err_unref:
> +	i915_gem_object_put(bo);
> +
> +	dev_priv->perf.oa.oa_buffer.vaddr = NULL;
> +	dev_priv->perf.oa.oa_buffer.vma = NULL;
> +
> +unlock:
> +	mutex_unlock(&dev_priv->drm.struct_mutex);
> +	return ret;
> +}

> +	if (ret >= 0) {
> +		/* Maybe make ->pollin per-stream state if we support multiple
> +		 * concurrent streams in the future.
> +		 */
> +		atomic_set(&dev_priv->perf.oa.pollin, false);
> +	}
> +
> 	return ret;
> }
> 
> -static unsigned int i915_perf_poll_locked(struct i915_perf_stream *stream,
> +static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
> +{
> +	struct drm_i915_private *dev_priv =
> +		container_of(hrtimer, typeof(*dev_priv),
> +			     perf.oa.poll_check_timer);
> +
> +	if (!dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv)) {
> +		atomic_set(&dev_priv->perf.oa.pollin, true);
> +		wake_up(&dev_priv->perf.oa.poll_wq);
> +	}
> +
> +	hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));
> +
> +	return HRTIMER_RESTART;
> +}
> +
> +static unsigned int i915_perf_poll_locked(struct drm_i915_private *dev_priv,
> +					  struct i915_perf_stream *stream,
> 					  struct file *file,
> 					  poll_table *wait)
> {
> -	unsigned int streams = 0;
> +	unsigned int events = 0;
> 
> 	stream->ops->poll_wait(stream, file, wait);
> 
> -	if (stream->ops->can_read(stream))
> -		streams |= POLLIN;
> +	/* Note: we don't explicitly check whether there's something to read
> +	 * here since this path may be very hot depending on what else
> +	 * userspace is polling, or on the timeout in use. We rely solely on
> +	 * the hrtimer/oa_poll_check_timer_cb to notify us when there are
> +	 * samples to read.
> +	 */
> +	if (atomic_read(&dev_priv->perf.oa.pollin))
> +		events |= POLLIN;

The atomic_set() and atomic_read() are superfluous; they don't even
impose any memory barriers. The required barrier here is from wake_up().
You can just use

	dev_priv->perf.oa.pollin = true;

or WRITE_ONCE()/READ_ONCE() if you want to clearly show that the access
is outside of the lock and that the barriers are imposed elsewhere.

> @@ -285,18 +1177,18 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
> +	/* we avoid simply assigning stream->sample_flags = props->sample_flags
> +	 * to have _stream_init check the combination of sample flags more
> +	 * thoroughly, but still this is the expected result at this point.
> +	 */
> -	DRM_ERROR("Unsupported i915 perf stream configuration\n");
> -	ret = -EINVAL;
> -	goto err_alloc;
> +	BUG_ON(stream->sample_flags != props->sample_flags);

	if (WARN_ON(...)) {
		ret = -ENODEV;
		goto err_alloc;
	}

just to avoid checkpatch complaining.
-Chris

--
Chris Wilson, Intel Open Source Technology Centre
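P.S. To be concrete about the pollin suggestion, something along these
lines is what I mean (an untested sketch, reusing the field and
wait-queue names from the patch):

	/* hrtimer callback: WRITE_ONCE() documents the lockless write;
	 * the ordering against the waiter comes from wake_up().
	 */
	if (!dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv)) {
		WRITE_ONCE(dev_priv->perf.oa.pollin, true);
		wake_up(&dev_priv->perf.oa.poll_wq);
	}

	/* poll: READ_ONCE() documents the matching lockless read */
	if (READ_ONCE(dev_priv->perf.oa.pollin))
		events |= POLLIN;

with pollin declared as a plain bool rather than an atomic_t.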