On Thu, May 07, 2015 at 03:15:50PM +0100, Robert Bragg wrote: > + /* We bypass the default perf core perf_paranoid_cpu() || > + * CAP_SYS_ADMIN check by using the PERF_PMU_CAP_IS_DEVICE > + * flag and instead authenticate based on whether the current > + * pid owns the specified context, or require CAP_SYS_ADMIN > + * when collecting cross-context metrics. > + */ > + dev_priv->oa_pmu.specific_ctx = NULL; > + if (oa_attr.single_context) { > + u32 ctx_id = oa_attr.ctx_id; > + unsigned int drm_fd = oa_attr.drm_fd; > + struct fd fd = fdget(drm_fd); > + > + if (fd.file) { Specify a ctx and not providing the right fd should be its own error, either EBADF or EINVAL. > + dev_priv->oa_pmu.specific_ctx = > + lookup_context(dev_priv, fd.file, ctx_id); > + } Missing fdput > + } > + > + if (!dev_priv->oa_pmu.specific_ctx && !capable(CAP_SYS_ADMIN)) > + return -EACCES; > + > + mutex_lock(&dev_priv->dev->struct_mutex); i915_mutex_interruptible, probably best to couple into the GPU error handling here as well especially as init_oa_buffer() will go onto touch GPU internals. > + ret = init_oa_buffer(event); > + mutex_unlock(&dev_priv->dev->struct_mutex); > + > + if (ret) > + return ret; > + > + BUG_ON(dev_priv->oa_pmu.exclusive_event); > + dev_priv->oa_pmu.exclusive_event = event; > + > + event->destroy = i915_oa_event_destroy; > + > + /* PRM - observability performance counters: > + * > + * OACONTROL, performance counter enable, note: > + * > + * "When this bit is set, in order to have coherent counts, > + * RC6 power state and trunk clock gating must be disabled. > + * This can be achieved by programming MMIO registers as > + * 0xA094=0 and 0xA090[31]=1" > + * > + * In our case we are expected that taking pm + FORCEWAKE > + * references will effectively disable RC6 and trunk clock > + * gating. > + */ > + intel_runtime_pm_get(dev_priv); > + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); That is a nuisance. Aside: Why isn't OA inside the powerctx? Is a subset valid with forcewake? It does perturb the system greatly to disable rc6, so I wonder if it could be made optional? > + > + return 0; > +} > + > +static void update_oacontrol(struct drm_i915_private *dev_priv) > +{ > + BUG_ON(!spin_is_locked(&dev_priv->oa_pmu.lock)); > + > + if (dev_priv->oa_pmu.event_active) { > + unsigned long ctx_id = 0; > + bool pinning_ok = false; > + > + if (dev_priv->oa_pmu.specific_ctx) { > + struct intel_context *ctx = > + dev_priv->oa_pmu.specific_ctx; > + struct drm_i915_gem_object *obj = > + ctx->legacy_hw_ctx.rcs_state; If only there was ctx->legacy_hw_ctx.rcs_vma... > + > + if (i915_gem_obj_is_pinned(obj)) { > + ctx_id = i915_gem_obj_ggtt_offset(obj); > + pinning_ok = true; > + } > + } > + > + if ((ctx_id == 0 || pinning_ok)) { > + bool periodic = dev_priv->oa_pmu.periodic; > + u32 period_exponent = dev_priv->oa_pmu.period_exponent; > + u32 report_format = dev_priv->oa_pmu.oa_buffer.format; > + > + I915_WRITE(GEN7_OACONTROL, > + (ctx_id & GEN7_OACONTROL_CTX_MASK) | > + (period_exponent << > + GEN7_OACONTROL_TIMER_PERIOD_SHIFT) | > + (periodic ? > + GEN7_OACONTROL_TIMER_ENABLE : 0) | > + (report_format << > + GEN7_OACONTROL_FORMAT_SHIFT) | > + (ctx_id ? > + GEN7_OACONTROL_PER_CTX_ENABLE : 0) | > + GEN7_OACONTROL_ENABLE); I notice you don't use any write barriers... -Chris -- Chris Wilson, Intel Open Source Technology Centre _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel