On Thu, Jun 22, 2017 at 11:55:51AM +0100, Chris Wilson wrote: > Once a client has requested a waitboost, we keep that waitboost active > until all clients are no longer waiting. This is because we don't > distinguish which waiter deserves the boost. However, with the advent of > fence signaling, the signaler threads appear as waiters to the RPS > interrupt handler. So instead of using a single boolean to track when to > keep the waitboost active, use a counter of all outstanding waitboosted > requests. > > At this point, I have removed all vestiges of the rate limiting on > clients. Whilst this means that compositors should remain more fluid, > it also means that boosts are more prevalent. > > A drawback of this implementation is that it requires constant request > submission to keep the waitboost trimmed (as it is now cancelled when the > request is completed). This will be fine for a busy system, but near > idle the boosts may be kept for longer than desired (effectively tens of > vblanks worstcase) and there is a reliance on rc6 instead. In other words, now we're disabling boost when all requests that required boost are retired. We may need to tweak igt pm_rps boost scenarios to account for that extra time. > > Reported-by: Michał Winiarski <michal.winiarski@xxxxxxxxx> > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Michał Winiarski <michal.winiarski@xxxxxxxxx> [SNIP] > @@ -2347,11 +2349,10 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) > > rcu_read_lock(); > task = pid_task(file->pid, PIDTYPE_PID); > - seq_printf(m, "%s [%d]: %d boosts%s\n", > + seq_printf(m, "%s [%d]: %d boosts\n", > task ? task->comm : "<unknown>", > task ? task->pid : -1, > - file_priv->rps.boosts, > - list_empty(&file_priv->rps.link) ? "" : ", active"); > + atomic_read(&file_priv->rps.boosts)); > rcu_read_unlock(); > } > seq_printf(m, "Kernel (anonymous) boosts: %d\n", dev_priv->rps.boosts); dev_priv->rps.boosts seems to be unused at this point, could you remove it while you're here? With that: Reviewed-by: Michał Winiarski <michal.winiarski@xxxxxxxxx> -Michał > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 30e89456fc61..95e164387363 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -584,8 +584,7 @@ struct drm_i915_file_private { > struct idr context_idr; > > struct intel_rps_client { > - struct list_head link; > - unsigned boosts; > + atomic_t boosts; > } rps; > > unsigned int bsd_engine; > @@ -1304,8 +1303,7 @@ struct intel_gen6_power_mgmt { > enum { LOW_POWER, BETWEEN, HIGH_POWER } power; > > spinlock_t client_lock; > - struct list_head clients; > - bool client_boost; > + atomic_t num_waiters; > > bool enabled; > struct delayed_work autoenable_work; > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index ae3ce1314bd1..7391e2d36a31 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -388,7 +388,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence, > */ > if (rps) { > if (INTEL_GEN(rq->i915) >= 6) > - gen6_rps_boost(rq->i915, rps, rq->emitted_jiffies); > + gen6_rps_boost(rq, rps); > else > rps = NULL; > } > @@ -399,22 +399,6 @@ i915_gem_object_wait_fence(struct dma_fence *fence, > if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq)) > i915_gem_request_retire_upto(rq); > > - if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) { > - /* The GPU is now idle and this client has stalled. > - * Since no other client has submitted a request in the > - * meantime, assume that this client is the only one > - * supplying work to the GPU but is unable to keep that > - * work supplied because it is waiting. Since the GPU is > - * then never kept fully busy, RPS autoclocking will > - * keep the clocks relatively low, causing further delays. > - * Compensate by giving the synchronous client credit for > - * a waitboost next time. > - */ > - spin_lock(&rq->i915->rps.client_lock); > - list_del_init(&rps->link); > - spin_unlock(&rq->i915->rps.client_lock); > - } > - > return timeout; > } > > @@ -5065,12 +5049,6 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file) > list_for_each_entry(request, &file_priv->mm.request_list, client_link) > request->file_priv = NULL; > spin_unlock(&file_priv->mm.lock); > - > - if (!list_empty(&file_priv->rps.link)) { > - spin_lock(&to_i915(dev)->rps.client_lock); > - list_del(&file_priv->rps.link); > - spin_unlock(&to_i915(dev)->rps.client_lock); > - } > } > > int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) > @@ -5087,7 +5065,6 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) > file->driver_priv = file_priv; > file_priv->dev_priv = i915; > file_priv->file = file; > - INIT_LIST_HEAD(&file_priv->rps.link); > > spin_lock_init(&file_priv->mm.lock); > INIT_LIST_HEAD(&file_priv->mm.request_list); > diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c > index 8c59c79cbd8b..483af8921060 100644 > --- a/drivers/gpu/drm/i915/i915_gem_request.c > +++ b/drivers/gpu/drm/i915/i915_gem_request.c > @@ -384,7 +384,11 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) > engine->context_unpin(engine, engine->last_retired_context); > engine->last_retired_context = request->ctx; > > - dma_fence_signal(&request->fence); > + spin_lock_irq(&request->lock); > + if (request->waitboost) > + atomic_dec(&request->i915->rps.num_waiters); > + dma_fence_signal_locked(&request->fence); > + spin_unlock_irq(&request->lock); > > i915_priotree_fini(request->i915, &request->priotree); > i915_gem_request_put(request); > @@ -639,6 +643,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, > req->file_priv = NULL; > req->batch = NULL; > req->capture_list = NULL; > + req->waitboost = false; > > /* > * Reserve space in the ring buffer for all the commands required to > diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h > index 7b7c84369d78..604e131470a1 100644 > --- a/drivers/gpu/drm/i915/i915_gem_request.h > +++ b/drivers/gpu/drm/i915/i915_gem_request.h > @@ -184,6 +184,8 @@ struct drm_i915_gem_request { > /** Time at which this request was emitted, in jiffies. */ > unsigned long emitted_jiffies; > > + bool waitboost; > + > /** engine->request_list entry for this request */ > struct list_head link; > > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index b1c7d1a04612..61047ee2eede 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -1091,18 +1091,6 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) > return events; > } > > -static bool any_waiters(struct drm_i915_private *dev_priv) > -{ > - struct intel_engine_cs *engine; > - enum intel_engine_id id; > - > - for_each_engine(engine, dev_priv, id) > - if (intel_engine_has_waiter(engine)) > - return true; > - > - return false; > -} > - > static void gen6_pm_rps_work(struct work_struct *work) > { > struct drm_i915_private *dev_priv = > @@ -1114,7 +1102,7 @@ static void gen6_pm_rps_work(struct work_struct *work) > spin_lock_irq(&dev_priv->irq_lock); > if (dev_priv->rps.interrupts_enabled) { > pm_iir = fetch_and_zero(&dev_priv->rps.pm_iir); > - client_boost = fetch_and_zero(&dev_priv->rps.client_boost); > + client_boost = atomic_read(&dev_priv->rps.num_waiters); > } > spin_unlock_irq(&dev_priv->irq_lock); > > @@ -1131,7 +1119,7 @@ static void gen6_pm_rps_work(struct work_struct *work) > new_delay = dev_priv->rps.cur_freq; > min = dev_priv->rps.min_freq_softlimit; > max = dev_priv->rps.max_freq_softlimit; > - if (client_boost || any_waiters(dev_priv)) > + if (client_boost) > max = dev_priv->rps.max_freq; > if (client_boost && new_delay < dev_priv->rps.boost_freq) { > new_delay = dev_priv->rps.boost_freq; > @@ -1144,7 +1132,7 @@ static void gen6_pm_rps_work(struct work_struct *work) > > if (new_delay >= dev_priv->rps.max_freq_softlimit) > adj = 0; > - } else if (client_boost || any_waiters(dev_priv)) { > + } else if (client_boost) { > adj = 0; > } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { > if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq) > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h > index d93efb49a2e2..d17a32437f07 100644 > --- a/drivers/gpu/drm/i915/intel_drv.h > +++ b/drivers/gpu/drm/i915/intel_drv.h > @@ -1858,9 +1858,8 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv); > void gen6_rps_busy(struct drm_i915_private *dev_priv); > void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); > void gen6_rps_idle(struct drm_i915_private *dev_priv); > -void gen6_rps_boost(struct drm_i915_private *dev_priv, > - struct intel_rps_client *rps, > - unsigned long submitted); > +void gen6_rps_boost(struct drm_i915_gem_request *rq, > + struct intel_rps_client *rps); > void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req); > void g4x_wm_get_hw_state(struct drm_device *dev); > void vlv_wm_get_hw_state(struct drm_device *dev); > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > index b5b7372fcddc..fabea61ca0b6 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -6125,47 +6125,36 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) > gen6_sanitize_rps_pm_mask(dev_priv, ~0)); > } > mutex_unlock(&dev_priv->rps.hw_lock); > - > - spin_lock(&dev_priv->rps.client_lock); > - while (!list_empty(&dev_priv->rps.clients)) > - list_del_init(dev_priv->rps.clients.next); > - spin_unlock(&dev_priv->rps.client_lock); > } > > -void gen6_rps_boost(struct drm_i915_private *dev_priv, > - struct intel_rps_client *rps, > - unsigned long submitted) > +void gen6_rps_boost(struct drm_i915_gem_request *rq, > + struct intel_rps_client *rps) > { > + struct drm_i915_private *i915 = rq->i915; > + bool boost; > + > /* This is intentionally racy! We peek at the state here, then > * validate inside the RPS worker. > */ > - if (!(dev_priv->gt.awake && > - dev_priv->rps.enabled && > - dev_priv->rps.cur_freq < dev_priv->rps.boost_freq)) > + if (!i915->rps.enabled) > return; > > - /* Force a RPS boost (and don't count it against the client) if > - * the GPU is severely congested. > - */ > - if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES)) > - rps = NULL; > - > - spin_lock(&dev_priv->rps.client_lock); > - if (rps == NULL || list_empty(&rps->link)) { > - spin_lock_irq(&dev_priv->irq_lock); > - if (dev_priv->rps.interrupts_enabled) { > - dev_priv->rps.client_boost = true; > - schedule_work(&dev_priv->rps.work); > - } > - spin_unlock_irq(&dev_priv->irq_lock); > - > - if (rps != NULL) { > - list_add(&rps->link, &dev_priv->rps.clients); > - rps->boosts++; > - } else > - dev_priv->rps.boosts++; > + boost = false; > + spin_lock_irq(&rq->lock); > + if (!rq->waitboost && !i915_gem_request_completed(rq)) { > + atomic_inc(&i915->rps.num_waiters); > + rq->waitboost = true; > + boost = true; > } > - spin_unlock(&dev_priv->rps.client_lock); > + spin_unlock_irq(&rq->lock); > + if (!boost) > + return; > + > + if (READ_ONCE(i915->rps.cur_freq) < i915->rps.boost_freq) > + schedule_work(&i915->rps.work); > + > + if (rps != NULL) > + atomic_inc(&rps->boosts); > } > > int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) > @@ -9112,7 +9101,7 @@ static void __intel_rps_boost_work(struct work_struct *work) > struct drm_i915_gem_request *req = boost->req; > > if (!i915_gem_request_completed(req)) > - gen6_rps_boost(req->i915, NULL, req->emitted_jiffies); > + gen6_rps_boost(req, NULL); > > i915_gem_request_put(req); > kfree(boost); > @@ -9141,11 +9130,10 @@ void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req) > void intel_pm_setup(struct drm_i915_private *dev_priv) > { > mutex_init(&dev_priv->rps.hw_lock); > - spin_lock_init(&dev_priv->rps.client_lock); > > INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work, > __intel_autoenable_gt_powersave); > - INIT_LIST_HEAD(&dev_priv->rps.clients); > + atomic_set(&dev_priv->rps.num_waiters, 0); > > dev_priv->pm.suspended = false; > atomic_set(&dev_priv->pm.wakeref_count, 0); > -- > 2.11.0 > _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx