On 02/18/2016 06:27 AM, John.C.Harrison@xxxxxxxxx wrote:
> From: John Harrison <John.C.Harrison@xxxxxxxxx>
>
> The scheduler decouples the submission of batch buffers to the driver
> from their subsequent submission to the hardware. This means that an
> application which is continuously submitting buffers as fast as it can
> could potentially flood the driver. To prevent this, the driver now
> tracks how many buffers are in progress (queued in software or
> executing in hardware) and limits this to a given (tunable) number. If
> this number is exceeded then the queue to the driver will return
> EAGAIN and thus prevent the scheduler's queue becoming arbitrarily
> large.
>
> v3: Added a missing decrement of the file queue counter.
>
> v4: Updated a comment.
>
> v5: Updated due to changes to earlier patches in series - removing
> forward declarations and white space. Also added some documentation.
> [Joonas Lahtinen]
>
> For: VIZ-1587
> Signed-off-by: John Harrison <John.C.Harrison@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_drv.h            |  2 ++
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  8 +++++
>  drivers/gpu/drm/i915/i915_scheduler.c      | 48 ++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_scheduler.h      |  2 ++
>  4 files changed, 60 insertions(+)
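For what it's worth, from the client side this limit surfaces as EAGAIN
straight from the execbuf ioctl, so an app that can out-run the scheduler
needs a retry path along these lines. Purely a hypothetical sketch: the
helper name and the 1 ms backoff are invented here, and IIRC libdrm's
drmIoctl() wrapper already restarts on EAGAIN, just without sleeping.

#include <errno.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Hypothetical client-side helper: retry the execbuf with a short
 * back-off while the per-file scheduler queue is full. */
static int submit_execbuf(int fd, struct drm_i915_gem_execbuffer2 *eb)
{
        int ret;

        do {
                ret = ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, eb);
                if (ret == -1 && errno == EAGAIN)
                        usleep(1000);   /* queue full, back off 1 ms */
        } while (ret == -1 && (errno == EAGAIN || errno == EINTR));

        return ret;
}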
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 071a27b..3f4c4f0 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -336,6 +336,8 @@ struct drm_i915_file_private {
>          } rps;
>
>          struct intel_engine_cs *bsd_ring;
> +
> +        u32 scheduler_queue_length;
>  };
>
>  enum intel_dpll_id {
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index d4de8c7..dff120c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1803,6 +1803,10 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
>                  return -EINVAL;
>          }
>
> +        /* Throttle batch requests per device file */
> +        if (i915_scheduler_file_queue_is_full(file))
> +                return -EAGAIN;
> +
>          /* Copy in the exec list from userland */
>          exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
>          exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
> @@ -1893,6 +1897,10 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
>                  return -EINVAL;
>          }
>
> +        /* Throttle batch requests per device file */
> +        if (i915_scheduler_file_queue_is_full(file))
> +                return -EAGAIN;
> +
>          exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
>                               GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
>          if (exec2_list == NULL)
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index e56ce08..f7f29d5 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -69,6 +69,7 @@ int i915_scheduler_init(struct drm_device *dev)
>          scheduler->priority_level_bump = 50;
>          scheduler->priority_level_preempt = 900;
>          scheduler->min_flying = 2;
> +        scheduler->file_queue_max = 64;
>
>          dev_priv->scheduler = scheduler;
>
> @@ -464,6 +465,44 @@ static int i915_scheduler_submit_unlocked(struct intel_engine_cs *ring)
>          return ret;
>  }
>
> +/**
> + * i915_scheduler_file_queue_is_full - Returns true if the queue is full.
> + * @file: File object to query.
> + * This allows throttling of applications by limiting the total number of
> + * outstanding requests to a specified level. Once that limit is reached,
> + * this call will return true and no more requests should be accepted.
> + */
> +bool i915_scheduler_file_queue_is_full(struct drm_file *file)
> +{
> +        struct drm_i915_file_private *file_priv = file->driver_priv;
> +        struct drm_i915_private *dev_priv = file_priv->dev_priv;
> +        struct i915_scheduler *scheduler = dev_priv->scheduler;
> +
> +        return file_priv->scheduler_queue_length >= scheduler->file_queue_max;
> +}
> +
> +/**
> + * i915_scheduler_file_queue_inc - Increment the file's request queue count.
> + * @file: File object to process.
> + */
> +static void i915_scheduler_file_queue_inc(struct drm_file *file)
> +{
> +        struct drm_i915_file_private *file_priv = file->driver_priv;
> +
> +        file_priv->scheduler_queue_length++;
> +}
> +
> +/**
> + * i915_scheduler_file_queue_dec - Decrement the file's request queue count.
> + * @file: File object to process.
> + */
> +static void i915_scheduler_file_queue_dec(struct drm_file *file)
> +{
> +        struct drm_i915_file_private *file_priv = file->driver_priv;
> +
> +        file_priv->scheduler_queue_length--;
> +}
> +
>  static void i915_generate_dependencies(struct i915_scheduler *scheduler,
>                                         struct i915_scheduler_queue_entry *node,
>                                         uint32_t ring)
> @@ -640,6 +679,8 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
>
>          list_add_tail(&node->link, &scheduler->node_queue[ring->id]);
>
> +        i915_scheduler_file_queue_inc(node->params.file);
> +
>          not_flying = i915_scheduler_count_flying(scheduler, ring) <
>                                          scheduler->min_flying;
>
> @@ -883,6 +924,12 @@ static bool i915_scheduler_remove(struct i915_scheduler *scheduler,
>                  /* Strip the dependency info while the mutex is still locked */
>                  i915_scheduler_remove_dependent(scheduler, node);
>
> +                /* Likewise clean up the file pointer. */
> +                if (node->params.file) {
> +                        i915_scheduler_file_queue_dec(node->params.file);
> +                        node->params.file = NULL;
> +                }
> +
>                  continue;
>          }
>
> @@ -1205,6 +1252,7 @@ int i915_scheduler_closefile(struct drm_device *dev, struct drm_file *file)
>                                    node->status,
>                                    ring->name);
>
> +                        i915_scheduler_file_queue_dec(node->params.file);
>                          node->params.file = NULL;
>                  }
>          }
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index 075befb..b78de12 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -77,6 +77,7 @@ struct i915_scheduler {
>          int32_t priority_level_bump;
>          int32_t priority_level_preempt;
>          uint32_t min_flying;
> +        uint32_t file_queue_max;
>  };
>
>  /* Flag bits for i915_scheduler::flags */
> @@ -100,5 +101,6 @@ int i915_scheduler_flush_stamp(struct intel_engine_cs *ring,
>                                 unsigned long stamp, bool is_locked);
>  bool i915_scheduler_is_request_tracked(struct drm_i915_gem_request *req,
>                                         bool *completed, bool *busy);
> +bool i915_scheduler_file_queue_is_full(struct drm_file *file);
>
>  #endif /* _I915_SCHEDULER_H_ */

Just to clarify and make sure I understood the previous stuff: a queued
execbuf that has not yet been dispatched does not reserve and pin pages,
right? That occurs at actual dispatch time? If so, I guess clients will
hit this 64 queued item limit pretty regularly... How much metadata
overhead does that involve? Has it been derived from some performance
work with a bunch of workloads? It's fine if not; I can imagine that
different mixes of workloads would be affected by lower or higher queue
depths (e.g. small batch tests).

If this is tunable, I guess it should be clamped like a nice or rlimit
value, with values outside that range requiring CAP_SYS_ADMIN.
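Something along these lines is what I have in mind. Purely illustrative:
the setter, the bounds, and wherever it gets wired up (sysfs, module
param, whatever) are all made up here; only capable() is real kernel API.

/* Hypothetical sketch of a clamped setter for file_queue_max, in the
 * spirit of nice/rlimit: unprivileged callers stay within an invented
 * [8, 256] range, anything outside it needs CAP_SYS_ADMIN.
 * Assumes <linux/capability.h> for capable(). */
#define I915_FILE_QUEUE_MIN             8
#define I915_FILE_QUEUE_MAX_UNPRIV      256

static int i915_scheduler_set_file_queue_max(struct i915_scheduler *scheduler,
                                             u32 val)
{
        /* A zero limit would block all submission. */
        if (val == 0)
                return -EINVAL;

        /* Out-of-range values are reserved for privileged callers. */
        if ((val < I915_FILE_QUEUE_MIN || val > I915_FILE_QUEUE_MAX_UNPRIV) &&
            !capable(CAP_SYS_ADMIN))
                return -EPERM;

        scheduler->file_queue_max = val;
        return 0;
}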
Reviewed-by: Jesse Barnes <jbarnes@xxxxxxxxxxxxxxxx>

Jesse