Re: [PATCH] drm/i915/gt: Prune 'inline' from execlists

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes:

> Remove the extraneous inlines. The only place where the compiler's split
> between inline and out-of-line code looked dubious was
> execlists_schedule_out, so shuffle the code slightly to move all the
> work into the out-of-line function.
>
> In a normal build, bloat-o-meter shows that only
> execlists_schedule_out is contentious:
>
> add/remove: 1/0 grow/shrink: 0/2 up/down: 803/-1532 (-729)
> Function                                     old     new   delta
> __execlists_schedule_out                       -     803    +803
> execlists_submission_tasklet                6488    5766    -722
> execlists_reset_csb.constprop               1587     777    -810
> Total: Before=1605815, After=1605086, chg -0.05%
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Jani Nikula <jani.nikula@xxxxxxxxx>

Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>

> ---
>  .../drm/i915/gt/intel_execlists_submission.c  | 63 +++++++++----------
>  1 file changed, 29 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index d7d5a58990bb..33c7495b12b1 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -230,8 +230,7 @@ active_request(const struct intel_timeline * const tl, struct i915_request *rq)
>  	return __active_request(tl, rq, 0);
>  }
>  
> -static inline void
> -ring_set_paused(const struct intel_engine_cs *engine, int state)
> +static void ring_set_paused(const struct intel_engine_cs *engine, int state)
>  {
>  	/*
>  	 * We inspect HWS_PREEMPT with a semaphore inside
> @@ -244,12 +243,12 @@ ring_set_paused(const struct intel_engine_cs *engine, int state)
>  		wmb();
>  }
>  
> -static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> +static struct i915_priolist *to_priolist(struct rb_node *rb)
>  {
>  	return rb_entry(rb, struct i915_priolist, node);
>  }
>  
> -static inline int rq_prio(const struct i915_request *rq)
> +static int rq_prio(const struct i915_request *rq)
>  {
>  	return READ_ONCE(rq->sched.attr.priority);
>  }
> @@ -299,8 +298,8 @@ static int virtual_prio(const struct intel_engine_execlists *el)
>  	return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN;
>  }
>  
> -static inline bool need_preempt(const struct intel_engine_cs *engine,
> -				const struct i915_request *rq)
> +static bool need_preempt(const struct intel_engine_cs *engine,
> +			 const struct i915_request *rq)
>  {
>  	int last_prio;
>  
> @@ -351,7 +350,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>  		   queue_prio(&engine->execlists)) > last_prio;
>  }
>  
> -__maybe_unused static inline bool
> +__maybe_unused static bool
>  assert_priority_queue(const struct i915_request *prev,
>  		      const struct i915_request *next)
>  {
> @@ -418,7 +417,7 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
>  	return __unwind_incomplete_requests(engine);
>  }
>  
> -static inline void
> +static void
>  execlists_context_status_change(struct i915_request *rq, unsigned long status)
>  {
>  	/*
> @@ -503,7 +502,7 @@ static void reset_active(struct i915_request *rq,
>  	ce->lrc.lrca = lrc_update_regs(ce, engine, head);
>  }
>  
> -static inline struct intel_engine_cs *
> +static struct intel_engine_cs *
>  __execlists_schedule_in(struct i915_request *rq)
>  {
>  	struct intel_engine_cs * const engine = rq->engine;
> @@ -549,7 +548,7 @@ __execlists_schedule_in(struct i915_request *rq)
>  	return engine;
>  }
>  
> -static inline void execlists_schedule_in(struct i915_request *rq, int idx)
> +static void execlists_schedule_in(struct i915_request *rq, int idx)
>  {
>  	struct intel_context * const ce = rq->context;
>  	struct intel_engine_cs *old;
> @@ -608,9 +607,9 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
>  		tasklet_hi_schedule(&ve->base.execlists.tasklet);
>  }
>  
> -static inline void __execlists_schedule_out(struct i915_request *rq)
> +static void __execlists_schedule_out(struct i915_request * const rq,
> +				     struct intel_context * const ce)
>  {
> -	struct intel_context * const ce = rq->context;
>  	struct intel_engine_cs * const engine = rq->engine;
>  	unsigned int ccid;
>  
> @@ -621,6 +620,7 @@ static inline void __execlists_schedule_out(struct i915_request *rq)
>  	 */
>  
>  	CE_TRACE(ce, "schedule-out, ccid:%x\n", ce->lrc.ccid);
> +	GEM_BUG_ON(ce->inflight != engine);
>  
>  	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
>  		lrc_check_regs(ce, engine, "after");
> @@ -660,10 +660,12 @@ static inline void __execlists_schedule_out(struct i915_request *rq)
>  	 */
>  	if (ce->engine != engine)
>  		kick_siblings(rq, ce);
> +
> +	WRITE_ONCE(ce->inflight, NULL);
> +	intel_context_put(ce);
>  }
>  
> -static inline void
> -execlists_schedule_out(struct i915_request *rq)
> +static inline void execlists_schedule_out(struct i915_request *rq)
>  {
>  	struct intel_context * const ce = rq->context;
>  
> @@ -671,12 +673,8 @@ execlists_schedule_out(struct i915_request *rq)
>  
>  	GEM_BUG_ON(!ce->inflight);
>  	ce->inflight = ptr_dec(ce->inflight);
> -	if (!__intel_context_inflight_count(ce->inflight)) {
> -		GEM_BUG_ON(ce->inflight != rq->engine);
> -		__execlists_schedule_out(rq);
> -		WRITE_ONCE(ce->inflight, NULL);
> -		intel_context_put(ce);
> -	}
> +	if (!__intel_context_inflight_count(ce->inflight))
> +		__execlists_schedule_out(rq, ce);
>  
>  	i915_request_put(rq);
>  }
> @@ -728,7 +726,7 @@ static u64 execlists_update_context(struct i915_request *rq)
>  	return desc;
>  }
>  
> -static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
> +static void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
>  {
>  	if (execlists->ctrl_reg) {
>  		writel(lower_32_bits(desc), execlists->submit_reg + port * 2);
> @@ -757,7 +755,7 @@ dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
>  	return buf;
>  }
>  
> -static __maybe_unused void
> +static __maybe_unused noinline void
>  trace_ports(const struct intel_engine_execlists *execlists,
>  	    const char *msg,
>  	    struct i915_request * const *ports)
> @@ -774,13 +772,13 @@ trace_ports(const struct intel_engine_execlists *execlists,
>  		     dump_port(p1, sizeof(p1), ", ", ports[1]));
>  }
>  
> -static inline bool
> +static bool
>  reset_in_progress(const struct intel_engine_execlists *execlists)
>  {
>  	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
>  }
>  
> -static __maybe_unused bool
> +static __maybe_unused noinline bool
>  assert_pending_valid(const struct intel_engine_execlists *execlists,
>  		     const char *msg)
>  {
> @@ -1621,12 +1619,12 @@ static void execlists_dequeue_irq(struct intel_engine_cs *engine)
>  	local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
>  }
>  
> -static inline void clear_ports(struct i915_request **ports, int count)
> +static void clear_ports(struct i915_request **ports, int count)
>  {
>  	memset_p((void **)ports, NULL, count);
>  }
>  
> -static inline void
> +static void
>  copy_ports(struct i915_request **dst, struct i915_request **src, int count)
>  {
>  	/* A memcpy_p() would be very useful here! */
> @@ -1660,8 +1658,7 @@ cancel_port_requests(struct intel_engine_execlists * const execlists,
>  	return inactive;
>  }
>  
> -static inline void
> -invalidate_csb_entries(const u64 *first, const u64 *last)
> +static void invalidate_csb_entries(const u64 *first, const u64 *last)
>  {
>  	clflush((void *)first);
>  	clflush((void *)last);
> @@ -1693,7 +1690,7 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
>   *     bits 47-57: sw context id of the lrc the GT switched away from
>   *     bits 58-63: sw counter of the lrc the GT switched away from
>   */
> -static inline bool gen12_csb_parse(const u64 csb)
> +static bool gen12_csb_parse(const u64 csb)
>  {
>  	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb));
>  	bool new_queue =
> @@ -1720,7 +1717,7 @@ static inline bool gen12_csb_parse(const u64 csb)
>  	return false;
>  }
>  
> -static inline bool gen8_csb_parse(const u64 csb)
> +static bool gen8_csb_parse(const u64 csb)
>  {
>  	return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
>  }
> @@ -1759,8 +1756,7 @@ wa_csb_read(const struct intel_engine_cs *engine, u64 * const csb)
>  	return entry;
>  }
>  
> -static inline u64
> -csb_read(const struct intel_engine_cs *engine, u64 * const csb)
> +static u64 csb_read(const struct intel_engine_cs *engine, u64 * const csb)
>  {
>  	u64 entry = READ_ONCE(*csb);
>  
> @@ -3180,8 +3176,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
>  	}
>  }
>  
> -static inline void
> -logical_ring_default_irqs(struct intel_engine_cs *engine)
> +static void logical_ring_default_irqs(struct intel_engine_cs *engine)
>  {
>  	unsigned int shift = 0;
>  
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx



[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux