On Tue, 5 Feb 2019 16:41:51 -0600
Tom Zanussi <zanussi@xxxxxxxxxx> wrote:

> +/**
> + * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
> + * @tr: The tracing instance
> + * @cond_data: User data to associate with the snapshot
> + * @update: Implementation of the cond_snapshot update function
> + *
> + * Check whether the conditional snapshot for the given instance has
> + * already been enabled, or if the current tracer is already using a
> + * snapshot; if so, return -EBUSY, else create a cond_snapshot and
> + * save the cond_data and update function inside.
> + *
> + * Returns 0 if successful, error otherwise.
> + */
> +int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
> +				 cond_update_fn_t update)
> +{
> +	struct cond_snapshot *cond_snapshot;
> +	int ret = 0;
> +
> +	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
> +	if (!cond_snapshot)
> +		return -ENOMEM;
> +
> +	cond_snapshot->cond_data = cond_data;
> +	cond_snapshot->update = update;
> +
> +	mutex_lock(&trace_types_lock);
> +
> +	ret = tracing_alloc_snapshot_instance(tr);
> +	if (ret) {
> +		kfree(cond_snapshot);
> +		return ret;

Just returned while holding trace_types_lock. Best to have:

	if (ret)
		goto fail_unlock;

> +	}
> +
> +	if (tr->current_trace->use_max_tr) {
> +		mutex_unlock(&trace_types_lock);
> +		kfree(cond_snapshot);
> +		return -EBUSY;
> +	}
> +
> +	arch_spin_lock(&tr->max_lock);
> +
> +	if (tr->cond_snapshot) {
> +		kfree(cond_snapshot);
> +		ret = -EBUSY;

Hmm, as tr->cond_snapshot can only be set while holding the
trace_types_lock (although it can be cleared without it), we can still
move this check up. Yeah, it may race with clearing (but do we care?)
but it makes the code a bit simpler.

	mutex_lock(&trace_types_lock);

	/*
	 * Because tr->cond_snapshot is only set to something other
	 * than NULL under the trace_types_lock, we can perform the
	 * busy test here. Worst case is that we return a -EBUSY just
	 * before it gets cleared. But do we really care about that?
	 */
	if (tr->cond_snapshot) {
		ret = -EBUSY;
		goto fail_unlock;
	}

	arch_spin_lock(&tr->max_lock);
	tr->cond_snapshot = cond_snapshot;
	arch_spin_unlock(&tr->max_lock);

> +	} else
> +		tr->cond_snapshot = cond_snapshot;
> +
> +	arch_spin_unlock(&tr->max_lock);
> +
> +	mutex_unlock(&trace_types_lock);
> +
> +	return ret;

 fail_unlock:
	kfree(cond_snapshot);
	mutex_unlock(&trace_types_lock);
	return ret;

> +}
> +EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
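Putting both of those changes together, the whole function would then
look something like this (untested sketch, just to show the resulting
flow):

	int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
					 cond_update_fn_t update)
	{
		struct cond_snapshot *cond_snapshot;
		int ret = 0;

		cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
		if (!cond_snapshot)
			return -ENOMEM;

		cond_snapshot->cond_data = cond_data;
		cond_snapshot->update = update;

		mutex_lock(&trace_types_lock);

		ret = tracing_alloc_snapshot_instance(tr);
		if (ret)
			goto fail_unlock;

		if (tr->current_trace->use_max_tr) {
			ret = -EBUSY;
			goto fail_unlock;
		}

		/*
		 * tr->cond_snapshot can only become non-NULL under the
		 * trace_types_lock, so the busy test is safe here.
		 */
		if (tr->cond_snapshot) {
			ret = -EBUSY;
			goto fail_unlock;
		}

		arch_spin_lock(&tr->max_lock);
		tr->cond_snapshot = cond_snapshot;
		arch_spin_unlock(&tr->max_lock);

		mutex_unlock(&trace_types_lock);

		return ret;

	 fail_unlock:
		kfree(cond_snapshot);
		mutex_unlock(&trace_types_lock);
		return ret;
	}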
> +
> +/**
> + * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
> + * @tr: The tracing instance
> + *
> + * Check whether the conditional snapshot for the given instance is
> + * enabled; if so, free the cond_snapshot associated with it,
> + * otherwise return -EINVAL.
> + *
> + * Returns 0 if successful, error otherwise.
> + */
> +int tracing_snapshot_cond_disable(struct trace_array *tr)
> +{
> +	int ret = 0;
> +
> +	arch_spin_lock(&tr->max_lock);
> +
> +	if (!tr->cond_snapshot)
> +		ret = -EINVAL;
> +	else {
> +		kfree(tr->cond_snapshot);
> +		tr->cond_snapshot = NULL;
> +	}
> +
> +	arch_spin_unlock(&tr->max_lock);
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
> #else
> void tracing_snapshot(void)
> {
> 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
> }
> EXPORT_SYMBOL_GPL(tracing_snapshot);
> +void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
> +{
> +	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
> +}
> +EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
> int tracing_alloc_snapshot(void)
> {
> 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
> @@ -1043,6 +1181,21 @@ void tracing_snapshot_alloc(void)
> 	tracing_snapshot();
> }
> EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
> +void *tracing_cond_snapshot_data(struct trace_array *tr)
> +{
> +	return NULL;
> +}
> +EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
> +int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
> +{
> +	return -ENODEV;
> +}
> +EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
> +int tracing_snapshot_cond_disable(struct trace_array *tr)
> +{
> +	return false;
> +}
> +EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
> #endif /* CONFIG_TRACER_SNAPSHOT */
> 
> void tracer_tracing_off(struct trace_array *tr)
> @@ -1354,12 +1507,14 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
>  * @tr: tracer
>  * @tsk: the task with the latency
>  * @cpu: The cpu that initiated the trace.
> + * @cond_data: User data associated with a conditional snapshot
>  *
>  * Flip the buffers between the @tr and the max_tr and record information
>  * about which task was the cause of this latency.
>  */
> void
> -update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
> +update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
> +	      void *cond_data)
> {
> 	if (tr->stop_count)
> 		return;
> @@ -1380,6 +1535,12 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
> 	else
> 		ring_buffer_record_off(tr->max_buffer.buffer);
> 
> +#ifdef CONFIG_TRACER_SNAPSHOT
> +	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {

Let's make this just a "goto out_unlock;" and add the out_unlock label
just before the arch_spin_unlock.
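That is, something like this (sketch):

	#ifdef CONFIG_TRACER_SNAPSHOT
		if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
			goto out_unlock;
	#endif
		swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);

		__update_max_tr(tr, tsk, cpu);

	 out_unlock:
		arch_spin_unlock(&tr->max_lock);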
> +		arch_spin_unlock(&tr->max_lock);
> +		return;
> +	}
> +#endif
> 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
> 
> 	__update_max_tr(tr, tsk, cpu);
> @@ -5396,6 +5557,17 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf)
> 	if (t == tr->current_trace)
> 		goto out;
> 
> +#ifdef CONFIG_TRACER_SNAPSHOT
> +	if (t->use_max_tr) {
> +		arch_spin_lock(&tr->max_lock);
> +		if (tr->cond_snapshot) {
> +			ret = -EBUSY;
> +			arch_spin_unlock(&tr->max_lock);
> +			goto out;
> +		}
> +		arch_spin_unlock(&tr->max_lock);
> +	}

How about:

	if (t->use_max_tr) {
		arch_spin_lock(&tr->max_lock);
		if (tr->cond_snapshot)
			ret = -EBUSY;
		arch_spin_unlock(&tr->max_lock);
		if (ret)
			goto out;
	}

> +#endif
> 	/* Some tracers won't work on kernel command line */
> 	if (system_state < SYSTEM_RUNNING && t->noboot) {
> 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
> @@ -6478,6 +6650,14 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
> 		goto out;
> 	}
> 
> +	arch_spin_lock(&tr->max_lock);
> +	if (tr->cond_snapshot) {
> +		ret = -EBUSY;
> +		arch_spin_unlock(&tr->max_lock);
> +		goto out;
> +	}
> +	arch_spin_unlock(&tr->max_lock);

Same here.
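That is, the same pattern would apply (sketch):

	arch_spin_lock(&tr->max_lock);
	if (tr->cond_snapshot)
		ret = -EBUSY;
	arch_spin_unlock(&tr->max_lock);
	if (ret)
		goto out;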
> +
> 	switch (val) {
> 	case 0:
> 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
> @@ -6503,7 +6683,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
> 		local_irq_disable();
> 		/* Now, we're going to swap */
> 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
> -			update_max_tr(tr, current, smp_processor_id());
> +			update_max_tr(tr, current, smp_processor_id(), NULL);
> 		else
> 			update_max_tr_single(tr, current, iter->cpu_file);
> 		local_irq_enable();
> @@ -7095,7 +7275,7 @@ ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
> 		struct trace_array *tr, struct ftrace_probe_ops *ops,
> 		void *data)
> {
> -	tracing_snapshot_instance(tr);
> +	tracing_snapshot_instance(tr, NULL);
> }
> 
> static void
> @@ -7117,7 +7297,7 @@ ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
> 		(*count)--;
> 	}
> 
> -	tracing_snapshot_instance(tr);
> +	tracing_snapshot_instance(tr, NULL);
> }
> 
> static int
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 08900828d282..23c1ed047868 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -194,6 +194,53 @@ struct trace_pid_list {
> 	unsigned long *pids;
> };
> 
> +typedef bool (*cond_update_fn_t)(struct trace_array *tr, void *cond_data);
> +
> +/**
> + * struct cond_snapshot - conditional snapshot data and callback
> + *
> + * The cond_snapshot structure encapsulates a callback function and
> + * data associated with the snapshot for a given tracing instance.
> + *
> + * When a snapshot is taken conditionally, by invoking
> + * tracing_snapshot_cond(tr, cond_data), the cond_data passed in is
> + * passed in turn to the cond_snapshot.update() function. That data
> + * can be compared by the update() implementation with the cond_data
> + * contained within the struct cond_snapshot instance associated with
> + * the trace_array. Because the tr->max_lock is held throughout the
> + * update() call, the update() function can directly retrieve the
> + * cond_snapshot and cond_data associated with the per-instance
> + * snapshot associated with the trace_array.
> + *
> + * The cond_snapshot.update() implementation can save data to be
> + * associated with the snapshot if it decides to, and returns 'true'
> + * in that case, or it returns 'false' if the conditional snapshot
> + * shouldn't be taken.
> + *
> + * The cond_snapshot instance is created and associated with the
> + * user-defined cond_data by tracing_cond_snapshot_enable().
> + * Likewise, the cond_snapshot instance is destroyed and is no longer
> + * associated with the trace instance by
> + * tracing_cond_snapshot_disable().
> + *
> + * The method below is required.
> + *
> + * @update: When a conditional snapshot is invoked, the update()
> + *	callback function is invoked with the tr->max_lock held. The
> + *	update() implementation signals whether or not to actually
> + *	take the snapshot, by returning 'true' if so, 'false' if no
> + *	snapshot should be taken. Because the max_lock is held for
> + *	the duration of update(), the implementation is safe to
> + *	directly retrieve and save any implementation data it needs
> + *	to in association with the snapshot.
> + */
> +#ifdef CONFIG_TRACER_SNAPSHOT
> +struct cond_snapshot {
> +	void *cond_data;
> +	cond_update_fn_t update;
> +};
> +#endif

This is just defining a structure, not data or text. No need for the
#ifdef.
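As an aside, an update() callback following the kerneldoc above would
look something like this (hypothetical example; "struct wakeup_cond",
"wakeup_cond_update" and the fields are invented names, not part of
this patch):

	struct wakeup_cond {
		u64 threshold;	/* saved at enable time */
		u64 val;	/* passed in at snapshot time */
	};

	static bool wakeup_cond_update(struct trace_array *tr, void *cond_data)
	{
		/*
		 * tr->max_lock is held for the duration of this
		 * callback, so the cond_snapshot can be dereferenced
		 * directly.
		 */
		struct wakeup_cond *saved = tr->cond_snapshot->cond_data;
		struct wakeup_cond *now = cond_data;

		/*
		 * Only take the snapshot when the current value
		 * exceeds the threshold saved at enable time.
		 */
		return now->val > saved->threshold;
	}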
> +
> /*
>  * The trace array - an array of per-CPU trace arrays. This is the
>  * highest level data structure that individual tracers deal with.
> @@ -277,6 +324,9 @@ struct trace_array {
> #endif
> 	int			time_stamp_abs_ref;
> 	struct list_head	hist_vars;
> +#ifdef CONFIG_TRACER_SNAPSHOT
> +	struct cond_snapshot	*cond_snapshot;
> +#endif
> };
> 
> enum {
> @@ -727,7 +777,8 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
> 		    const char __user *ubuf, size_t cnt);
> 
> #ifdef CONFIG_TRACER_MAX_TRACE
> -void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
> +void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
> +		   void *cond_data);
> void update_max_tr_single(struct trace_array *tr,
> 			  struct task_struct *tsk, int cpu);
> #endif /* CONFIG_TRACER_MAX_TRACE */
> @@ -1808,6 +1859,11 @@ static inline bool event_command_needs_rec(struct event_command *cmd_ops)
> extern int trace_event_enable_disable(struct trace_event_file *file,
> 				      int enable, int soft_disable);
> extern int tracing_alloc_snapshot(void);
> +extern void tracing_snapshot_cond(struct trace_array *tr, void *cond_data);
> +extern int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update);
> +
> +extern int tracing_snapshot_cond_disable(struct trace_array *tr);
> +extern void *tracing_cond_snapshot_data(struct trace_array *tr);
> 
> extern const char *__start___trace_bprintk_fmt[];
> extern const char *__stop___trace_bprintk_fmt[];
> @@ -1881,7 +1937,7 @@ static inline void trace_event_eval_update(struct trace_eval_map **map, int len)
> #endif
> 
> #ifdef CONFIG_TRACER_SNAPSHOT
> -void tracing_snapshot_instance(struct trace_array *tr);
> +void tracing_snapshot_instance(struct trace_array *tr, void *cond_data);
> int tracing_alloc_snapshot_instance(struct trace_array *tr);
> #else
> static inline void tracing_snapshot_instance(struct trace_array *tr) { }
> diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
> index 2152d1e530cb..a77102a17046 100644
> --- a/kernel/trace/trace_events_trigger.c
> +++ b/kernel/trace/trace_events_trigger.c
> @@ -1049,7 +1049,7 @@ snapshot_trigger(struct event_trigger_data *data, void *rec,
> 	struct trace_event_file *file = data->private_data;
> 
> 	if (file)
> -		tracing_snapshot_instance(file->tr);
> +		tracing_snapshot_instance(file->tr, NULL);

Hmm, with all these added NULLs, I wonder if it wouldn't just be better
to make another function:

	void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_var);

	void tracing_snapshot_instance(struct trace_array *tr)
	{
		tracing_snapshot_instance_cond(tr, NULL);
	}

Then it won't be so intrusive, as there's more calls with NULL than
something added.

-- Steve

> 	else
> 		tracing_snapshot();
> }
> diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
> index 4ea7e6845efb..007c074fe6d6 100644
> --- a/kernel/trace/trace_sched_wakeup.c
> +++ b/kernel/trace/trace_sched_wakeup.c
> @@ -482,7 +482,7 @@ probe_wakeup_sched_switch(void *ignore, bool preempt,
> 
> 	if (likely(!is_tracing_stopped())) {
> 		wakeup_trace->max_latency = delta;
> -		update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
> +		update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu, NULL);
> 	}
> 
>  out_unlock: