On Tue, 5 Feb 2019 16:41:51 -0600
Tom Zanussi <zanussi@xxxxxxxxxx> wrote:

> +/**
> + * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
> + * @tr: The tracing instance
> + * @cond_data: User data to associate with the snapshot
> + * @update: Implementation of the cond_snapshot update function
> + *
> + * Check whether the conditional snapshot for the given instance has
> + * already been enabled, or if the current tracer is already using a
> + * snapshot; if so, return -EBUSY, else create a cond_snapshot and
> + * save the cond_data and update function inside.
> + *
> + * Returns 0 if successful, error otherwise.
> + */
> +int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
> +				 cond_update_fn_t update)
> +{
> +	struct cond_snapshot *cond_snapshot;
> +	int ret = 0;
> +
> +	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
> +	if (!cond_snapshot)
> +		return -ENOMEM;
> +
> +	cond_snapshot->cond_data = cond_data;
> +	cond_snapshot->update = update;
> +
> +	mutex_lock(&trace_types_lock);
> +
> +	ret = tracing_alloc_snapshot_instance(tr);
> +	if (ret) {
> +		kfree(cond_snapshot);
> +		return ret;

Just returned while holding trace_types_lock. Best to have:

	if (ret)
		goto fail_unlock;

> +	}
> +
> +	if (tr->current_trace->use_max_tr) {
> +		mutex_unlock(&trace_types_lock);
> +		kfree(cond_snapshot);
> +		return -EBUSY;
> +	}
> +
> +	arch_spin_lock(&tr->max_lock);
> +
> +	if (tr->cond_snapshot) {
> +		kfree(cond_snapshot);
> +		ret = -EBUSY;

Hmm, as tr->cond_snapshot can only be set while holding the
trace_types_lock (although it can be cleared without it), we can still
move this check up. Yeah, it may race with clearing (but do we care?)
but it makes the code a bit simpler.

	mutex_lock(&trace_types_lock);

	/*
	 * Because tr->cond_snapshot is only set to something other
	 * than NULL under the trace_types_lock, we can perform the
	 * busy test here. Worst case is that we return a -EBUSY just
	 * before it gets cleared. But do we really care about that?
	 */
	if (tr->cond_snapshot) {
		ret = -EBUSY;
		goto fail_unlock;
	}

	arch_spin_lock(&tr->max_lock);
	tr->cond_snapshot = cond_snapshot;
	arch_spin_unlock(&tr->max_lock);

> +	} else
> +		tr->cond_snapshot = cond_snapshot;
> +
> +	arch_spin_unlock(&tr->max_lock);
> +
> +	mutex_unlock(&trace_types_lock);
> +
> +	return ret;

 fail_unlock:
	kfree(cond_snapshot);
	mutex_unlock(&trace_types_lock);
	return ret;

> +}
> +EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
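Putting both of those changes together, the whole function would then
look something like this (untested sketch, just to show the resulting
flow):

	int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
					 cond_update_fn_t update)
	{
		struct cond_snapshot *cond_snapshot;
		int ret = 0;

		cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
		if (!cond_snapshot)
			return -ENOMEM;

		cond_snapshot->cond_data = cond_data;
		cond_snapshot->update = update;

		mutex_lock(&trace_types_lock);

		ret = tracing_alloc_snapshot_instance(tr);
		if (ret)
			goto fail_unlock;

		if (tr->current_trace->use_max_tr) {
			ret = -EBUSY;
			goto fail_unlock;
		}

		/*
		 * tr->cond_snapshot can only become non-NULL under the
		 * trace_types_lock, so the busy test is safe here.
		 */
		if (tr->cond_snapshot) {
			ret = -EBUSY;
			goto fail_unlock;
		}

		arch_spin_lock(&tr->max_lock);
		tr->cond_snapshot = cond_snapshot;
		arch_spin_unlock(&tr->max_lock);

		mutex_unlock(&trace_types_lock);

		return ret;

	 fail_unlock:
		kfree(cond_snapshot);
		mutex_unlock(&trace_types_lock);
		return ret;
	}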
> +
> +/**
> + * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
> + * @tr: The tracing instance
> + *
> + * Check whether the conditional snapshot for the given instance is
> + * enabled; if so, free the cond_snapshot associated with it,
> + * otherwise return -EINVAL.
> + *
> + * Returns 0 if successful, error otherwise.
> + */
> +int tracing_snapshot_cond_disable(struct trace_array *tr)
> +{
> +	int ret = 0;
> +
> +	arch_spin_lock(&tr->max_lock);
> +
> +	if (!tr->cond_snapshot)
> +		ret = -EINVAL;
> +	else {
> +		kfree(tr->cond_snapshot);
> +		tr->cond_snapshot = NULL;
> +	}
> +
> +	arch_spin_unlock(&tr->max_lock);
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
> #else
> void tracing_snapshot(void)
> {
> 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
> }
> EXPORT_SYMBOL_GPL(tracing_snapshot);
> +void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
> +{
> +	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
> +}
> +EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
> int tracing_alloc_snapshot(void)
> {
> 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
> @@ -1043,6 +1181,21 @@ void tracing_snapshot_alloc(void)
> 	tracing_snapshot();
> }
> EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
> +void *tracing_cond_snapshot_data(struct trace_array *tr)
> +{
> +	return NULL;
> +}
> +EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
> +int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
> +{
> +	return -ENODEV;
> +}
> +EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
> +int tracing_snapshot_cond_disable(struct trace_array *tr)
> +{
> +	return false;
> +}
> +EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
> #endif /* CONFIG_TRACER_SNAPSHOT */
> 
> void tracer_tracing_off(struct trace_array *tr)
> @@ -1354,12 +1507,14 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
>  * @tr: tracer
>  * @tsk: the task with the latency
>  * @cpu: The cpu that initiated the trace.
> + * @cond_data: User data associated with a conditional snapshot
>  *
>  * Flip the buffers between the @tr and the max_tr and record information
>  * about which task was the cause of this latency.
>  */
> void
> -update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
> +update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
> +	      void *cond_data)
> {
> 	if (tr->stop_count)
> 		return;
> @@ -1380,6 +1535,12 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
> 	else
> 		ring_buffer_record_off(tr->max_buffer.buffer);
> 
> +#ifdef CONFIG_TRACER_SNAPSHOT
> +	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {

Let's make this just a "goto out_unlock;" and add the out_unlock label
just before the arch_spin_unlock.
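That is, something like this (sketch):

	#ifdef CONFIG_TRACER_SNAPSHOT
		if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
			goto out_unlock;
	#endif
		swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);

		__update_max_tr(tr, tsk, cpu);

	 out_unlock:
		arch_spin_unlock(&tr->max_lock);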
> +		arch_spin_unlock(&tr->max_lock);
> +		return;
> +	}
> +#endif
> 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
> 
> 	__update_max_tr(tr, tsk, cpu);
> @@ -5396,6 +5557,17 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf)
> 	if (t == tr->current_trace)
> 		goto out;
> 
> +#ifdef CONFIG_TRACER_SNAPSHOT
> +	if (t->use_max_tr) {
> +		arch_spin_lock(&tr->max_lock);
> +		if (tr->cond_snapshot) {
> +			ret = -EBUSY;
> +			arch_spin_unlock(&tr->max_lock);
> +			goto out;
> +		}
> +		arch_spin_unlock(&tr->max_lock);
> +	}

How about:

	if (t->use_max_tr) {
		arch_spin_lock(&tr->max_lock);
		if (tr->cond_snapshot)
			ret = -EBUSY;
		arch_spin_unlock(&tr->max_lock);
		if (ret)
			goto out;
	}

> +#endif
> 	/* Some tracers won't work on kernel command line */
> 	if (system_state < SYSTEM_RUNNING && t->noboot) {
> 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
> @@ -6478,6 +6650,14 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
> 		goto out;
> 	}
> 
> +	arch_spin_lock(&tr->max_lock);
> +	if (tr->cond_snapshot) {
> +		ret = -EBUSY;
> +		arch_spin_unlock(&tr->max_lock);
> +		goto out;
> +	}
> +	arch_spin_unlock(&tr->max_lock);

Same here.
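That is, the same pattern would apply (sketch):

	arch_spin_lock(&tr->max_lock);
	if (tr->cond_snapshot)
		ret = -EBUSY;
	arch_spin_unlock(&tr->max_lock);
	if (ret)
		goto out;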
> +
> 	switch (val) {
> 	case 0:
> 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
> @@ -6503,7 +6683,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
> 		local_irq_disable();
> 		/* Now, we're going to swap */
> 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
> -			update_max_tr(tr, current, smp_processor_id());
> +			update_max_tr(tr, current, smp_processor_id(), NULL);
> 		else
> 			update_max_tr_single(tr, current, iter->cpu_file);
> 		local_irq_enable();
> @@ -7095,7 +7275,7 @@ ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
> 		struct trace_array *tr, struct ftrace_probe_ops *ops,
> 		void *data)
> {
> -	tracing_snapshot_instance(tr);
> +	tracing_snapshot_instance(tr, NULL);
> }
> 
> static void
> @@ -7117,7 +7297,7 @@ ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
> 		(*count)--;
> 	}
> 
> -	tracing_snapshot_instance(tr);
> +	tracing_snapshot_instance(tr, NULL);
> }
> 
> static int
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 08900828d282..23c1ed047868 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -194,6 +194,53 @@ struct trace_pid_list {
> 	unsigned long *pids;
> };
> 
> +typedef bool (*cond_update_fn_t)(struct trace_array *tr, void *cond_data);
> +
> +/**
> + * struct cond_snapshot - conditional snapshot data and callback
> + *
> + * The cond_snapshot structure encapsulates a callback function and
> + * data associated with the snapshot for a given tracing instance.
> + *
> + * When a snapshot is taken conditionally, by invoking
> + * tracing_snapshot_cond(tr, cond_data), the cond_data passed in is
> + * passed in turn to the cond_snapshot.update() function. That data
> + * can be compared by the update() implementation with the cond_data
> + * contained within the struct cond_snapshot instance associated with
> + * the trace_array. Because the tr->max_lock is held throughout the
> + * update() call, the update() function can directly retrieve the
> + * cond_snapshot and cond_data associated with the per-instance
> + * snapshot associated with the trace_array.
> + *
> + * The cond_snapshot.update() implementation can save data to be
> + * associated with the snapshot if it decides to, and returns 'true'
> + * in that case, or it returns 'false' if the conditional snapshot
> + * shouldn't be taken.
> + *
> + * The cond_snapshot instance is created and associated with the
> + * user-defined cond_data by tracing_cond_snapshot_enable().
> + * Likewise, the cond_snapshot instance is destroyed and is no longer
> + * associated with the trace instance by
> + * tracing_cond_snapshot_disable().
> + *
> + * The method below is required.
> + *
> + * @update: When a conditional snapshot is invoked, the update()
> + *	callback function is invoked with the tr->max_lock held. The
> + *	update() implementation signals whether or not to actually
> + *	take the snapshot, by returning 'true' if so, 'false' if no
> + *	snapshot should be taken. Because the max_lock is held for
> + *	the duration of update(), the implementation is safe to
> + *	directly retrieve and save any implementation data it needs
> + *	to in association with the snapshot.
> + */
> +#ifdef CONFIG_TRACER_SNAPSHOT
> +struct cond_snapshot {
> +	void *cond_data;
> +	cond_update_fn_t update;
> +};
> +#endif

This is just defining a structure, not data or text. No need for the
#ifdef.
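As an aside, an update() callback following the kerneldoc above would
look something like this (hypothetical example; "struct wakeup_cond",
"wakeup_cond_update" and the fields are invented names, not part of
this patch):

	struct wakeup_cond {
		u64 threshold;	/* saved at enable time */
		u64 val;	/* passed in at snapshot time */
	};

	static bool wakeup_cond_update(struct trace_array *tr, void *cond_data)
	{
		/*
		 * tr->max_lock is held for the duration of this
		 * callback, so the cond_snapshot can be dereferenced
		 * directly.
		 */
		struct wakeup_cond *saved = tr->cond_snapshot->cond_data;
		struct wakeup_cond *now = cond_data;

		/*
		 * Only take the snapshot when the current value
		 * exceeds the threshold saved at enable time.
		 */
		return now->val > saved->threshold;
	}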
> +
> /*
>  * The trace array - an array of per-CPU trace arrays. This is the
>  * highest level data structure that individual tracers deal with.
> @@ -277,6 +324,9 @@ struct trace_array {
> #endif
> 	int			time_stamp_abs_ref;
> 	struct list_head	hist_vars;
> +#ifdef CONFIG_TRACER_SNAPSHOT
> +	struct cond_snapshot	*cond_snapshot;
> +#endif
> };
> 
> enum {
> @@ -727,7 +777,8 @@ int trace_pid_write(struct trace_pid_list *filtered_pids,
> 		    const char __user *ubuf, size_t cnt);
> 
> #ifdef CONFIG_TRACER_MAX_TRACE
> -void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
> +void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
> +		   void *cond_data);
> void update_max_tr_single(struct trace_array *tr,
> 			  struct task_struct *tsk, int cpu);
> #endif /* CONFIG_TRACER_MAX_TRACE */
> @@ -1808,6 +1859,11 @@ static inline bool event_command_needs_rec(struct event_command *cmd_ops)
> extern int trace_event_enable_disable(struct trace_event_file *file,
> 				      int enable, int soft_disable);
> extern int tracing_alloc_snapshot(void);
> +extern void tracing_snapshot_cond(struct trace_array *tr, void *cond_data);
> +extern int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update);
> +
> +extern int tracing_snapshot_cond_disable(struct trace_array *tr);
> +extern void *tracing_cond_snapshot_data(struct trace_array *tr);
> 
> extern const char *__start___trace_bprintk_fmt[];
> extern const char *__stop___trace_bprintk_fmt[];
> @@ -1881,7 +1937,7 @@ static inline void trace_event_eval_update(struct trace_eval_map **map, int len)
> #endif
> 
> #ifdef CONFIG_TRACER_SNAPSHOT
> -void tracing_snapshot_instance(struct trace_array *tr);
> +void tracing_snapshot_instance(struct trace_array *tr, void *cond_data);
> int tracing_alloc_snapshot_instance(struct trace_array *tr);
> #else
> static inline void tracing_snapshot_instance(struct trace_array *tr) { }
> diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
> index 2152d1e530cb..a77102a17046 100644
> --- a/kernel/trace/trace_events_trigger.c
> +++ b/kernel/trace/trace_events_trigger.c
> @@ -1049,7 +1049,7 @@ snapshot_trigger(struct event_trigger_data *data, void *rec,
> 	struct trace_event_file *file = data->private_data;
> 
> 	if (file)
> -		tracing_snapshot_instance(file->tr);
> +		tracing_snapshot_instance(file->tr, NULL);

Hmm, with all these added NULLs, I wonder if it wouldn't just be better
to make another function:

	void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_var);

	void tracing_snapshot_instance(struct trace_array *tr)
	{
		tracing_snapshot_instance_cond(tr, NULL);
	}

Then it won't be so intrusive, as there's more calls with NULL than
something added.

-- Steve

> 	else
> 		tracing_snapshot();
> }
> diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
> index 4ea7e6845efb..007c074fe6d6 100644
> --- a/kernel/trace/trace_sched_wakeup.c
> +++ b/kernel/trace/trace_sched_wakeup.c
> @@ -482,7 +482,7 @@ probe_wakeup_sched_switch(void *ignore, bool preempt,
> 
> 	if (likely(!is_tracing_stopped())) {
> 		wakeup_trace->max_latency = delta;
> -		update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
> +		update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu, NULL);
> 	}
> 
>  out_unlock: