Quoting Sean Paul (2020-01-14 19:21:43) > From: Sean Paul <seanpaul@xxxxxxxxxxxx> > > This patch uses a ring_buffer to keep a "flight recorder" (name credit Weston) > of DRM logs for a specified set of debug categories. The user writes a > bitmask of debug categories to the "trace_mask" node and can read log > messages from the "trace" node. Would it be worthy to add more specificity to the terminology and call this "log trace" to tell it apart from pre-existing tracing in the context tracepoints? Especially as the proposal started as adding tracepoints. Further, I feel like it might make sense to add "log_trace_size" property, or is your thinking to deliberately keep the buffer rather small? A dump at the time of GEM error from i915 can be rather massive (and massively useful, too). Other than that, it looks good to me. Thanks for moving this forward :) Regards, Joonas > These nodes currently exist in debugfs under the dri directory. I > intended on exposing all of this through tracefs originally, but the > tracefs entry points are not exposed, so there's no way to create > tracefs files from drivers at the moment. I think it would be a > worthwhile endeavour, but one requiring more time and conversation to > ensure the drm traces fit somewhere sensible. > > Cc: Daniel Vetter <daniel.vetter@xxxxxxxx> > Cc: David Airlie <airlied@xxxxxxxxx> > Cc: Jani Nikula <jani.nikula@xxxxxxxxxxxxxxx> > Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> > Cc: Pekka Paalanen <ppaalanen@xxxxxxxxx> > Cc: Rob Clark <robdclark@xxxxxxxxx> > Cc: Steven Rostedt <rostedt@xxxxxxxxxxx> > Cc: Thomas Zimmermann <tzimmermann@xxxxxxx> > Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > Signed-off-by: Sean Paul <seanpaul@xxxxxxxxxxxx> > Link: https://patchwork.freedesktop.org/patch/msgid/20191010204823.195540-1-sean@xxxxxxxxxx #v1 > Link: https://lists.freedesktop.org/archives/dri-devel/2019-November/243230.html #v2 > Link: https://patchwork.freedesktop.org/patch/msgid/20191212203301.142437-1-sean@xxxxxxxxxx #v3 > > Changes in v2: > - Went with a completely different approach: > https://lists.freedesktop.org/archives/dri-devel/2019-November/243230.html > > Changes in v3: > - Changed commit message to be a bit less RFC-y > - Make class_drm_category_log an actual trace class > > Changes in v4: > - Instead of [ab]using trace events and the system trace buffer, use our > own ringbuffer > --- > --- > Documentation/gpu/drm-uapi.rst | 9 + > drivers/gpu/drm/Kconfig | 1 + > drivers/gpu/drm/Makefile | 2 +- > drivers/gpu/drm/drm_drv.c | 3 + > drivers/gpu/drm/drm_print.c | 80 +++++-- > drivers/gpu/drm/drm_trace.c | 376 +++++++++++++++++++++++++++++++++ > include/drm/drm_print.h | 39 ++++ > 7 files changed, 487 insertions(+), 23 deletions(-) > create mode 100644 drivers/gpu/drm/drm_trace.c > > diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst > index 56fec6ed1ad8..089eb6fd3e94 100644 > --- a/Documentation/gpu/drm-uapi.rst > +++ b/Documentation/gpu/drm-uapi.rst > @@ -312,6 +312,15 @@ Debugfs Support > .. kernel-doc:: drivers/gpu/drm/drm_debugfs.c > :export: > > +DRM Tracing > +--------------- > + > +.. kernel-doc:: drivers/gpu/drm/drm_trace.c > + :doc: DRM Tracing > + > +.. kernel-doc:: drivers/gpu/drm/drm_trace.c > + :internal: > + > Sysfs Support > ============= > > diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig > index d0aa6cff2e02..9d8077e87afe 100644 > --- a/drivers/gpu/drm/Kconfig > +++ b/drivers/gpu/drm/Kconfig > @@ -14,6 +14,7 @@ menuconfig DRM > select I2C > select I2C_ALGOBIT > select DMA_SHARED_BUFFER > + select RING_BUFFER > select SYNC_FILE > help > Kernel-level support for the Direct Rendering Infrastructure (DRI) > diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile > index 6493088a0fdd..88b4674934e6 100644 > --- a/drivers/gpu/drm/Makefile > +++ b/drivers/gpu/drm/Makefile > @@ -29,7 +29,7 @@ drm-$(CONFIG_DRM_PANEL) += drm_panel.o > drm-$(CONFIG_OF) += drm_of.o > drm-$(CONFIG_AGP) += drm_agpsupport.o > drm-$(CONFIG_PCI) += drm_pci.o > -drm-$(CONFIG_DEBUG_FS) += drm_debugfs.o drm_debugfs_crc.o > +drm-$(CONFIG_DEBUG_FS) += drm_debugfs.o drm_debugfs_crc.o drm_trace.o > drm-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o > > drm_vram_helper-y := drm_gem_vram_helper.o \ > diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c > index 7c18a980cd4b..98260b9f8004 100644 > --- a/drivers/gpu/drm/drm_drv.c > +++ b/drivers/gpu/drm/drm_drv.c > @@ -1114,6 +1114,7 @@ static const struct file_operations drm_stub_fops = { > static void drm_core_exit(void) > { > unregister_chrdev(DRM_MAJOR, "drm"); > + drm_trace_cleanup(); > debugfs_remove(drm_debugfs_root); > drm_sysfs_destroy(); > idr_destroy(&drm_minors_idr); > @@ -1135,6 +1136,8 @@ static int __init drm_core_init(void) > > drm_debugfs_root = debugfs_create_dir("dri", NULL); > > + WARN_ON(drm_trace_init(drm_debugfs_root)); > + > ret = register_chrdev(DRM_MAJOR, "drm", &drm_stub_fops); > if (ret < 0) > goto error; > diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c > index 111b932cf2a9..0ac1867937bf 100644 > --- a/drivers/gpu/drm/drm_print.c > +++ b/drivers/gpu/drm/drm_print.c > @@ -262,21 +262,37 @@ void drm_dev_dbg(const struct device *dev, enum drm_debug_category category, > struct va_format vaf; > va_list args; > > - if (!drm_debug_enabled(category)) > - return; > - > - va_start(args, format); > - vaf.fmt = format; > - vaf.va = &args; > - > - if (dev) > - dev_printk(KERN_DEBUG, dev, "[" DRM_NAME ":%ps] %pV", > - __builtin_return_address(0), &vaf); > - else > - printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV", > - __builtin_return_address(0), &vaf); > + if (drm_debug_enabled(category)) { > + va_start(args, format); > + vaf.fmt = format; > + vaf.va = &args; > + > + if (dev) > + dev_printk(KERN_DEBUG, dev, "[" DRM_NAME ":%ps] %pV", > + __builtin_return_address(0), &vaf); > + else > + printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV", > + __builtin_return_address(0), &vaf); > + > + va_end(args); > + } > > - va_end(args); > + if (drm_trace_enabled(category)) { > + va_start(args, format); > + vaf.fmt = format; > + vaf.va = &args; > + > + if (dev) > + drm_dev_trace_printf(dev, "[%ps] %pV", > + __builtin_return_address(0), > + &vaf); > + else > + drm_trace_printf("[%ps] %pV", > + __builtin_return_address(0), > + &vaf); > + > + va_end(args); > + } > } > EXPORT_SYMBOL(drm_dev_dbg); > > @@ -285,17 +301,28 @@ void __drm_dbg(enum drm_debug_category category, const char *format, ...) > struct va_format vaf; > va_list args; > > - if (!drm_debug_enabled(category)) > - return; > > - va_start(args, format); > - vaf.fmt = format; > - vaf.va = &args; > + if (drm_debug_enabled(category)) { > + va_start(args, format); > + vaf.fmt = format; > + vaf.va = &args; > > - printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV", > - __builtin_return_address(0), &vaf); > + printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV", > + __builtin_return_address(0), &vaf); > > - va_end(args); > + va_end(args); > + } > + > + if (drm_trace_enabled(category)) { > + va_start(args, format); > + vaf.fmt = format; > + vaf.va = &args; > + > + drm_trace_printf("[%ps] %pV", __builtin_return_address(0), > + &vaf); > + > + va_end(args); > + } > } > EXPORT_SYMBOL(__drm_dbg); > > @@ -312,6 +339,15 @@ void __drm_err(const char *format, ...) > __builtin_return_address(0), &vaf); > > va_end(args); > + > + va_start(args, format); > + vaf.fmt = format; > + vaf.va = &args; > + > + drm_trace_printf("[%ps] *ERROR* %pV", __builtin_return_address(0), > + &vaf); > + > + va_end(args); > } > EXPORT_SYMBOL(__drm_err); > > diff --git a/drivers/gpu/drm/drm_trace.c b/drivers/gpu/drm/drm_trace.c > new file mode 100644 > index 000000000000..0e7cf342d1d9 > --- /dev/null > +++ b/drivers/gpu/drm/drm_trace.c > @@ -0,0 +1,376 @@ > +/* SPDX-License-Identifier: MIT */ > +/* > + * Copyright (C) 2020 Google, Inc. > + * > + * Authors: > + * Sean Paul <seanpaul@xxxxxxxxxxxx> > + */ > + > +#include <linux/cpumask.h> > +#include <linux/debugfs.h> > +#include <linux/kernel.h> > +#include <linux/ring_buffer.h> > +#include <linux/slab.h> > +#include <linux/stat.h> > + > +#include <drm/drm_device.h> > +#include <drm/drm_print.h> > + > +#define DRM_TRACE_MAX_LEN 256 > + > +/** > + * DOC: DRM Tracing > + * > + * *tl;dr* DRM tracing is a lightweight alternative to traditional DRM debug > + * logging. > + * > + * While DRM logging is quite convenient when reproducing a specific issue, it > + * doesn't help when something goes wrong unexpectedly. There are a couple > + * reasons why one does not want to enable DRM logging at all times: > + * > + * 1. We don't want to overwhelm syslog with drm spam, others have to use it too > + * 2. Console logging is slow > + * > + * DRM tracing aims to solve both these problems. > + * > + * To use DRM tracing, write a DRM debug category mask (this is a bitmask of > + * &drm_debug_category values) to the trace_mask file: > + * :: > + * > + * eg: echo 0x106 > /sys/kernel/debug/dri/trace_mask > + * > + * Once active, all log messages in the specified categories will be written to > + * the DRM trace. Once at capacity, the trace will overwrite old messages with > + * new ones. At any point, one can read the trace file to extract the previous N > + * DRM messages: > + * :: > + * > + * eg: cat /sys/kernel/debug/dri/trace > + * > + * Considerations > + * ************** > + * The contents of the DRM Trace are **not** considered UABI. **DO NOT depend on > + * the values of these traces in your userspace.** These traces are intended for > + * entertainment purposes only. The contents of these logs carry no warranty, > + * expressed or implied. > + * > + * New traces can not be added to the trace buffer while it is being read. If > + * this proves to be a problem, it can be mitigated by making a copy of the > + * buffer on start of read. Since DRM trace is not meant to be continuously > + * read, this loss is acceptable. > + * > + * The timestamps on logs are CPU-local. As such, log messages from different > + * CPUs may have slightly different ideas about time. > + * > + * Since each CPU has its own buffer, they won't all overflow at the same rate. > + * This means that messages from a particularly active CPU could be dropped > + * while an inactive CPU might have much older log messages. So don't be fooled > + * if you seem to be missing log messages when you see a switch between CPUs in > + * the logs. > + * > + * Internals > + * ********* > + * The DRM Tracing functions are intentionally unexported, they are not meant to > + * be used by drivers directly. The reasons are twofold: > + * > + * 1. All messages going to traces should also go to the console logs. This > + * ensures users can choose their logging medium without fear they're losing > + * messages. > + * 2. Writing directly to the trace skips category filtering, resulting in trace > + * spam. > + */ > + > +struct drm_trace_info { > + struct ring_buffer *buffer; > + struct dentry *debugfs; > + struct dentry *debugfs_mask; > + enum drm_debug_category category_mask; > +}; > +static struct drm_trace_info drm_trace; > + > +struct drm_trace_seq_iter { > + loff_t pos; > + cpumask_var_t cpu_mask; > + int cpu; > + u64 ts; > +}; > + > +static void *drm_trace_seq_start(struct seq_file *seq_file, loff_t *pos) > +{ > + struct drm_trace_info *info = seq_file->private; > + struct drm_trace_seq_iter *iter; > + int cpu; > + > + iter = kzalloc(sizeof(*iter), GFP_KERNEL); > + if (!iter) > + return NULL; > + > + if (!zalloc_cpumask_var(&iter->cpu_mask, GFP_KERNEL)) > + return NULL; > + > + /* > + * TODO: We could do better than stopping record for the entirety of the > + * read session. > + */ > + ring_buffer_record_off(info->buffer); > + > + /* > + * pos is only used as a means of determining whether we're at the start > + * of the virtual file, or continuing a read. We don't want to skip over > + * log lines since that's not a meaningful thing to do. > + */ > + iter->pos = *pos; > + > + iter->cpu = -1; > + > + /* > + * There's no way to extract the ring buffer's cpumask, so we'll try > + * every possible cpu and skip the invalid entries. > + */ > + for_each_possible_cpu(cpu) { > + if (ring_buffer_entries_cpu(info->buffer, cpu)) > + cpumask_set_cpu(cpu, iter->cpu_mask); > + } > + > + return iter; > +} > + > +static void *drm_trace_seq_next(struct seq_file *seq_file, void *data, > + loff_t *pos) > +{ > + struct drm_trace_info *info = seq_file->private; > + struct drm_trace_seq_iter *iter = data; > + int cpu; > + > + *pos = ++iter->pos; > + iter->cpu = -1; > + iter->ts = 0; > + > + /* Find the oldest event across our cpu_mask */ > + for_each_cpu(cpu, iter->cpu_mask) { > + u64 ts; > + > + if (!ring_buffer_peek(info->buffer, cpu, &ts, NULL)) { > + cpumask_clear_cpu(cpu, iter->cpu_mask); > + continue; > + } > + if (iter->cpu == -1 || ts < iter->ts) { > + iter->ts = ts; > + iter->cpu = cpu; > + } > + } > + if (iter->cpu == -1) > + return NULL; > + > + return iter; > +} > + > +static void drm_trace_seq_stop(struct seq_file *seq_file, void *data) > +{ > + struct drm_trace_info *info = seq_file->private; > + struct drm_trace_seq_iter *iter = data; > + > + free_cpumask_var(iter->cpu_mask); > + kfree(iter); > + > + ring_buffer_record_on(info->buffer); > +} > + > +static int drm_trace_seq_show(struct seq_file *seq_file, void *data) > +{ > + struct drm_trace_info *info = seq_file->private; > + struct drm_trace_seq_iter *iter = data; > + struct ring_buffer_event *event; > + u64 ts, usec; > + > + if (iter->pos == 0) { > + seq_printf(seq_file, "[%3s %12s] %s\n", "cpu", "timestamp", > + "message"); > + return 0; > + } else if (iter->cpu == -1) { > + /* This happens when we start a session with position > 0 */ > + return SEQ_SKIP; > + } > + > + event = ring_buffer_consume(info->buffer, iter->cpu, &ts, NULL); > + > + ts += 500; > + /* ts converts from ns->us */ > + do_div(ts, 1000); > + /* ts converts from us->s */ > + usec = do_div(ts, USEC_PER_SEC); > + > + seq_printf(seq_file, "[%3u %5llu.%06llu] %s", iter->cpu, ts, usec, > + (const char *)ring_buffer_event_data(event)); > + > + return 0; > +} > + > +static const struct seq_operations drm_trace_sops = { > + .start = drm_trace_seq_start, > + .next = drm_trace_seq_next, > + .stop = drm_trace_seq_stop, > + .show = drm_trace_seq_show > +}; > + > +static int drm_trace_fop_open(struct inode *inode, struct file *file) > +{ > + struct seq_file *seq_file; > + int ret; > + > + ret = seq_open(file, &drm_trace_sops); > + if (ret) > + return ret; > + > + seq_file = (struct seq_file *)file->private_data; > + seq_file->private = inode->i_private; /* this is drm_trace_info */ > + > + return 0; > +} > + > +static const struct file_operations drm_trace_fops = { > + .open = drm_trace_fop_open, > + .read = seq_read, > + .llseek = seq_lseek, > + .release = seq_release, > +}; > + > +/** > + * drm_trace_init - initializes tracing for drm core > + * @debugfs_root: the dentry for drm core's debugfs root > + * > + * This function is called on drm core init. It is responsible for initializing > + * drm tracing. This function must be matched by a call to drm_trace_cleanup(). > + * > + * Returns: 0 on success, -errno on failure > + */ > +int drm_trace_init(struct dentry *debugfs_root) > +{ > + struct drm_trace_info *info = &drm_trace; > + int ret; > + > + info->buffer = ring_buffer_alloc(PAGE_SIZE * 2, RB_FL_OVERWRITE); > + if (!info->buffer) > + return -ENOMEM; > + > + info->debugfs_mask = debugfs_create_u32("trace_mask", > + S_IFREG | S_IRUGO | S_IWUSR, > + debugfs_root, > + &info->category_mask); > + if (IS_ERR(info->debugfs)) { > + ret = PTR_ERR(info->debugfs); > + goto err_debugfs_mask; > + } > + > + info->debugfs = debugfs_create_file("trace", S_IFREG | S_IRUGO, > + debugfs_root, info, > + &drm_trace_fops); > + if (IS_ERR(info->debugfs)) { > + ret = PTR_ERR(info->debugfs); > + goto err_debugfs; > + } > + > + return 0; > + > +err_debugfs_mask: > + debugfs_remove(info->debugfs_mask); > +err_debugfs: > + ring_buffer_free(info->buffer); > + return ret; > +} > + > +/** > + * drm_trace_cleanup - cleans up tracing for drm core > + * > + * This function is responsible for cleaning up anything that was previously > + * initialized in drm_trace_init() > + */ > +void drm_trace_cleanup() > +{ > + struct drm_trace_info *info = &drm_trace; > + > + debugfs_remove(info->debugfs); > + debugfs_remove(info->debugfs_mask); > + ring_buffer_free(info->buffer); > + memset(info, 0, sizeof(*info)); > +} > + > +/** > + * drm_trace_enabled - check if a debug category has traces enabled > + * @category: the debug category to check > + * > + * Returns true if the given category has drm traces enabled, false otherwise. > + */ > +bool drm_trace_enabled(enum drm_debug_category category) > +{ > + return READ_ONCE(drm_trace.category_mask) & category; > +} > + > +static int drm_trace_write(const void *data, unsigned int len) > +{ > + struct drm_trace_info *info = &drm_trace; > + struct ring_buffer_event *event; > + void *event_body; > + > + event = ring_buffer_lock_reserve(info->buffer, len); > + if (!event) > + return -ENOMEM; > + > + event_body = ring_buffer_event_data(event); > + memcpy(event_body, data, len); > + > + return ring_buffer_unlock_commit(info->buffer, event); > +} > + > +/** > + * drm_trace_printf - adds an entry to the drm trace > + * @format: printf format of the message to add to the trace > + * > + * This function adds a new entry in the drm trace. > + */ > +void drm_trace_printf(const char *format, ...) > +{ > + char buf[DRM_TRACE_MAX_LEN]; > + va_list args; > + int ret; > + > + va_start(args, format); > + ret = vsnprintf(buf, sizeof(buf), format, args); > + va_end(args); > + > + if (ret < 0) > + return; > + else if (ret >= sizeof(buf)) > + ret = sizeof(buf) - 1; > + > + drm_trace_write(buf, ret + 1); > +} > + > +/** > + * drm_dev_trace_printf - adds an entry to the drm trace > + * @dev: pointer to device so we can print the name > + * @format: printf format of the message to add to the trace > + * > + * This function adds a new entry in the drm trace. > + */ > +void drm_dev_trace_printf(const struct device *dev, const char *format, ...) > +{ > + char buf[DRM_TRACE_MAX_LEN]; > + struct va_format vaf; > + va_list args; > + int ret; > + > + va_start(args, format); > + vaf.fmt = format; > + vaf.va = &args; > + ret = snprintf(buf, sizeof(buf), "%s %s: %pV", dev_driver_string(dev), > + dev_name(dev), &vaf); > + va_end(args); > + > + if (ret < 0) > + return; > + else if (ret >= sizeof(buf)) > + ret = sizeof(buf) - 1; > + > + drm_trace_write(buf, ret + 1); > +} > diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h > index 8f99d389792d..1f3dae7150f0 100644 > --- a/include/drm/drm_print.h > +++ b/include/drm/drm_print.h > @@ -320,6 +320,45 @@ static inline bool drm_debug_enabled(enum drm_debug_category category) > return unlikely(__drm_debug & category); > } > > +#ifdef CONFIG_DEBUG_FS > + > +int drm_trace_init(struct dentry *debugfs_root); > +bool drm_trace_enabled(enum drm_debug_category category); > +__printf(1, 2) > +void drm_trace_printf(const char *format, ...); > +__printf(2, 3) > +void drm_dev_trace_printf(const struct device *dev, const char *format, ...); > +void drm_trace_cleanup(void); > + > +#else > + > +static inline int drm_trace_init(struct dentry *debugfs_root) > +{ > + return 0; > +} > + > +static inline bool drm_trace_enabled(enum drm_debug_category category) > +{ > + return false; > +} > + > +__printf(1, 2) > +static inline void drm_trace_printf(const char *format, ...) > +{ > +} > + > +__printf(2, 3) > +static inline void drm_dev_trace_printf(const struct device *dev, > + const char *format, ...) > +{ > +} > + > +static inline void drm_trace_cleanup(void) > +{ > +} > + > +#endif > + > /* > * struct device based logging > * > -- > Sean Paul, Software Engineer, Google / Chromium OS > _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel