Includes some documentation on what AubCrash is supposed to achieve. Signed-off-by: Oscar Mateo <oscar.mateo@xxxxxxxxx> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/Kconfig | 8 ++++++ drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_aubcrash.c | 47 +++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_aubcrash.h | 42 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_debugfs.c | 49 +++++++++++++++++++++++++++++--- drivers/gpu/drm/i915/i915_sysfs.c | 54 +++++++++++++++++++++++++++++++++--- 6 files changed, 193 insertions(+), 8 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_aubcrash.c create mode 100644 drivers/gpu/drm/i915/i915_aubcrash.h diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index dfd9588..176e53e 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -70,6 +70,14 @@ config DRM_I915_CAPTURE_ERROR If in doubt, say "Y". +config DRM_I915_AUB_CRASH_DUMP + bool "Capture GPU error state in the form of an AUB file" + depends on DRM_I915_CAPTURE_ERROR + default n + help + Choose this option to allow the driver to dump a memtrace file (AUB) + with the GPU state when a hang is detected. 
+ config DRM_I915_COMPRESS_ERROR bool "Compress GPU error state" depends on DRM_I915_CAPTURE_ERROR diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 6c3b048..04956c7 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -124,6 +124,7 @@ i915-y += dvo_ch7017.o \ # Post-mortem debug and GPU hang state capture i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o +i915-$(CONFIG_DRM_I915_AUB_CRASH_DUMP) += i915_aubcrash.o i915-$(CONFIG_DRM_I915_SELFTEST) += \ selftests/i915_random.o \ selftests/i915_selftest.o diff --git a/drivers/gpu/drm/i915/i915_aubcrash.c b/drivers/gpu/drm/i915/i915_aubcrash.c new file mode 100644 index 0000000..95b75ab --- /dev/null +++ b/drivers/gpu/drm/i915/i915_aubcrash.c @@ -0,0 +1,47 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Author: + * Oscar Mateo <oscar.mateo@xxxxxxxxx> + * + */ + +#include "intel_drv.h" +#include "i915_aubcrash.h" + +/** + * DOC: AubCrash + * + * This code is a companion to i915_gpu_error. The idea is that, on a GPU crash, + * we can dump an AUB file that describes the state of the system at the point + * of the crash (GTTs, contexts, BBs, BOs, etc...). While i915_gpu_error kind of + * already does that, it uses a text format that is not especially human-friendly. + * An AUB file, on the other hand, can be used by a number of tools (graphical + * AUB file browsers, simulators, emulators, etc...) that facilitate debugging. + * + */ + +int i915_error_state_to_aub(struct drm_i915_error_state_buf *m, + const struct i915_gpu_state *error) +{ + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_aubcrash.h b/drivers/gpu/drm/i915/i915_aubcrash.h new file mode 100644 index 0000000..bab1953 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_aubcrash.h @@ -0,0 +1,42 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _INTEL_AUBCRASH_H_ +#define _INTEL_AUBCRASH_H_ + +#if IS_ENABLED(CONFIG_DRM_I915_AUB_CRASH_DUMP) + +int i915_error_state_to_aub(struct drm_i915_error_state_buf *m, + const struct i915_gpu_state *error); + +#else + +static inline int i915_error_state_to_aub(struct drm_i915_error_state_buf *m, + const struct i915_gpu_state *error) +{ + return 0; +} + +#endif + +#endif diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index c65e381..f0f23ef 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -31,6 +31,7 @@ #include <linux/sched/mm.h> #include "intel_drv.h" #include "i915_guc_submission.h" +#include "i915_aubcrash.h" static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) { @@ -938,7 +939,7 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data) #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) static ssize_t gpu_state_read(struct file *file, char __user *ubuf, - size_t count, loff_t *pos) + size_t count, loff_t *pos, bool type_aub) { struct i915_gpu_state *error = file->private_data; struct drm_i915_error_state_buf str; @@ -952,7 +953,10 @@ static ssize_t gpu_state_read(struct file *file, char __user *ubuf, if (ret) return ret; - ret = i915_error_state_to_str(&str, error); + if (type_aub) + ret = i915_error_state_to_aub(&str, error); + else + ret = i915_error_state_to_str(&str, error); if (ret) goto out; @@ -967,6 +971,12 @@ static ssize_t gpu_state_read(struct file *file, char __user *ubuf, return ret; } +static ssize_t gpu_state_read_str(struct file *file, char __user *ubuf, + size_t count, loff_t *pos) +{ + return gpu_state_read(file, ubuf, count, pos, false); +} + 
static int gpu_state_release(struct inode *inode, struct file *file) { i915_gpu_state_put(file->private_data); @@ -991,7 +1001,7 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file) static const struct file_operations i915_gpu_info_fops = { .owner = THIS_MODULE, .open = i915_gpu_info_open, - .read = gpu_state_read, + .read = gpu_state_read_str, .llseek = default_llseek, .release = gpu_state_release, }; @@ -1022,11 +1032,38 @@ static int i915_error_state_open(struct inode *inode, struct file *file) static const struct file_operations i915_error_state_fops = { .owner = THIS_MODULE, .open = i915_error_state_open, - .read = gpu_state_read, + .read = gpu_state_read_str, + .write = i915_error_state_write, + .llseek = default_llseek, + .release = gpu_state_release, +}; +#endif + +#if IS_ENABLED(CONFIG_DRM_I915_AUB_CRASH_DUMP) + +static ssize_t gpu_state_read_aub(struct file *file, char __user *ubuf, + size_t count, loff_t *pos) +{ + return gpu_state_read(file, ubuf, count, pos, true); +} + +static const struct file_operations i915_gpu_info_aub_fops = { + .owner = THIS_MODULE, + .open = i915_gpu_info_open, + .read = gpu_state_read_aub, + .llseek = default_llseek, + .release = gpu_state_release, +}; + +static const struct file_operations i915_error_state_aub_fops = { + .owner = THIS_MODULE, + .open = i915_error_state_open, + .read = gpu_state_read_aub, .write = i915_error_state_write, .llseek = default_llseek, .release = gpu_state_release, }; + #endif static int @@ -4776,6 +4813,10 @@ static int i915_hpd_storm_ctl_open(struct inode *inode, struct file *file) {"i915_error_state", &i915_error_state_fops}, {"i915_gpu_info", &i915_gpu_info_fops}, #endif +#if IS_ENABLED(CONFIG_DRM_I915_AUB_CRASH_DUMP) + {"i915_error_state_aub", &i915_error_state_aub_fops}, + {"i915_gpu_info_aub", &i915_gpu_info_aub_fops}, +#endif {"i915_next_seqno", &i915_next_seqno_fops}, {"i915_display_crc_ctl", &i915_display_crc_ctl_fops}, {"i915_pri_wm_latency", 
&i915_pri_wm_latency_fops}, diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 791759f..646ba5f 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -31,6 +31,7 @@ #include <linux/sysfs.h> #include "intel_drv.h" #include "i915_drv.h" +#include "i915_aubcrash.h" static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev) { @@ -495,9 +496,8 @@ static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr static ssize_t error_state_read(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, - loff_t off, size_t count) + loff_t off, size_t count, bool type_aub) { - struct device *kdev = kobj_to_dev(kobj); struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); struct drm_i915_error_state_buf error_str; @@ -509,7 +509,11 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj, return ret; gpu = i915_first_error_state(dev_priv); - ret = i915_error_state_to_str(&error_str, gpu); + + if (type_aub) + ret = i915_error_state_to_aub(&error_str, gpu); + else + ret = i915_error_state_to_str(&error_str, gpu); if (ret) goto out; @@ -536,11 +540,18 @@ static ssize_t error_state_write(struct file *file, struct kobject *kobj, return count; } +static ssize_t error_state_read_str(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + return error_state_read(filp, kobj, attr, buf, off, count, false); +} + static const struct bin_attribute error_state_attr = { .attr.name = "error", .attr.mode = S_IRUSR | S_IWUSR, .size = 0, - .read = error_state_read, + .read = error_state_read_str, .write = error_state_write, }; @@ -559,6 +570,39 @@ static void i915_setup_error_capture(struct device *kdev) {} static void i915_teardown_error_capture(struct device *kdev) {} #endif +#if IS_ENABLED(CONFIG_DRM_I915_AUB_CRASH_DUMP) + +static ssize_t error_state_read_aub(struct file *filp, 
struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + return error_state_read(filp, kobj, attr, buf, off, count, true); +} + +static const struct bin_attribute aub_state_attr = { + .attr.name = "aub", + .attr.mode = S_IRUSR | S_IWUSR, + .size = 0, + .read = error_state_read_aub, + .write = error_state_write, +}; + +static void i915_setup_error_capture_aub(struct device *kdev) +{ + if (sysfs_create_bin_file(&kdev->kobj, &aub_state_attr)) + DRM_ERROR("aub_state sysfs setup failed\n"); +} + +static void i915_teardown_error_capture_aub(struct device *kdev) +{ + sysfs_remove_bin_file(&kdev->kobj, &aub_state_attr); +} + +#else +static void i915_setup_error_capture_aub(struct device *kdev) {} +static void i915_teardown_error_capture_aub(struct device *kdev) {} +#endif + void i915_setup_sysfs(struct drm_i915_private *dev_priv) { struct device *kdev = dev_priv->drm.primary->kdev; @@ -606,6 +650,7 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv) DRM_ERROR("RPS sysfs setup failed\n"); i915_setup_error_capture(kdev); + i915_setup_error_capture_aub(kdev); } void i915_teardown_sysfs(struct drm_i915_private *dev_priv) @@ -613,6 +658,7 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv) struct device *kdev = dev_priv->drm.primary->kdev; i915_teardown_error_capture(kdev); + i915_teardown_error_capture_aub(kdev); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) sysfs_remove_files(&kdev->kobj, vlv_attrs); -- 1.9.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx