On Wed, 11 Oct 2023, John Harrison <john.c.harrison@xxxxxxxxx> wrote: > On 10/11/2023 09:38, Jani Nikula wrote: >> Hide gpu error specifics in i915_gpu_error.c. This is also cleaner wrt >> conditional compilation, as i915_gpu_error.c is only built with >> DRM_I915_CAPTURE_ERROR=y. >> >> With this, we can also make i915_first_error_state() static. >> >> Signed-off-by: Jani Nikula <jani.nikula@xxxxxxxxx> >> --- >> drivers/gpu/drm/i915/i915_gpu_error.c | 75 ++++++++++++++++++++++++- >> drivers/gpu/drm/i915/i915_gpu_error.h | 17 +++--- >> drivers/gpu/drm/i915/i915_sysfs.c | 79 +-------------------------- >> 3 files changed, 86 insertions(+), 85 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c >> index b4c8459deb7b..f9e750217f18 100644 >> --- a/drivers/gpu/drm/i915/i915_gpu_error.c >> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c >> @@ -57,6 +57,7 @@ >> #include "i915_memcpy.h" >> #include "i915_reg.h" >> #include "i915_scatterlist.h" >> +#include "i915_sysfs.h" >> #include "i915_utils.h" >> >> #define ALLOW_FAIL (__GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) >> @@ -2208,7 +2209,7 @@ void i915_capture_error_state(struct intel_gt *gt, >> i915_gpu_coredump_put(error); >> } >> >> -struct i915_gpu_coredump * >> +static struct i915_gpu_coredump * >> i915_first_error_state(struct drm_i915_private *i915) >> { >> struct i915_gpu_coredump *error; >> @@ -2484,3 +2485,75 @@ void i915_gpu_error_debugfs_register(struct drm_i915_private *i915) >> debugfs_create_file("i915_gpu_info", 0644, minor->debugfs_root, i915, >> &i915_gpu_info_fops); >> } >> + >> +static ssize_t error_state_read(struct file *filp, struct kobject *kobj, >> + struct bin_attribute *attr, char *buf, >> + loff_t off, size_t count) >> +{ >> + >> + struct device *kdev = kobj_to_dev(kobj); >> + struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); >> + struct i915_gpu_coredump *gpu; >> + ssize_t ret = 0; >> + >> + /* >> + * FIXME: Concurrent 
clients triggering resets and reading + clearing >> * dumps can cause inconsistent sysfs reads when a user calls in with a >> * non-zero offset to complete a prior partial read but the >> * gpu_coredump has been cleared or replaced. >> */ >> >> gpu = i915_first_error_state(i915); >> if (IS_ERR(gpu)) { >> ret = PTR_ERR(gpu); >> } else if (gpu) { >> ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count); >> i915_gpu_coredump_put(gpu); >> } else { >> const char *str = "No error state collected\n"; >> size_t len = strlen(str); >> >> if (off < len) { >> ret = min_t(size_t, count, len - off); >> memcpy(buf, str + off, ret); >> } >> } > Can this and the debugfs equivalent not be common code? It seems like > the implementations are conceptually the same even if the code currently > looks quite different. They probably can, but this is just the code movement part. I initially sent a bigger refactoring series [1], but decided to chop it up and send it in smaller pieces, to not burden the reviewers. The first part [2] has already been merged, and this is a follow-up. BR, Jani. [1] https://lore.kernel.org/r/cover.1695924021.git.jani.nikula@xxxxxxxxx [2] https://lore.kernel.org/r/cover.1696236329.git.jani.nikula@xxxxxxxxx > > John. 
> >> + >> + return ret; >> +} >> + >> +static ssize_t error_state_write(struct file *file, struct kobject *kobj, >> + struct bin_attribute *attr, char *buf, >> + loff_t off, size_t count) >> +{ >> + struct device *kdev = kobj_to_dev(kobj); >> + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); >> + >> + drm_dbg(&dev_priv->drm, "Resetting error state\n"); >> + i915_reset_error_state(dev_priv); >> + >> + return count; >> +} >> + >> +static const struct bin_attribute error_state_attr = { >> + .attr.name = "error", >> + .attr.mode = S_IRUSR | S_IWUSR, >> + .size = 0, >> + .read = error_state_read, >> + .write = error_state_write, >> +}; >> + >> +void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915) >> +{ >> + struct device *kdev = i915->drm.primary->kdev; >> + >> + if (sysfs_create_bin_file(&kdev->kobj, &error_state_attr)) >> + drm_err(&i915->drm, "error_state sysfs setup failed\n"); >> +} >> + >> +void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915) >> +{ >> + struct device *kdev = i915->drm.primary->kdev; >> + >> + sysfs_remove_bin_file(&kdev->kobj, &error_state_attr); >> +} >> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h >> index a6f2a7518cf0..68c964d6720a 100644 >> --- a/drivers/gpu/drm/i915/i915_gpu_error.h >> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h >> @@ -323,11 +323,12 @@ static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu) >> kref_put(&gpu->ref, __i915_gpu_coredump_free); >> } >> >> -struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915); >> void i915_reset_error_state(struct drm_i915_private *i915); >> void i915_disable_error_state(struct drm_i915_private *i915, int err); >> >> void i915_gpu_error_debugfs_register(struct drm_i915_private *i915); >> +void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915); >> +void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915); >> >> #else >> >> @@ -396,12 +397,6 @@ static inline 
void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu) >> { >> } >> >> -static inline struct i915_gpu_coredump * >> -i915_first_error_state(struct drm_i915_private *i915) >> -{ >> - return ERR_PTR(-ENODEV); >> -} >> - >> static inline void i915_reset_error_state(struct drm_i915_private *i915) >> { >> } >> @@ -415,6 +410,14 @@ static inline void i915_gpu_error_debugfs_register(struct drm_i915_private *i915 >> { >> } >> >> +static inline void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915) >> +{ >> +} >> + >> +static inline void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915) >> +{ >> +} >> + >> #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */ >> >> #endif /* _I915_GPU_ERROR_H_ */ >> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c >> index e88bb4f04305..613decd47760 100644 >> --- a/drivers/gpu/drm/i915/i915_sysfs.c >> +++ b/drivers/gpu/drm/i915/i915_sysfs.c >> @@ -155,81 +155,6 @@ static const struct bin_attribute dpf_attrs_1 = { >> .private = (void *)1 >> }; >> >> -#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) >> - >> -static ssize_t error_state_read(struct file *filp, struct kobject *kobj, >> - struct bin_attribute *attr, char *buf, >> - loff_t off, size_t count) >> -{ >> - >> - struct device *kdev = kobj_to_dev(kobj); >> - struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); >> - struct i915_gpu_coredump *gpu; >> - ssize_t ret = 0; >> - >> - /* >> - * FIXME: Concurrent clients triggering resets and reading + clearing >> - * dumps can cause inconsistent sysfs reads when a user calls in with a >> - * non-zero offset to complete a prior partial read but the >> - * gpu_coredump has been cleared or replaced. 
>> - */ >> - >> - gpu = i915_first_error_state(i915); >> - if (IS_ERR(gpu)) { >> - ret = PTR_ERR(gpu); >> - } else if (gpu) { >> - ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count); >> - i915_gpu_coredump_put(gpu); >> - } else { >> - const char *str = "No error state collected\n"; >> - size_t len = strlen(str); >> - >> - if (off < len) { >> - ret = min_t(size_t, count, len - off); >> - memcpy(buf, str + off, ret); >> - } >> - } >> - >> - return ret; >> -} >> - >> -static ssize_t error_state_write(struct file *file, struct kobject *kobj, >> - struct bin_attribute *attr, char *buf, >> - loff_t off, size_t count) >> -{ >> - struct device *kdev = kobj_to_dev(kobj); >> - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); >> - >> - drm_dbg(&dev_priv->drm, "Resetting error state\n"); >> - i915_reset_error_state(dev_priv); >> - >> - return count; >> -} >> - >> -static const struct bin_attribute error_state_attr = { >> - .attr.name = "error", >> - .attr.mode = S_IRUSR | S_IWUSR, >> - .size = 0, >> - .read = error_state_read, >> - .write = error_state_write, >> -}; >> - >> -static void i915_setup_error_capture(struct device *kdev) >> -{ >> - if (sysfs_create_bin_file(&kdev->kobj, &error_state_attr)) >> - drm_err(&kdev_minor_to_i915(kdev)->drm, >> - "error_state sysfs setup failed\n"); >> -} >> - >> -static void i915_teardown_error_capture(struct device *kdev) >> -{ >> - sysfs_remove_bin_file(&kdev->kobj, &error_state_attr); >> -} >> -#else >> -static void i915_setup_error_capture(struct device *kdev) {} >> -static void i915_teardown_error_capture(struct device *kdev) {} >> -#endif >> - >> void i915_setup_sysfs(struct drm_i915_private *dev_priv) >> { >> struct device *kdev = dev_priv->drm.primary->kdev; >> @@ -255,7 +180,7 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv) >> drm_warn(&dev_priv->drm, >> "failed to register GT sysfs directory\n"); >> >> - i915_setup_error_capture(kdev); >> + i915_gpu_error_sysfs_setup(dev_priv); >> >> 
intel_engines_add_sysfs(dev_priv); >> } >> @@ -264,7 +189,7 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv) >> { >> struct device *kdev = dev_priv->drm.primary->kdev; >> >> - i915_teardown_error_capture(kdev); >> + i915_gpu_error_sysfs_teardown(dev_priv); >> >> device_remove_bin_file(kdev, &dpf_attrs_1); >> device_remove_bin_file(kdev, &dpf_attrs); > -- Jani Nikula, Intel