On Mon, May 14, 2018 at 10:19:27AM +0100, Brian Starkey wrote: > Hi Alex, > > On Fri, May 11, 2018 at 06:56:03PM +0100, Alexandru Gheorghe wrote: > >Status register contains a lot of bits for reporting internal errors > >inside Mali DP. Currently, we just silently ignore all of the erorrs, > > Being picky: s/erorrs/errors/ > > >that doesn't help when we are investigating different bugs, especially > >on the FPGA models which have a lot of constrains, so we could easily > > s/constrains/constraints/ > > >end up in AXI or underrun errors. > > > >Add a new file called debug that contains an agregate of the > > s/agregate/aggregate/ > > >errors reported by the Mali DP hardware. > > > >E.g: > >[root@alarm ~]# cat /sys/kernel/debug/dri/1/debug > >[DE] num_errors : 167 > >[DE] last_error_status : 0x00000001 > >[DE] last_error_vblank : 385 > >[SE] num_errors : 3 > >[SE] last_error_status : 0x00e23001 > >[SE] last_error_vblank : 201 > > > >This a morphosis of the patch presented here [1], where the errors > >where reported continuously via trace_printk. > > > >[1] https://lists.freedesktop.org/archives/dri-devel/2018-February/167042.html > > > >Signed-off-by: Alexandru Gheorghe <alexandru-cosmin.gheorghe@xxxxxxx> > >--- > >drivers/gpu/drm/arm/malidp_drv.c | 61 +++++++++++++++++++++++++++++++++++++++ > >drivers/gpu/drm/arm/malidp_drv.h | 15 ++++++++++ > >drivers/gpu/drm/arm/malidp_hw.c | 46 ++++++++++++++++++++++++----- > >drivers/gpu/drm/arm/malidp_hw.h | 1 + > >drivers/gpu/drm/arm/malidp_regs.h | 6 ++++ > >5 files changed, 122 insertions(+), 7 deletions(-) > > > >diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c > >index 8d20faa..70ce19a 100644 > >--- a/drivers/gpu/drm/arm/malidp_drv.c > >+++ b/drivers/gpu/drm/arm/malidp_drv.c > >@@ -17,6 +17,7 @@ > >#include <linux/of_graph.h> > >#include <linux/of_reserved_mem.h> > >#include <linux/pm_runtime.h> > >+#include <linux/debugfs.h> > > > >#include <drm/drmP.h> > >#include <drm/drm_atomic.h> > >@@ -29,6 +30,7 @@ > >#include <drm/drm_gem_framebuffer_helper.h> > >#include <drm/drm_modeset_helper.h> > >#include <drm/drm_of.h> > >+#include <drm/drm_debugfs.h> > > > >#include "malidp_drv.h" > >#include "malidp_regs.h" > >@@ -327,6 +329,62 @@ static int malidp_dumb_create(struct drm_file *file_priv, > > return drm_gem_cma_dumb_create_internal(file_priv, drm, args); > >} > > > >+#ifdef CONFIG_DEBUG_FS > >+ > >+static void malidp_error_stats_init(struct malidp_error_stats *error_stats) > >+{ > >+ atomic_set(&error_stats->num_errors, 0); > >+ atomic_set(&error_stats->last_error_status, 0); > >+ atomic64_set(&error_stats->last_error_vblank, -1); > >+} > >+ > >+void malidp_error(struct malidp_error_stats *error_stats, u32 status, > >+ u64 vblank) > >+{ > >+ atomic_set(&error_stats->last_error_status, status); > >+ atomic64_set(&error_stats->last_error_vblank, vblank); > >+ atomic_inc(&error_stats->num_errors); > >+} > >+ > > Do we already have a lock we could use to make sure the status printed > is consistent? I know this is "only debug", but it could be annoying > if we can't trust that the last_error_status does actually match the > last_error_vblank. > No, we don't have any lock. Yes that would be annoying, I will add one. > >+void malidp_error_stats_dump(const char *prefix, > >+ struct malidp_error_stats *error_stats, > >+ struct seq_file *m) > >+{ > >+ seq_printf(m, "[%s] num_errors : %d\n", prefix, > >+ atomic_read(&error_stats->num_errors)); > >+ seq_printf(m, "[%s] last_error_status : 0x%08x\n", prefix, > >+ atomic_read(&error_stats->last_error_status)); > >+ seq_printf(m, "[%s] last_error_vblank : %ld\n", prefix, > >+ atomic64_read(&error_stats->last_error_vblank)); > >+} > >+ > >+static int malidp_show_stats(struct seq_file *m, void *arg) > >+{ > >+ struct drm_info_node *node = (struct drm_info_node *)m->private; > >+ struct drm_device *drm = node->minor->dev; > >+ struct malidp_drm *malidp = drm->dev_private; > >+ > >+ malidp_error_stats_dump("DE", &malidp->de_errors, m); > >+ malidp_error_stats_dump("SE", &malidp->se_errors, m); > >+ return 0; > >+} > >+ > >+static struct drm_info_list malidp_debugfs_list[] = { > >+ { "debug", malidp_show_stats, 0 }, > >+}; > > I think it would be pretty useful to have a way to reset the counters. > Maybe by just writing anything to the file? > drm_debugfs_fops doesn't allow writing. I see no reason using our own debugs_fops, but do you think worth the trouble? > Thanks, > -Brian > > >+ > >+static int malidp_debugfs_init(struct drm_minor *minor) > >+{ > >+ struct malidp_drm *malidp = minor->dev->dev_private; > >+ > >+ malidp_error_stats_init(&malidp->de_errors); > >+ malidp_error_stats_init(&malidp->se_errors); > >+ return drm_debugfs_create_files(malidp_debugfs_list, > >+ ARRAY_SIZE(malidp_debugfs_list), minor->debugfs_root, minor); > >+} > >+ > >+#endif //CONFIG_DEBUG_FS > >+ > >static struct drm_driver malidp_driver = { > > .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC | > > DRIVER_PRIME, > >@@ -343,6 +401,9 @@ static struct drm_driver malidp_driver = { > > .gem_prime_vmap = drm_gem_cma_prime_vmap, > > .gem_prime_vunmap = drm_gem_cma_prime_vunmap, > > .gem_prime_mmap = drm_gem_cma_prime_mmap, > >+#ifdef CONFIG_DEBUG_FS > >+ .debugfs_init = malidp_debugfs_init, > >+#endif > > .fops = &fops, > > .name = "mali-dp", > > .desc = "ARM Mali Display Processor driver", > >diff --git a/drivers/gpu/drm/arm/malidp_drv.h b/drivers/gpu/drm/arm/malidp_drv.h > >index c70989b..c49056c 100644 > >--- a/drivers/gpu/drm/arm/malidp_drv.h > >+++ b/drivers/gpu/drm/arm/malidp_drv.h > >@@ -18,6 +18,12 @@ > >#include <drm/drmP.h> > >#include "malidp_hw.h" > > > >+struct malidp_error_stats { > >+ atomic_t num_errors; > >+ atomic_t last_error_status; > >+ atomic64_t last_error_vblank; > >+}; > >+ > >struct malidp_drm { > > struct malidp_hw_device *dev; > > struct drm_crtc crtc; > >@@ -25,6 +31,10 @@ struct malidp_drm { > > struct drm_pending_vblank_event *event; > > atomic_t config_valid; > > u32 core_id; > >+#ifdef CONFIG_DEBUG_FS > >+ struct malidp_error_stats de_errors; > >+ struct malidp_error_stats se_errors; > >+#endif > >}; > > > >#define crtc_to_malidp_device(x) container_of(x, struct malidp_drm, crtc) > >@@ -62,6 +72,11 @@ struct malidp_crtc_state { > >int malidp_de_planes_init(struct drm_device *drm); > >int malidp_crtc_init(struct drm_device *drm); > > > >+#ifdef CONFIG_DEBUG_FS > >+void malidp_error(struct malidp_error_stats *error_stats, u32 status, > >+ u64 vblank); > >+#endif > >+ > >/* often used combination of rotational bits */ > >#define MALIDP_ROTATED_MASK (DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270) > > > >diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c > >index d789b46..ec40a44 100644 > >--- a/drivers/gpu/drm/arm/malidp_hw.c > >+++ b/drivers/gpu/drm/arm/malidp_hw.c > >@@ -632,10 +632,16 @@ const struct malidp_hw malidp_device[MALIDP_MAX_DEVICES] = { > > MALIDP500_DE_IRQ_VSYNC | > > MALIDP500_DE_IRQ_GLOBAL, > > .vsync_irq = MALIDP500_DE_IRQ_VSYNC, > >+ .err_mask = MALIDP_DE_IRQ_UNDERRUN | > >+ MALIDP500_DE_IRQ_AXI_ERR | > >+ MALIDP500_DE_IRQ_SATURATION, > > }, > > .se_irq_map = { > > .irq_mask = MALIDP500_SE_IRQ_CONF_MODE, > > .vsync_irq = 0, > >+ .err_mask = MALIDP500_SE_IRQ_INIT_BUSY | > >+ MALIDP500_SE_IRQ_AXI_ERROR | > >+ MALIDP500_SE_IRQ_OVERRUN, > > }, > > .dc_irq_map = { > > .irq_mask = MALIDP500_DE_IRQ_CONF_VALID, > >@@ -669,10 +675,15 @@ const struct malidp_hw malidp_device[MALIDP_MAX_DEVICES] = { > > .irq_mask = MALIDP_DE_IRQ_UNDERRUN | > > MALIDP550_DE_IRQ_VSYNC, > > .vsync_irq = MALIDP550_DE_IRQ_VSYNC, > >+ .err_mask = MALIDP_DE_IRQ_UNDERRUN | > >+ MALIDP550_DE_IRQ_SATURATION | > >+ MALIDP550_DE_IRQ_AXI_ERR, > > }, > > .se_irq_map = { > >- .irq_mask = MALIDP550_SE_IRQ_EOW | > >- MALIDP550_SE_IRQ_AXI_ERR, > >+ .irq_mask = MALIDP550_SE_IRQ_EOW, > >+ .err_mask = MALIDP550_SE_IRQ_AXI_ERR | > >+ MALIDP550_SE_IRQ_OVR | > >+ MALIDP550_SE_IRQ_IBSY, > > }, > > .dc_irq_map = { > > .irq_mask = MALIDP550_DC_IRQ_CONF_VALID, > >@@ -707,10 +718,20 @@ const struct malidp_hw malidp_device[MALIDP_MAX_DEVICES] = { > > MALIDP650_DE_IRQ_DRIFT | > > MALIDP550_DE_IRQ_VSYNC, > > .vsync_irq = MALIDP550_DE_IRQ_VSYNC, > >+ .err_mask = MALIDP_DE_IRQ_UNDERRUN | > >+ MALIDP650_DE_IRQ_DRIFT | > >+ MALIDP550_DE_IRQ_SATURATION | > >+ MALIDP550_DE_IRQ_AXI_ERR | > >+ MALIDP650_DE_IRQ_ACEV1 | > >+ MALIDP650_DE_IRQ_ACEV2 | > >+ MALIDP650_DE_IRQ_ACEG | > >+ MALIDP650_DE_IRQ_AXIEP, > > }, > > .se_irq_map = { > >- .irq_mask = MALIDP550_SE_IRQ_EOW | > >- MALIDP550_SE_IRQ_AXI_ERR, > >+ .irq_mask = MALIDP550_SE_IRQ_EOW, > >+ .err_mask = MALIDP550_SE_IRQ_AXI_ERR | > >+ MALIDP550_SE_IRQ_OVR | > >+ MALIDP550_SE_IRQ_IBSY, > > }, > > .dc_irq_map = { > > .irq_mask = MALIDP550_DC_IRQ_CONF_VALID, > >@@ -799,10 +820,17 @@ static irqreturn_t malidp_de_irq(int irq, void *arg) > > return ret; > > > > mask = malidp_hw_read(hwdev, MALIDP_REG_MASKIRQ); > >- status &= mask; > >+ /* keep the status of the enabled interrupts, plus the error bits */ > >+ status &= (mask | de->err_mask); > > if ((status & de->vsync_irq) && malidp->crtc.enabled) > > drm_crtc_handle_vblank(&malidp->crtc); > > > >+#ifdef CONFIG_DEBUG_FS > >+ if (status & de->err_mask) { > >+ malidp_error(&malidp->de_errors, status, > >+ drm_crtc_vblank_count(&malidp->crtc)); > >+ } > >+#endif > > malidp_hw_clear_irq(hwdev, MALIDP_DE_BLOCK, status); > > > > return (ret == IRQ_NONE) ? IRQ_HANDLED : ret; > >@@ -878,11 +906,15 @@ static irqreturn_t malidp_se_irq(int irq, void *arg) > > return IRQ_NONE; > > > > status = malidp_hw_read(hwdev, hw->map.se_base + MALIDP_REG_STATUS); > >- if (!(status & se->irq_mask)) > >+ if (!(status & (se->irq_mask | se->err_mask))) > > return IRQ_NONE; > > > >+#ifdef CONFIG_DEBUG_FS > >+ if (status & se->err_mask) > >+ malidp_error(&malidp->se_errors, status, > >+ drm_crtc_vblank_count(&malidp->crtc)); > >+#endif > > mask = malidp_hw_read(hwdev, hw->map.se_base + MALIDP_REG_MASKIRQ); > >- status = malidp_hw_read(hwdev, hw->map.se_base + MALIDP_REG_STATUS); > > status &= mask; > > /* ToDo: status decoding and firing up of VSYNC and page flip events */ > > > >diff --git a/drivers/gpu/drm/arm/malidp_hw.h b/drivers/gpu/drm/arm/malidp_hw.h > >index b5dd6c7..b45f99f 100644 > >--- a/drivers/gpu/drm/arm/malidp_hw.h > >+++ b/drivers/gpu/drm/arm/malidp_hw.h > >@@ -52,6 +52,7 @@ struct malidp_format_id { > >struct malidp_irq_map { > > u32 irq_mask; /* mask of IRQs that can be enabled in the block */ > > u32 vsync_irq; /* IRQ bit used for signaling during VSYNC */ > >+ u32 err_mask; /* mask of bits that represent errors */ > >}; > > > >struct malidp_layer { > >diff --git a/drivers/gpu/drm/arm/malidp_regs.h b/drivers/gpu/drm/arm/malidp_regs.h > >index 149024f..7c37390 100644 > >--- a/drivers/gpu/drm/arm/malidp_regs.h > >+++ b/drivers/gpu/drm/arm/malidp_regs.h > >@@ -53,6 +53,8 @@ > >#define MALIDP550_DE_IRQ_AXI_ERR (1 << 16) > >#define MALIDP550_SE_IRQ_EOW (1 << 0) > >#define MALIDP550_SE_IRQ_AXI_ERR (1 << 16) > >+#define MALIDP550_SE_IRQ_OVR (1 << 17) > >+#define MALIDP550_SE_IRQ_IBSY (1 << 18) > >#define MALIDP550_DC_IRQ_CONF_VALID (1 << 0) > >#define MALIDP550_DC_IRQ_CONF_MODE (1 << 4) > >#define MALIDP550_DC_IRQ_CONF_ACTIVE (1 << 16) > >@@ -60,6 +62,10 @@ > >#define MALIDP550_DC_IRQ_SE (1 << 24) > > > >#define MALIDP650_DE_IRQ_DRIFT (1 << 4) > >+#define MALIDP650_DE_IRQ_ACEV1 (1 << 17) > >+#define MALIDP650_DE_IRQ_ACEV2 (1 << 18) > >+#define MALIDP650_DE_IRQ_ACEG (1 << 19) > >+#define MALIDP650_DE_IRQ_AXIEP (1 << 28) > > > >/* bit masks that are common between products */ > >#define MALIDP_CFG_VALID (1 << 0) > >-- > >2.7.4 > > -- Cheers, Alex G _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel