On 21/06/2021 14:39, Boris Brezillon wrote: > Do the exception -> string translation using a table so we can add extra > fields if we need to. While at it add an error field to ease the > exception -> error conversion which we'll need if we want to set the > fence error to something that reflects the exception code. > > TODO: fix the error codes. TODO: Do the TODO ;) I'm not sure how useful translating the hardware error codes to Linux ones is. E.g. 'OOM' means something quite different from a normal -ENOMEM. One is running out of space in a predefined buffer, the other is Linux not being able to allocate memory. > > Signed-off-by: Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx> > --- > drivers/gpu/drm/panfrost/panfrost_device.c | 134 +++++++++++++-------- > drivers/gpu/drm/panfrost/panfrost_device.h | 1 + > 2 files changed, 88 insertions(+), 47 deletions(-) > > diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c > index f7f5ca94f910..2de011cee258 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_device.c > +++ b/drivers/gpu/drm/panfrost/panfrost_device.c > @@ -292,55 +292,95 @@ void panfrost_device_fini(struct panfrost_device *pfdev) > panfrost_clk_fini(pfdev); > } > > -const char *panfrost_exception_name(u32 exception_code) > -{ > - switch (exception_code) { > - /* Non-Fault Status code */ > - case 0x00: return "NOT_STARTED/IDLE/OK"; > - case 0x01: return "DONE"; > - case 0x02: return "INTERRUPTED"; > - case 0x03: return "STOPPED"; > - case 0x04: return "TERMINATED"; > - case 0x08: return "ACTIVE"; > - /* Job exceptions */ > - case 0x40: return "JOB_CONFIG_FAULT"; > - case 0x41: return "JOB_POWER_FAULT"; > - case 0x42: return "JOB_READ_FAULT"; > - case 0x43: return "JOB_WRITE_FAULT"; > - case 0x44: return "JOB_AFFINITY_FAULT"; > - case 0x48: return "JOB_BUS_FAULT"; > - case 0x50: return "INSTR_INVALID_PC"; > - case 0x51: return "INSTR_INVALID_ENC"; > - case 0x52: return "INSTR_TYPE_MISMATCH"; > - case 0x53: return 
"INSTR_OPERAND_FAULT"; > - case 0x54: return "INSTR_TLS_FAULT"; > - case 0x55: return "INSTR_BARRIER_FAULT"; > - case 0x56: return "INSTR_ALIGN_FAULT"; > - case 0x58: return "DATA_INVALID_FAULT"; > - case 0x59: return "TILE_RANGE_FAULT"; > - case 0x5A: return "ADDR_RANGE_FAULT"; > - case 0x60: return "OUT_OF_MEMORY"; > - /* GPU exceptions */ > - case 0x80: return "DELAYED_BUS_FAULT"; > - case 0x88: return "SHAREABILITY_FAULT"; > - /* MMU exceptions */ > - case 0xC1: return "TRANSLATION_FAULT_LEVEL1"; > - case 0xC2: return "TRANSLATION_FAULT_LEVEL2"; > - case 0xC3: return "TRANSLATION_FAULT_LEVEL3"; > - case 0xC4: return "TRANSLATION_FAULT_LEVEL4"; > - case 0xC8: return "PERMISSION_FAULT"; > - case 0xC9 ... 0xCF: return "PERMISSION_FAULT"; > - case 0xD1: return "TRANSTAB_BUS_FAULT_LEVEL1"; > - case 0xD2: return "TRANSTAB_BUS_FAULT_LEVEL2"; > - case 0xD3: return "TRANSTAB_BUS_FAULT_LEVEL3"; > - case 0xD4: return "TRANSTAB_BUS_FAULT_LEVEL4"; > - case 0xD8: return "ACCESS_FLAG"; > - case 0xD9 ... 0xDF: return "ACCESS_FLAG"; > - case 0xE0 ... 0xE7: return "ADDRESS_SIZE_FAULT"; > - case 0xE8 ... 0xEF: return "MEMORY_ATTRIBUTES_FAULT"; > +#define PANFROST_EXCEPTION(id, err) \ > + [DRM_PANFROST_EXCEPTION_ ## id] = { \ > + .name = #id, \ > + .error = err, \ > } > > - return "UNKNOWN"; > +struct panfrost_exception_info { > + const char *name; > + int error; > +}; > + > +static const struct panfrost_exception_info panfrost_exception_infos[] = { > + PANFROST_EXCEPTION(OK, 0), > + PANFROST_EXCEPTION(DONE, 0), > + PANFROST_EXCEPTION(STOPPED, 0), > + PANFROST_EXCEPTION(TERMINATED, 0), STOPPED/TERMINATED are not really 'success' from an application perspective. But equally they are ones that need special handling from the kernel. 
> + PANFROST_EXCEPTION(KABOOM, 0), > + PANFROST_EXCEPTION(EUREKA, 0), > + PANFROST_EXCEPTION(ACTIVE, 0), > + PANFROST_EXCEPTION(JOB_CONFIG_FAULT, -EINVAL), > + PANFROST_EXCEPTION(JOB_POWER_FAULT, -ECANCELED), > + PANFROST_EXCEPTION(JOB_READ_FAULT, -EINVAL), > + PANFROST_EXCEPTION(JOB_WRITE_FAULT, -EINVAL), > + PANFROST_EXCEPTION(JOB_AFFINITY_FAULT, -EINVAL), > + PANFROST_EXCEPTION(JOB_BUS_FAULT, -EINVAL), > + PANFROST_EXCEPTION(INSTR_INVALID_PC, -EINVAL), > + PANFROST_EXCEPTION(INSTR_INVALID_ENC, -EINVAL), > + PANFROST_EXCEPTION(INSTR_BARRIER_FAULT, -EINVAL), > + PANFROST_EXCEPTION(DATA_INVALID_FAULT, -EINVAL), > + PANFROST_EXCEPTION(TILE_RANGE_FAULT, -EINVAL), > + PANFROST_EXCEPTION(ADDR_RANGE_FAULT, -EINVAL), > + PANFROST_EXCEPTION(IMPRECISE_FAULT, -EINVAL), > + PANFROST_EXCEPTION(OOM, -ENOMEM), > + PANFROST_EXCEPTION(UNKNOWN, -EINVAL), We should probably make a distinction between this 'special' UNKNOWN that the hardware can report... > + PANFROST_EXCEPTION(DELAYED_BUS_FAULT, -EINVAL), > + PANFROST_EXCEPTION(GPU_SHAREABILITY_FAULT, -ECANCELED), > + PANFROST_EXCEPTION(SYS_SHAREABILITY_FAULT, -ECANCELED), > + PANFROST_EXCEPTION(GPU_CACHEABILITY_FAULT, -ECANCELED), > + PANFROST_EXCEPTION(TRANSLATION_FAULT_0, -EINVAL), > + PANFROST_EXCEPTION(TRANSLATION_FAULT_1, -EINVAL), > + PANFROST_EXCEPTION(TRANSLATION_FAULT_2, -EINVAL), > + PANFROST_EXCEPTION(TRANSLATION_FAULT_3, -EINVAL), > + PANFROST_EXCEPTION(TRANSLATION_FAULT_4, -EINVAL), > + PANFROST_EXCEPTION(TRANSLATION_FAULT_IDENTITY, -EINVAL), > + PANFROST_EXCEPTION(PERM_FAULT_0, -EINVAL), > + PANFROST_EXCEPTION(PERM_FAULT_1, -EINVAL), > + PANFROST_EXCEPTION(PERM_FAULT_2, -EINVAL), > + PANFROST_EXCEPTION(PERM_FAULT_3, -EINVAL), > + PANFROST_EXCEPTION(TRANSTAB_BUS_FAULT_0, -EINVAL), > + PANFROST_EXCEPTION(TRANSTAB_BUS_FAULT_1, -EINVAL), > + PANFROST_EXCEPTION(TRANSTAB_BUS_FAULT_2, -EINVAL), > + PANFROST_EXCEPTION(TRANSTAB_BUS_FAULT_3, -EINVAL), > + PANFROST_EXCEPTION(ACCESS_FLAG_0, -EINVAL), > + 
PANFROST_EXCEPTION(ACCESS_FLAG_1, -EINVAL), > + PANFROST_EXCEPTION(ACCESS_FLAG_2, -EINVAL), > + PANFROST_EXCEPTION(ACCESS_FLAG_3, -EINVAL), > + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_IN0, -EINVAL), > + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_IN1, -EINVAL), > + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_IN2, -EINVAL), > + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_IN3, -EINVAL), > + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_OUT0, -EINVAL), > + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_OUT1, -EINVAL), > + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_OUT2, -EINVAL), > + PANFROST_EXCEPTION(ADDR_SIZE_FAULT_OUT3, -EINVAL), > + PANFROST_EXCEPTION(MEM_ATTR_FAULT_0, -EINVAL), > + PANFROST_EXCEPTION(MEM_ATTR_FAULT_1, -EINVAL), > + PANFROST_EXCEPTION(MEM_ATTR_FAULT_2, -EINVAL), > + PANFROST_EXCEPTION(MEM_ATTR_FAULT_3, -EINVAL), > + PANFROST_EXCEPTION(MEM_ATTR_NONCACHE_0, -EINVAL), > + PANFROST_EXCEPTION(MEM_ATTR_NONCACHE_1, -EINVAL), > + PANFROST_EXCEPTION(MEM_ATTR_NONCACHE_2, -EINVAL), > + PANFROST_EXCEPTION(MEM_ATTR_NONCACHE_3, -EINVAL), > +}; > + > +const char *panfrost_exception_name(u32 exception_code) > +{ > + if (WARN_ON(exception_code >= ARRAY_SIZE(panfrost_exception_infos) || > + !panfrost_exception_infos[exception_code].name)) > + return "UNKNOWN"; ...and this UNKNOWN that just means we don't have a clue what the magic number is. 
Steve > + > + return panfrost_exception_infos[exception_code].name; > +} > + > +int panfrost_exception_to_error(u32 exception_code) > +{ > + if (WARN_ON(exception_code >= ARRAY_SIZE(panfrost_exception_infos))) > + return 0; > + > + return panfrost_exception_infos[exception_code].error; > } > > void panfrost_device_reset(struct panfrost_device *pfdev) > diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h > index 1c6a3597eba0..498c7b5dccd0 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_device.h > +++ b/drivers/gpu/drm/panfrost/panfrost_device.h > @@ -174,6 +174,7 @@ int panfrost_device_resume(struct device *dev); > int panfrost_device_suspend(struct device *dev); > > const char *panfrost_exception_name(u32 exception_code); > +int panfrost_exception_to_error(u32 exception_code); > > static inline void > panfrost_device_schedule_reset(struct panfrost_device *pfdev) >