Enable the scheduling timeout error interrupt and set it to a low value to happen periodically, since it can be missed in HW in certain conditions. Increment a channel-specific counter in software if the current channel hasn't advanced. Abort the channel once the timeout limit is hit (with the periodic granularity). The error notifier is set to NOUVEAU_GEM_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT when this occurs. A new KEPLER_SET_CHANNEL_TIMEOUT mthd sets the timeout limit, in milliseconds. The interrupt granularity is set to 100 ms. Signed-off-by: Konsta Hölttä <kholtta@xxxxxxxxxx> --- drm/nouveau/include/nvif/class.h | 8 ++++ drm/nouveau/nvkm/engine/fifo/gk104.c | 78 +++++++++++++++++++++++++++++++----- 2 files changed, 75 insertions(+), 11 deletions(-) diff --git a/drm/nouveau/include/nvif/class.h b/drm/nouveau/include/nvif/class.h index 381c72d..f9a3647 100644 --- a/drm/nouveau/include/nvif/class.h +++ b/drm/nouveau/include/nvif/class.h @@ -620,18 +620,26 @@ struct fermi_a_zbc_depth_v0 { __u8 format; __u8 index; __u8 pad03[5]; __u32 ds; __u32 l2; }; #define KEPLER_SET_CHANNEL_PRIORITY 0x42 // XXX +#define KEPLER_SET_CHANNEL_TIMEOUT 0x43 // XXX + struct kepler_set_channel_priority_v0 { __u8 version; #define KEPLER_SET_CHANNEL_PRIORITY_LOW 0x00 #define KEPLER_SET_CHANNEL_PRIORITY_MEDIUM 0x01 #define KEPLER_SET_CHANNEL_PRIORITY_HIGH 0x02 __u8 priority; __u8 pad03[6]; }; +struct kepler_set_channel_timeout_v0 { + __u8 version; + __u8 pad03[3]; + __u32 timeout_ms; +}; + #endif diff --git a/drm/nouveau/nvkm/engine/fifo/gk104.c b/drm/nouveau/nvkm/engine/fifo/gk104.c index 2bab45e..15360a6 100644 --- a/drm/nouveau/nvkm/engine/fifo/gk104.c +++ b/drm/nouveau/nvkm/engine/fifo/gk104.c @@ -83,18 +83,25 @@ struct gk104_fifo_base { struct gk104_fifo_chan { struct nvkm_fifo_chan base; u32 engine; enum { STOPPED, RUNNING, KILLED } state; + struct { + u32 sum_ms; + u32 limit_ms; + u32 gpfifo_get; + } timeout; }; +#define GRFIFO_TIMEOUT_CHECK_PERIOD_MS 100 + 
/******************************************************************************* * FIFO channel objects ******************************************************************************/ static void gk104_fifo_runlist_update(struct gk104_fifo_priv *priv, u32 engine) { struct nvkm_bar *bar = nvkm_bar(priv); @@ -288,16 +295,21 @@ gk104_fifo_chan_ctor(struct nvkm_object *parent, struct nvkm_object *engine, nv_wo32(base, 0x94, 0x30000001); nv_wo32(base, 0x9c, 0x00000100); nv_wo32(base, 0xac, 0x0000001f); nv_wo32(base, 0xe8, chan->base.chid); nv_wo32(base, 0xb8, 0xf8000000); nv_wo32(base, 0xf8, 0x10003080); /* 0x002310 */ nv_wo32(base, 0xfc, 0x10000010); /* 0x002350 */ bar->flush(bar); + + chan->timeout.sum_ms = 0; + chan->timeout.limit_ms = -1; + chan->timeout.gpfifo_get = 0; + return 0; } static int gk104_fifo_chan_init(struct nvkm_object *object) { struct nvkm_gpuobj *base = nv_gpuobj(object->parent); struct gk104_fifo_priv *priv = (void *)object->engine; @@ -379,21 +391,39 @@ gk104_fifo_chan_set_priority(struct nvkm_object *object, void *data, u32 size) return -EINVAL; } } return ret; } int +gk104_fifo_chan_set_timeout(struct nvkm_object *object, void *data, u32 size) +{ + struct gk104_fifo_chan *chan = (void *)object; + union { + struct kepler_set_channel_timeout_v0 v0; + } *args = data; + int ret; + + if (nvif_unpack(args->v0, 0, 0, false)) { + chan->timeout.limit_ms = args->v0.timeout_ms; + } + + return ret; +} + +int gk104_fifo_chan_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size) { switch (mthd) { case KEPLER_SET_CHANNEL_PRIORITY: return gk104_fifo_chan_set_priority(object, data, size); + case KEPLER_SET_CHANNEL_TIMEOUT: + return gk104_fifo_chan_set_timeout(object, data, size); default: break; } return -EINVAL; } struct nvkm_ofuncs gk104_fifo_chan_ofuncs = { @@ -604,61 +634,83 @@ gk104_fifo_intr_bind(struct gk104_fifo_priv *priv) } static const struct nvkm_enum gk104_fifo_sched_reason[] = { { 0x0a, "CTXSW_TIMEOUT" }, {} }; +static bool 
+gk104_fifo_update_timeout(struct gk104_fifo_priv *priv, + struct gk104_fifo_chan *chan, u32 dt) +{ + u32 gpfifo_get = nv_rd32(priv, 34); /* XXX(review): bare decimal offset — every other read here uses a hex PFIFO address (e.g. 0x002640); confirm intended register */ + if (gpfifo_get == chan->timeout.gpfifo_get) { + chan->timeout.sum_ms += dt; + } else { + chan->timeout.sum_ms = dt; + } + + chan->timeout.gpfifo_get = gpfifo_get; + + return chan->timeout.sum_ms > chan->timeout.limit_ms; +} + static void gk104_fifo_intr_sched_ctxsw(struct gk104_fifo_priv *priv) { struct nvkm_engine *engine; struct gk104_fifo_chan *chan; u32 engn; for (engn = 0; engn < ARRAY_SIZE(fifo_engine); engn++) { u32 stat = nv_rd32(priv, 0x002640 + (engn * 0x04)); u32 busy = (stat & 0x80000000); u32 next = (stat & 0x07ff0000) >> 16; - u32 chsw = (stat & 0x00008000); - u32 save = (stat & 0x00004000); - u32 load = (stat & 0x00002000); + u32 cxsw = (stat & 0x0000e000) >> 13; u32 prev = (stat & 0x000007ff); - u32 chid = load ? next : prev; - (void)save; + /* if loading context, take next id */ + u32 chid = cxsw == 5 ? next : prev; - if (busy && chsw) { + nv_debug(priv, "ctxsw eng stat: %08x\n", stat); + /* doing context switch? 
*/ + if (busy && (cxsw >= 5 && cxsw <= 7)) { if (!(chan = (void *)priv->base.channel[chid])) continue; if (!(engine = gk104_fifo_engine(priv, engn))) continue; - nvkm_fifo_eevent(&priv->base, chid, - NOUVEAU_GEM_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); - - gk104_fifo_recover(priv, engine, chan); + if (gk104_fifo_update_timeout(priv, chan, + GRFIFO_TIMEOUT_CHECK_PERIOD_MS)) { + nvkm_fifo_eevent(&priv->base, chid, + NOUVEAU_GEM_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); + gk104_fifo_recover(priv, engine, chan); + } else { + nv_debug(priv, "fifo waiting for ctxsw %d ms on ch %d\n", + chan->timeout.sum_ms, chid); + } } } } static void gk104_fifo_intr_sched(struct gk104_fifo_priv *priv) { u32 intr = nv_rd32(priv, 0x00254c); u32 code = intr & 0x000000ff; const struct nvkm_enum *en; char enunk[6] = ""; en = nvkm_enum_find(gk104_fifo_sched_reason, code); if (!en) snprintf(enunk, sizeof(enunk), "UNK%02x", code); - nv_error(priv, "SCHED_ERROR [ %s ]\n", en ? en->name : enunk); + /* this is a normal situation, not so loud */ + nv_debug(priv, "SCHED_ERROR [ %s ]\n", en ? 
en->name : enunk); switch (code) { case 0x0a: gk104_fifo_intr_sched_ctxsw(priv); break; default: break; } @@ -1131,18 +1183,22 @@ gk104_fifo_init(struct nvkm_object *object) /* PBDMA[n].HCE */ for (i = 0; i < priv->spoon_nr; i++) { nv_wr32(priv, 0x040148 + (i * 0x2000), 0xffffffff); /* INTR */ nv_wr32(priv, 0x04014c + (i * 0x2000), 0xffffffff); /* INTREN */ } nv_wr32(priv, 0x002254, 0x10000000 | priv->user.bar.offset >> 12); + /* enable interrupts */ nv_wr32(priv, 0x002100, 0xffffffff); nv_wr32(priv, 0x002140, 0x7fffffff); + + /* engine timeout */ + nv_wr32(priv, 0x002a0c, 0x80000000 | (1000 * GRFIFO_TIMEOUT_CHECK_PERIOD_MS)); return 0; } void gk104_fifo_dtor(struct nvkm_object *object) { struct gk104_fifo_priv *priv = (void *)object; int i; -- 2.1.4 -- To unsubscribe from this list: send the line "unsubscribe linux-tegra" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html