From: Jerome Glisse <jglisse@xxxxxxxxxx> This tries to identify the faulty user command stream that caused a lockup. If it finds one, it creates a big blob that contains all the information; this includes the packet stream but also a snapshot of all BOs used by the faulty packet stream. This means that the blob is self-contained and can be fully replayed. v2: Better commit message. Split out the radeon debugfs change into its own patch. Split out the vm offset change into its own patch. Add data buffer flags so the kernel can flag BOs that are dumped with valid data. Remove the family from the header and instead let the userspace tools rely on the PCI ID. Avoid doing whitespace/indentation cleaning. v3: Add a chunk size field so older userspace can easily skip newer chunks. v4: Add flags to the cmd buffer to facilitate the userspace tools' job. Allow a userspace tool to easily know if it needs to clear offsets or to add relocation packets. Signed-off-by: Jerome Glisse <jglisse@xxxxxxxxxx> --- drivers/gpu/drm/radeon/radeon.h | 16 ++++ drivers/gpu/drm/radeon/radeon_cs.c | 72 +++++++++++++++++-- drivers/gpu/drm/radeon/radeon_device.c | 17 +++++ drivers/gpu/drm/radeon/radeon_object.h | 11 +++- drivers/gpu/drm/radeon/radeon_ring.c | 18 +++++- drivers/gpu/drm/radeon/radeon_sa.c | 121 ++++++++++++++++++++++++++++++++ include/drm/radeon_drm.h | 110 +++++++++++++++++++++++++++++ 7 files changed, 357 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index dc51ee9..3fbb469 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -74,6 +74,7 @@ #include "radeon_family.h" #include "radeon_mode.h" #include "radeon_reg.h" +#include "radeon_drm.h" /* * Modules parameters. 
@@ -395,6 +396,11 @@ struct radeon_sa_manager { struct radeon_sa_bo; +struct radeon_dump { + struct rati_data_buffer buffer; + struct radeon_bo *bo; +}; + /* sub-allocation buffer */ struct radeon_sa_bo { struct list_head olist; @@ -403,6 +409,9 @@ struct radeon_sa_bo { unsigned soffset; unsigned eoffset; struct radeon_fence *fence; + unsigned nbuffers; + struct radeon_dump *buffers; + uint32_t cmd_flags; }; /* @@ -846,6 +855,8 @@ struct radeon_cs_parser { u32 cs_flags; u32 ring; s32 priority; + unsigned nbuffers; + struct radeon_dump *buffers; }; extern int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx); @@ -1548,6 +1559,11 @@ struct radeon_device { unsigned debugfs_count; /* virtual memory */ struct radeon_vm_manager vm_manager; + /* lockup blob dumping */ + unsigned blob_dump; + struct rati_header blob_header; + uint64_t blob_size; + void *blob; }; int radeon_device_init(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index e86907a..a5b6610 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -36,7 +36,7 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p) { struct drm_device *ddev = p->rdev->ddev; struct radeon_cs_chunk *chunk; - unsigned i, j; + unsigned i, j, ib; bool duplicate; if (p->chunk_relocs_idx == -1) { @@ -53,7 +53,16 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p) if (p->relocs == NULL) { return -ENOMEM; } - for (i = 0; i < p->nrelocs; i++) { + p->buffers = NULL; + p->nbuffers = 0; + if (p->rdev->blob_dump) { + p->buffers = kcalloc(p->nrelocs, sizeof(*p->buffers), GFP_KERNEL); + if (p->buffers == NULL) { + return -ENOMEM; + } + p->nbuffers = p->nrelocs; + } + for (i = 0, ib = 0; i < p->nrelocs; i++) { struct drm_radeon_cs_reloc *r; duplicate = false; @@ -85,8 +94,24 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p) radeon_bo_list_add_object(&p->relocs[i].lobj, &p->validated); - } else + /* initialize dump 
struct */ + if (p->rdev->blob_dump) { + p->buffers[ib].bo = p->relocs[i].robj; + p->buffers[ib].buffer.id = RATI_DATA_BUFFER; + p->buffers[ib].buffer.ver = 1; + p->buffers[ib].buffer.size = radeon_bo_size(p->buffers[i].bo); + p->buffers[ib].buffer.paded_ndw = ALIGN(p->buffers[i].buffer.size >> 2, 2); + p->buffers[ib].buffer.alignment = radeon_bo_alignment(p->buffers[i].bo); + p->buffers[ib].buffer.flags = 0; + p->buffers[ib].buffer.chunk_size = sizeof(p->buffers[ib].buffer) + p->buffers[ib].buffer.paded_ndw * 4; + ib++; + } + } else { p->relocs[i].handle = 0; + if (p->rdev->blob_dump) { + p->nbuffers--; + } + } } return radeon_bo_list_validate(&p->validated); } @@ -303,11 +328,13 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error) { unsigned i; - if (!error) + if (!error) { ttm_eu_fence_buffer_objects(&parser->validated, parser->ib.fence); - else + } else { ttm_eu_backoff_reservation(&parser->validated); + kfree(parser->buffers); + } if (parser->relocs != NULL) { for (i = 0; i < parser->nrelocs; i++) { @@ -335,6 +362,7 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, struct radeon_cs_parser *parser) { struct radeon_cs_chunk *ib_chunk; + unsigned i; int r; if (parser->chunk_ib_idx == -1) @@ -370,6 +398,19 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, DRM_ERROR("Failed to synchronize rings !\n"); } parser->ib.vm_id = 0; + + /* update dump informations */ + if (parser->rdev->blob_dump) { + for (i = 0; i < parser->nbuffers; i++) { + parser->buffers[i].buffer.offset = radeon_bo_gpu_offset(parser->buffers[i].bo); + } + parser->ib.sa_bo->buffers = parser->buffers; + parser->ib.sa_bo->nbuffers = parser->nbuffers; + parser->buffers = NULL; + } + parser->ib.sa_bo->cmd_flags = RATI_CMD_WITH_RELOC | + RATI_CMD_CLEAR_OFFSET; + r = radeon_ib_schedule(rdev, &parser->ib); if (r) { DRM_ERROR("Failed to schedule IB !\n"); @@ -382,14 +423,24 @@ static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser, { struct 
radeon_bo_list *lobj; struct radeon_bo *bo; + uint64_t vm_offset; + unsigned i; int r; list_for_each_entry(lobj, &parser->validated, tv.head) { bo = lobj->bo; - r = radeon_vm_bo_update_pte(parser->rdev, vm, bo, &bo->tbo.mem, NULL); + r = radeon_vm_bo_update_pte(parser->rdev, vm, bo, &bo->tbo.mem, &vm_offset); if (r) { return r; } + if (parser->rdev->blob_dump) { + for (i = 0; i < parser->nbuffers; i++) { + if (parser->buffers[i].bo == bo) { + parser->buffers[i].buffer.offset = vm_offset; + break; + } + } + } } return 0; } @@ -488,6 +539,15 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, */ parser->ib.gpu_addr = parser->ib.sa_bo->soffset; parser->ib.is_const_ib = false; + + /* update dump informations */ + if (parser->rdev->blob_dump) { + parser->ib.sa_bo->buffers = parser->buffers; + parser->ib.sa_bo->nbuffers = parser->nbuffers; + parser->buffers = NULL; + } + parser->ib.sa_bo->cmd_flags = RATI_CMD_WITH_RELOC; + r = radeon_ib_schedule(rdev, &parser->ib); out: if (!r) { diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index b5f4fb9..52af7fc 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -722,6 +722,14 @@ int radeon_device_init(struct radeon_device *rdev, rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; rdev->accel_working = false; + /* initializa blob dumping */ + rdev->blob_dump = true; + rdev->blob_header.id = RATI_HEADER; + rdev->blob_header.ver = 1; + rdev->blob_header.chunk_size = sizeof(rdev->blob_header); + rdev->blob_header.pciid = rdev->pdev->vendor << 16; + rdev->blob_header.pciid |= rdev->pdev->device; + DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X).\n", radeon_family_name[rdev->family], pdev->vendor, pdev->device, pdev->subsystem_vendor, pdev->subsystem_device); @@ -860,6 +868,10 @@ void radeon_device_fini(struct radeon_device *rdev) iounmap(rdev->rmmio); rdev->rmmio = NULL; radeon_debugfs_remove_files(rdev); + 
+ rdev->blob_size = 0; + vfree(rdev->blob); + rdev->blob = NULL; } @@ -987,6 +999,11 @@ int radeon_gpu_reset(struct radeon_device *rdev) int r; int resched; + /* FIXME we should detect which ring caused a lockup and pass + * appropriate ring parameter here + */ + radeon_sa_bo_faulty(rdev, &rdev->ring_tmp_bo, RADEON_RING_TYPE_GFX_INDEX); + radeon_save_bios_scratch_regs(rdev); /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev); diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index befec7d..265cc0c 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -93,6 +93,11 @@ static inline unsigned radeon_bo_gpu_page_alignment(struct radeon_bo *bo) return (bo->tbo.mem.page_alignment << PAGE_SHIFT) / RADEON_GPU_PAGE_SIZE; } +static inline unsigned radeon_bo_alignment(struct radeon_bo *bo) +{ + return (bo->tbo.mem.page_alignment << PAGE_SHIFT); +} + /** * radeon_bo_mmap_offset - return mmap offset of bo * @bo: radeon object for which we query the offset @@ -173,10 +178,14 @@ extern int radeon_sa_bo_new(struct radeon_device *rdev, extern void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo, struct radeon_fence *fence); +extern void radeon_sa_bo_faulty(struct radeon_device *rdev, + struct radeon_sa_manager *sa_manager, + unsigned ring); #if defined(CONFIG_DEBUG_FS) +extern ssize_t radeon_blob_read(struct file *filp, char __user *ubuf, + size_t max, loff_t *ppos); extern void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager, struct seq_file *m); #endif - #endif diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 7352b76..6982d8f 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -497,10 +497,20 @@ static int radeon_debugfs_sa_info(struct seq_file *m, void *data) } +static int radeon_debugfs_lockup(struct seq_file *m, void *data) +{ + /* 
empty, should never be call */ + return 0; +} + static struct drm_info_list radeon_debugfs_sa_list[] = { {"radeon_sa_info", &radeon_debugfs_sa_info, 0, NULL}, }; +static struct drm_info_list radeon_debugfs_lockup_list[] = { + {"radeon_lockup", &radeon_debugfs_lockup, 0, NULL}, +}; + #endif int radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring) @@ -526,7 +536,13 @@ int radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *rin int radeon_debugfs_sa_init(struct radeon_device *rdev) { #if defined(CONFIG_DEBUG_FS) - return radeon_debugfs_add_files(rdev, radeon_debugfs_sa_list, 1, NULL); + int r; + + r = radeon_debugfs_add_files(rdev, radeon_debugfs_sa_list, 1, NULL); + if (r) { + return r; + } + return radeon_debugfs_add_files(rdev, radeon_debugfs_lockup_list, 1, radeon_blob_read); #else return 0; #endif diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index c3ac7f4..2199afc 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -148,6 +148,7 @@ static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo) list_del_init(&sa_bo->olist); list_del_init(&sa_bo->flist); radeon_fence_unref(&sa_bo->fence); + kfree(sa_bo->buffers); kfree(sa_bo); } @@ -294,6 +295,8 @@ int radeon_sa_bo_new(struct radeon_device *rdev, } (*sa_bo)->manager = sa_manager; (*sa_bo)->fence = NULL; + (*sa_bo)->buffers = NULL; + (*sa_bo)->nbuffers = 0; INIT_LIST_HEAD(&(*sa_bo)->olist); INIT_LIST_HEAD(&(*sa_bo)->flist); @@ -360,7 +363,125 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo, *sa_bo = NULL; } +void radeon_sa_bo_faulty(struct radeon_device *rdev, + struct radeon_sa_manager *sa_manager, + unsigned ring) +{ + struct radeon_sa_bo *sa_bo = NULL, *tmp; + struct rati_cmd_buffer cmd; + unsigned long offset = 0; + unsigned i; + + rdev->blob_size = 0; + vfree(rdev->blob); + rdev->blob = NULL; + + spin_lock(&sa_manager->lock); + 
list_for_each_entry(tmp, &sa_manager->olist, olist) { + if (tmp->fence == NULL || + tmp->buffers == NULL || + tmp->fence->ring != ring || + tmp->fence->seq == RADEON_FENCE_NOTEMITED_SEQ || + radeon_fence_signaled(tmp->fence)) { + continue; + } + /* select the oldest unsignaled fence */ + if (sa_bo == NULL || tmp->fence->seq < sa_bo->fence->seq) { + sa_bo = tmp; + } + } + spin_unlock(&sa_manager->lock); + + if (sa_bo == NULL) { + return; + } + + /* init cmd buffer */ + cmd.id = RATI_CMD_BUFFER; + cmd.ver = 1; + switch (ring) { + case RADEON_RING_TYPE_GFX_INDEX: + cmd.ring = RATI_RING_GFX; + break; + case CAYMAN_RING_TYPE_CP1_INDEX: + case CAYMAN_RING_TYPE_CP2_INDEX: + cmd.ring = RATI_RING_COMPUTE; + break; + default: + return; + } + cmd.ndw = (sa_bo->eoffset - sa_bo->soffset) >> 2; + cmd.paded_ndw = ALIGN(cmd.ndw, 2); + cmd.chunk_size = sizeof(cmd) + cmd.paded_ndw * 4; + cmd.flags = sa_bo->cmd_flags; + + /* update header */ + rdev->blob_header.ncmd_buffers = 1; + rdev->blob_header.ndata_buffers = sa_bo->nbuffers; + + /* compute blob size */ + rdev->blob_size = sizeof(rdev->blob_header) + sizeof(cmd); + rdev->blob_size += sa_bo->nbuffers * sizeof(struct rati_data_buffer); + rdev->blob_size += cmd.paded_ndw * 4; + + for (i = 0; i < sa_bo->nbuffers; i++) { + rdev->blob_size += sa_bo->buffers[i].buffer.paded_ndw * 4; + } + + rdev->blob = vzalloc(rdev->blob_size); /* zero-filled: only cmd.ndw dwords are copied below while offset advances by cmd.paded_ndw, so the padding is never written and must not carry stale kernel memory into the dump */ + if (rdev->blob == NULL) { + dev_err(rdev->dev, "failed allocating %lldkb for lockup dump\n", rdev->blob_size >> 10); + return; + } + + /* build blob */ + memcpy(rdev->blob, &rdev->blob_header, sizeof(rdev->blob_header)); + offset += sizeof(rdev->blob_header); + memcpy(rdev->blob + offset, &cmd, sizeof(cmd)); + offset += sizeof(cmd); + memcpy(rdev->blob + offset, radeon_sa_bo_cpu_addr(sa_bo), cmd.ndw * 4); + offset += cmd.paded_ndw * 4; + for (i = 0; i < sa_bo->nbuffers; i++) { + void *ptr = NULL; + int r; + + r = radeon_bo_reserve(sa_bo->buffers[i].bo, true); + if (!r) { + if 
(!radeon_bo_kmap(sa_bo->buffers[i].bo, &ptr)) { + sa_bo->buffers[i].buffer.flags |= RATI_DATA_VALID; + } + } + + memcpy(rdev->blob + offset, &sa_bo->buffers[i].buffer, + sizeof(struct rati_data_buffer)); + offset += sizeof(struct rati_data_buffer); + + if (ptr) { + memcpy(rdev->blob + offset, ptr, + sa_bo->buffers[i].buffer.size); + radeon_bo_kunmap(sa_bo->buffers[i].bo); + } + if (!r) { + radeon_bo_unreserve(sa_bo->buffers[i].bo); + } + offset += sa_bo->buffers[i].buffer.paded_ndw * 4; + } + + dev_err(rdev->dev, "added %lldkb lockup dump\n", rdev->blob_size >> 10); +} + #if defined(CONFIG_DEBUG_FS) +ssize_t radeon_blob_read(struct file *filp, char __user *ubuf, + size_t max, loff_t *ppos) +{ + struct seq_file *m = filp->private_data; + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct radeon_device *rdev = dev->dev_private; + + return simple_read_from_buffer(ubuf, max, ppos, rdev->blob, rdev->blob ? rdev->blob_size : 0); /* a failed dump allocation leaves blob NULL while blob_size is still set; report an empty file instead of copying from a NULL base */ +} + void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager, struct seq_file *m) { diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h index 5805686..45f0f4e 100644 --- a/include/drm/radeon_drm.h +++ b/include/drm/radeon_drm.h @@ -971,4 +971,114 @@ struct drm_radeon_info { uint64_t value; }; + +/* + * RATI dump file format + */ + +/* List of uniq identifiant of each structure, the identifiant of the header + * also serve as file signature. 
+ */ +#define RATI_HEADER 0xCAFEDEAD +#define RATI_CMD_BUFFER 1 +#define RATI_DATA_BUFFER 2 + +/* + * define ring + */ +#define RATI_RING_UNKNOWN 0 +#define RATI_RING_GFX 1 +#define RATI_RING_COMPUTE 2 + +/* struct rati_header + * + * Header of the file + * + * @id: unique identifier of the structure, must be RATI_HEADER + * @ver: version + * @chunk_size: size in bytes of this header chunk + * @pciid: PCI id of the GPU (vendor id << 16 | device id) + * @ndata_buffers: number of data buffers + * @ncmd_buffers: number of cmd buffers + */ +struct rati_header { + uint32_t id; + uint32_t ver; + uint64_t chunk_size; + uint32_t pciid; + uint32_t ndata_buffers; + uint32_t ncmd_buffers; + uint32_t pad; + /* end of version 1 */ +}; + +/* struct rati_cmd_buffer + * + * cmd buffer, followed by paded_ndw dwords (the padded ndw must leave the + * next struct on a 64-bit aligned offset) + * + * @id: unique identifier of the structure, must be RATI_CMD_BUFFER + * @ver: version + * @paded_ndw: padded number of dwords + * @ndw: number of dwords in this command buffer + * @ring: which ring this cmd buffer should be executed on + * @flags: various flags to help userspace tools do the proper + * things + */ +#define RATI_CMD_WITH_RELOC (1 << 0) /* cmd buffer has relocations */ +#define RATI_CMD_CLEAR_OFFSET (1 << 1) /* cmd buffer needs offsets to be cleared */ + +struct rati_cmd_buffer { + uint32_t id; + uint32_t ver; + uint64_t chunk_size; + uint32_t paded_ndw; + uint32_t ndw; + uint32_t ring; + uint32_t flags; + /* end of version 1 */ +}; + +/* struct rati_data_buffer + * + * data buffer, follow by paded_ndw (paded ndw must leave next struct + * on 64bits aligned offset) + * + * @id: uniq identifiant of the structure must be RATI_DATA_BUFFER + * @ver: header version + * @alignment: alignment of buffer + * @paded_ndw: paded number of dwords + * @size: size of buffer in byte + * @offset: offset of this buffer while this was captured (could be 0 + * for all buffer is capturing from userspace) + * @flags: various flags that tells all the purpose for which the + * buffer is use 
(nothing is exclusive same buffer can be + * a texture and zbuffer or all of possible at the same + * time. Kernel will only set the valid flag bit, it's up + * to userspace tools to set more flags. + */ +#define RATI_DATA_VALID (1 << 0) +#define RATI_DATA_CBUF (1 << 1) /* color buffer */ +#define RATI_DATA_ZBUF (1 << 2) /* z buffer */ +#define RATI_DATA_SBUF (1 << 3) /* stencil buffer */ +#define RATI_DATA_TBUF (1 << 4) /* tile buffer for color buffer */ +#define RATI_DATA_FBUF (1 << 5) /* fmask buffer for color buffer */ +#define RATI_DATA_HBUF (1 << 6) /* htile buffer for z/stencil buffer */ +#define RATI_DATA_TEX (1 << 7) /* texture */ +#define RATI_DATA_VS (1 << 8) /* vertex shader */ +#define RATI_DATA_PS (1 << 9) /* pixel shader */ + +struct rati_data_buffer { + uint32_t id; + uint32_t ver; + uint64_t chunk_size; + uint32_t alignment; + uint32_t pad; + uint64_t paded_ndw; + uint64_t size; + uint64_t offset; + uint64_t flags; + /* end of version 1 */ +}; + #endif -- 1.7.7.6 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel