On Mon, Dec 04, 2023 at 06:33:01PM +0100, Boris Brezillon wrote: > Contains everything that's FW related, that includes the code dealing > with the microcontroller unit (MCU) that's running the FW, and anything > related to allocating memory shared between the FW and the CPU. > > A few global FW events are processed in the IRQ handler, the rest is > forwarded to the scheduler, since scheduling is the primary reason for > the FW existence, and also the main source of FW <-> kernel > interactions. > > v3: > - Make the FW path more future-proof (Liviu) > - Use one waitqueue for all FW events > - Simplify propagation of FW events to the scheduler logic > - Drop the panthor_fw_mem abstraction and use panthor_kernel_bo instead > - Account for the panthor_vm changes > - Replace magic number with 0x7fffffff with ~0 to better signify that > it's the maximum permitted value. > - More accurate rounding when computing the firmware timeout. > - Add a 'sub iterator' helper function. This also adds a check that a > firmware entry doesn't overflow the firmware image. > - Drop __packed from FW structures, natural alignment is good enough. > - Other minor code improvements. > > Signed-off-by: Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx> > Signed-off-by: Steven Price <steven.price@xxxxxxx> Hi Boris, While looking at Chris' comments, I have discovered another issue. > --- > drivers/gpu/drm/panthor/panthor_fw.c | 1332 ++++++++++++++++++++++++++ > drivers/gpu/drm/panthor/panthor_fw.h | 504 ++++++++++ > 2 files changed, 1836 insertions(+) > create mode 100644 drivers/gpu/drm/panthor/panthor_fw.c > create mode 100644 drivers/gpu/drm/panthor/panthor_fw.h > > diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c > new file mode 100644 > index 000000000000..85afe769f567 > --- /dev/null > +++ b/drivers/gpu/drm/panthor/panthor_fw.c <snip> > +static int panthor_fw_load_section_entry(struct panthor_device *ptdev, > + const struct firmware *fw, > + struct panthor_fw_binary_iter *iter, > + u32 ehdr) > +{ > + struct panthor_fw_binary_section_entry_hdr hdr; > + struct panthor_fw_section *section; > + u32 section_size; > + u32 name_len; > + int ret; > + > + ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr)); > + if (ret) > + return ret; > + > + if (hdr.data.end < hdr.data.start) { > + drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n", > + hdr.data.end, hdr.data.start); > + return -EINVAL; > + } > + > + if (hdr.va.end < hdr.va.start) { > + drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n", > + hdr.va.end, hdr.va.start); > + return -EINVAL; > + } > + > + if (hdr.data.end > fw->size) { > + drm_err(&ptdev->base, "Firmware corrupted, file truncated? 
data_end=0x%x > fw size=0x%zx\n", > + hdr.data.end, fw->size); > + return -EINVAL; > + } > + > + if ((hdr.va.start & ~PAGE_MASK) != 0 || > + (hdr.va.end & ~PAGE_MASK) != 0) { > + drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n", > + hdr.va.start, hdr.va.end); > + return -EINVAL; > + } > + > + if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) { > + drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n", > + hdr.flags); > + return -EINVAL; > + } > + > + if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) { > + drm_warn(&ptdev->base, > + "Firmware protected mode entry not be supported, ignoring"); > + return 0; > + } > + > + if (hdr.va.start == CSF_MCU_SHARED_REGION_START && > + !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) { > + drm_err(&ptdev->base, > + "Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START); > + return -EINVAL; > + } > + > + name_len = iter->size - iter->offset; > + > + section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL); > + if (!section) > + return -ENOMEM; > + > + list_add_tail(§ion->node, &ptdev->fw->sections); > + section->flags = hdr.flags; > + section->data.size = hdr.data.end - hdr.data.start; > + > + if (section->data.size > 0) { > + void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL); > + > + if (!data) > + return -ENOMEM; > + > + memcpy(data, fw->data + hdr.data.start, section->data.size); > + section->data.buf = data; > + } > + > + if (name_len > 0) { > + char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL); > + > + if (!name) > + return -ENOMEM; > + > + memcpy(name, iter->data + iter->offset, name_len); > + name[name_len] = '\0'; > + section->name = name; > + } > + > + section_size = hdr.va.end - hdr.va.start; > + if (section_size) { > + u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK; > + struct panthor_gem_object *bo; > + u32 vm_map_flags = 0; > + struct sg_table *sgt; > + u64 va = hdr.va.start; > + > + if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR)) > + vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY; > + > + if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX)) > + vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC; > + > + /* TODO: CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are mapped to > + * non-cacheable for now. We might want to introduce a new > + * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device > + * memory and is currently not used by our driver) for > + * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit > + * of IO-coherent systems. > + */ > + if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED) > + vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED; > + > + section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), > + section_size, > + DRM_PANTHOR_BO_NO_MMAP, > + vm_map_flags, va); > + if (IS_ERR(section->mem)) > + return PTR_ERR(section->mem); > + > + if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start)) > + return -EINVAL; > + > + if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) { > + ret = panthor_kernel_bo_vmap(section->mem); > + if (ret) > + return ret; > + } > + > + panthor_fw_init_section_mem(ptdev, section); > + > + bo = to_panthor_bo(section->mem->obj); > + sgt = drm_gem_shmem_get_pages_sgt(&bo->base); > + if (IS_ERR(sgt)) > + return PTR_ERR(section->mem); I think here we should return PTR_ERR(sgt). 
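For clarity, an untested sketch of what I have in mind, reusing the variables already declared in this function:

	bo = to_panthor_bo(section->mem->obj);
	sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
	if (IS_ERR(sgt))
		return PTR_ERR(sgt);

Otherwise we end up calling PTR_ERR() on section->mem, which is a valid pointer at this point, instead of propagating the actual error from drm_gem_shmem_get_pages_sgt().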
In general I agree with Chris that the list_add_tail() call should be delayed until all of the above allocations and preparations have succeeded. Best regards, Liviu > + > + dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE); > + } > + > + if (hdr.va.start == CSF_MCU_SHARED_REGION_START) > + ptdev->fw->shared_section = section; > + > + return 0; > +} > + > +static void > +panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload) > +{ > + struct panthor_fw_section *section; > + > + list_for_each_entry(section, &ptdev->fw->sections, node) { > + struct sg_table *sgt; > + > + if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR)) > + continue; > + > + panthor_fw_init_section_mem(ptdev, section); > + sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base); > + if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt))) > + dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE); > + } > +} > + > +static int panthor_fw_load_entry(struct panthor_device *ptdev, > + const struct firmware *fw, > + struct panthor_fw_binary_iter *iter) > +{ > + struct panthor_fw_binary_iter eiter; > + u32 ehdr; > + int ret; > + > + ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr)); > + if (ret) > + return ret; > + > + if ((iter->offset % sizeof(u32)) || > + (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) { > + drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n", > + (u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr)); > + return -EINVAL; > + } > + > + if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter, > + CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr))) > + return -EINVAL; > + > + switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) { > + case CSF_FW_BINARY_ENTRY_TYPE_IFACE: > + return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr); > + > + /* FIXME: handle those entry types? 
*/ > + case CSF_FW_BINARY_ENTRY_TYPE_CONFIG: > + case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: > + case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: > + case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: > + return 0; > + default: > + break; > + } > + > + if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL) > + return 0; > + > + drm_err(&ptdev->base, > + "Unsupported non-optional entry type %u in firmware\n", > + CSF_FW_BINARY_ENTRY_TYPE(ehdr)); > + return -EINVAL; > +} > + > +static int panthor_fw_load(struct panthor_device *ptdev) > +{ > + const struct firmware *fw = NULL; > + struct panthor_fw_binary_iter iter = {}; > + struct panthor_fw_binary_hdr hdr; > + char fw_path[128]; > + int ret; > + > + snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s", > + (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id), > + (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id), > + CSF_FW_NAME); > + > + ret = request_firmware(&fw, fw_path, ptdev->base.dev); > + if (ret) { > + drm_err(&ptdev->base, "Failed to load firmware image '%s'\n", > + CSF_FW_NAME); > + return ret; > + } > + > + iter.data = fw->data; > + iter.size = fw->size; > + ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr)); > + if (ret) > + goto out; > + > + if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) { > + ret = -EINVAL; > + drm_err(&ptdev->base, "Invalid firmware magic\n"); > + goto out; > + } > + > + if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) { > + ret = -EINVAL; > + drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n", > + hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX); > + goto out; > + } > + > + if (hdr.size > iter.size) { > + drm_err(&ptdev->base, "Firmware image is truncated\n"); > + goto out; > + } > + > + iter.size = hdr.size; > + > + while (iter.offset < hdr.size) { > + ret = panthor_fw_load_entry(ptdev, fw, &iter); > + if (ret) > + goto out; > + } > + > + if (!ptdev->fw->shared_section) { > + drm_err(&ptdev->base, "Shared interface region not found\n"); > + ret = -EINVAL; > + goto out; > + } > + > +out: > + release_firmware(fw); > + return ret; > +} > + > +/** > + * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address > + * @ptdev: Device. > + * @mcu_va: MCU address. > + * > + * Return: NULL if the address is not part of the shared section, non-NULL otherwise. 
> + */ > +static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va) > +{ > + u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem); > + u64 shared_mem_end = shared_mem_start + > + panthor_kernel_bo_size(ptdev->fw->shared_section->mem); > + if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end) > + return NULL; > + > + return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start); > +} > + > +static int panthor_init_cs_iface(struct panthor_device *ptdev, > + unsigned int csg_idx, unsigned int cs_idx) > +{ > + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); > + struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx); > + struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx]; > + u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem); > + u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + > + (csg_idx * glb_iface->control->group_stride) + > + CSF_STREAM_CONTROL_OFFSET + > + (cs_idx * csg_iface->control->stream_stride); > + struct panthor_fw_cs_iface *first_cs_iface = > + panthor_fw_get_cs_iface(ptdev, 0, 0); > + > + if (iface_offset + sizeof(*cs_iface) >= shared_section_sz) > + return -EINVAL; > + > + spin_lock_init(&cs_iface->lock); > + cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset; > + cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va); > + cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va); > + > + if (!cs_iface->input || !cs_iface->output) { > + drm_err(&ptdev->base, "Invalid stream control interface input/output VA"); > + return -EINVAL; > + } > + > + if (cs_iface != first_cs_iface) { > + if (cs_iface->control->features != first_cs_iface->control->features) { > + drm_err(&ptdev->base, "Expecting identical CS slots"); > + return -EINVAL; > + } > + } else { > + u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features); > + > + ptdev->csif_info.cs_reg_count = reg_count; > + ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT; > + } > + > + return 0; > +} > + > +static bool compare_csg(const struct panthor_fw_csg_control_iface *a, > + const struct panthor_fw_csg_control_iface *b) > +{ > + if (a->features != b->features) > + return false; > + if (a->suspend_size != b->suspend_size) > + return false; > + if (a->protm_suspend_size != b->protm_suspend_size) > + return false; > + if (a->stream_num != b->stream_num) > + return false; > + return true; > +} > + > +static int panthor_init_csg_iface(struct panthor_device *ptdev, > + unsigned int csg_idx) > +{ > + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); > + struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx]; > + u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem); > + u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride); > + unsigned int i; > + > + if (iface_offset + sizeof(*csg_iface) >= shared_section_sz) > + return -EINVAL; > + > + spin_lock_init(&csg_iface->lock); > + csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset; > + csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va); > + csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va); > + > + if (csg_iface->control->stream_num < MIN_CS_PER_CSG || > + csg_iface->control->stream_num > MAX_CS_PER_CSG) > + return -EINVAL; > + > + if (!csg_iface->input 
|| !csg_iface->output) { > + drm_err(&ptdev->base, "Invalid group control interface input/output VA"); > + return -EINVAL; > + } > + > + if (csg_idx > 0) { > + struct panthor_fw_csg_iface *first_csg_iface = > + panthor_fw_get_csg_iface(ptdev, 0); > + > + if (!compare_csg(first_csg_iface->control, csg_iface->control)) { > + drm_err(&ptdev->base, "Expecting identical CSG slots"); > + return -EINVAL; > + } > + } > + > + for (i = 0; i < csg_iface->control->stream_num; i++) { > + int ret = panthor_init_cs_iface(ptdev, csg_idx, i); > + > + if (ret) > + return ret; > + } > + > + return 0; > +} > + > +static u32 panthor_get_instr_features(struct panthor_device *ptdev) > +{ > + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); > + > + if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0)) > + return 0; > + > + return glb_iface->control->instr_features; > +} > + > +static int panthor_fw_init_ifaces(struct panthor_device *ptdev) > +{ > + struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global; > + unsigned int i; > + > + if (!ptdev->fw->shared_section->mem->kmap) > + return -EINVAL; > + > + spin_lock_init(&glb_iface->lock); > + glb_iface->control = ptdev->fw->shared_section->mem->kmap; > + > + if (!glb_iface->control->version) { > + drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot"); > + return -EINVAL; > + } > + > + glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va); > + glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va); > + if (!glb_iface->input || !glb_iface->output) { > + drm_err(&ptdev->base, "Invalid global control interface input/output VA"); > + return -EINVAL; > + } > + > + if (glb_iface->control->group_num > MAX_CSGS || > + glb_iface->control->group_num < MIN_CSGS) { > + drm_err(&ptdev->base, "Invalid number of control groups"); > + return -EINVAL; > + } > + > + for (i = 0; i < glb_iface->control->group_num; i++) { > + int ret = panthor_init_csg_iface(ptdev, i); > + > + if (ret) > + return ret; > + } > + > + drm_info(&ptdev->base, "CSF FW v%d.%d.%d, Features %#x Instrumentation features %#x", > + CSF_IFACE_VERSION_MAJOR(glb_iface->control->version), > + CSF_IFACE_VERSION_MINOR(glb_iface->control->version), > + CSF_IFACE_VERSION_PATCH(glb_iface->control->version), > + glb_iface->control->features, > + panthor_get_instr_features(ptdev)); > + return 0; > +} > + > +static void panthor_fw_init_global_iface(struct panthor_device *ptdev) > +{ > + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); > + > + /* Enable all cores. */ > + glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present; > + > + /* Setup timers. */ > + glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US); > + glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT; > + glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US); > + > + /* Enable interrupts we care about. */ > + glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN | > + GLB_PING | > + GLB_CFG_PROGRESS_TIMER | > + GLB_CFG_POWEROFF_TIMER | > + GLB_IDLE_EN | > + GLB_IDLE; > + > + panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN); > + panthor_fw_toggle_reqs(glb_iface, req, ack, > + GLB_CFG_ALLOC_EN | > + GLB_CFG_POWEROFF_TIMER | > + GLB_CFG_PROGRESS_TIMER); > + > + gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); > + > + /* Kick the watchdog. 
*/ > + mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work, > + msecs_to_jiffies(PING_INTERVAL_MS)); > +} > + > +static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status) > +{ > + if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF)) > + ptdev->fw->booted = true; > + > + wake_up_all(&ptdev->fw->req_waitqueue); > + > + /* If the FW is not booted, don't process IRQs, just flag the FW as booted. */ > + if (!ptdev->fw->booted) > + return; > + > + panthor_sched_report_fw_events(ptdev, status); > +} > +PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler); > + > +static int panthor_fw_start(struct panthor_device *ptdev) > +{ > + bool timedout = false; > + > + ptdev->fw->booted = false; > + panthor_job_irq_resume(&ptdev->fw->irq, ~0); > + gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO); > + > + if (!wait_event_timeout(ptdev->fw->req_waitqueue, > + ptdev->fw->booted, > + msecs_to_jiffies(1000))) { > + if (!ptdev->fw->booted && > + !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF)) > + timedout = true; > + } > + > + if (timedout) { > + drm_err(&ptdev->base, "Failed to boot MCU"); > + return -ETIMEDOUT; > + } > + > + return 0; > +} > + > +static void panthor_fw_stop(struct panthor_device *ptdev) > +{ > + u32 status; > + > + gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE); > + if (readl_poll_timeout(ptdev->iomem + MCU_STATUS, status, > + status == MCU_STATUS_DISABLED, 10, 100000)) > + drm_err(&ptdev->base, "Failed to stop MCU"); > +} > + > +/** > + * panthor_fw_pre_reset() - Call before a reset. > + * @ptdev: Device. > + * @on_hang: true if the reset was triggered on a GPU hang. > + * > + * If the reset is not triggered on a hang, we try to gracefully halt the > + * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called. > + */ > +void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang) > +{ > + /* Make sure we won't be woken up by a ping. */ > + cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work); > + > + ptdev->fw->fast_reset = false; > + > + if (!on_hang) { > + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); > + u32 status; > + > + panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT); > + gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); > + if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status, > + status == MCU_STATUS_HALT, 10, 100000) && > + glb_iface->output->halt_status == PANTHOR_FW_HALT_OK) { > + ptdev->fw->fast_reset = true; > + } else { > + drm_warn(&ptdev->base, "Failed to cleanly suspend MCU"); > + } > + > + /* The FW detects 0 -> 1 transitions. Make sure we reset > + * the HALT bit before the FW is rebooted. > + */ > + panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT); > + } > + > + panthor_job_irq_suspend(&ptdev->fw->irq); > +} > + > +/** > + * panthor_fw_post_reset() - Call after a reset. > + * @ptdev: Device. > + * > + * Start the FW. If this is not a fast reset, all FW sections are reloaded to > + * make sure we can recover from a memory corruption. > + */ > +int panthor_fw_post_reset(struct panthor_device *ptdev) > +{ > + int ret; > + > + /* Make the MCU VM active. */ > + ret = panthor_vm_active(ptdev->fw->vm); > + if (ret) > + return ret; > + > + /* Reload all sections, including RO ones. We're not supposed > + * to end up here anyway, let's just assume the overhead of > + * reloading everything is acceptable. 
> + */ > + if (!ptdev->fw->fast_reset) > + panthor_reload_fw_sections(ptdev, true); > + > + ret = panthor_fw_start(ptdev); > + if (ret) > + return ret; > + > + /* We must re-initialize the global interface even on fast-reset. */ > + panthor_fw_init_global_iface(ptdev); > + return 0; > +} > + > +/** > + * panthor_fw_unplug() - Called when the device is unplugged. > + * @ptdev: Device. > + * > + * This function must make sure all pending operations are flushed before > + * will release device resources, thus preventing any interaction with > + * the HW. > + * > + * If there is still FW-related work running after this function returns, > + * they must use drm_dev_{enter,exit}() and skip any HW access when > + * drm_dev_enter() returns false. > + */ > +void panthor_fw_unplug(struct panthor_device *ptdev) > +{ > + struct panthor_fw_section *section; > + > + cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work); > + > + /* Make sure the IRQ handler can be called after that point. */ > + if (ptdev->fw->irq.irq) > + panthor_job_irq_suspend(&ptdev->fw->irq); > + > + panthor_fw_stop(ptdev); > + > + if (ptdev->fw->vm) > + panthor_vm_idle(ptdev->fw->vm); > + > + list_for_each_entry(section, &ptdev->fw->sections, node) > + panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), section->mem); > + > + panthor_vm_put(ptdev->fw->vm); > + > + panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000); > +} > + > +/** > + * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW. > + * @req_ptr: Pointer to the req register. > + * @ack_ptr: Pointer to the ack register. > + * @wq: Wait queue to use for the sleeping wait. > + * @req_mask: Mask of requests to wait for. > + * @acked: Pointer to field that's updated with the acked requests. > + * If the function returns 0, *acked == req_mask. > + * @timeout_ms: Timeout expressed in milliseconds. > + * > + * Return: 0 on success, -ETIMEDOUT otherwise. > + */ > +static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr, > + wait_queue_head_t *wq, > + u32 req_mask, u32 *acked, > + u32 timeout_ms) > +{ > + u32 ack, req = READ_ONCE(*req_ptr) & req_mask; > + int ret; > + > + /* Busy wait for a few µsecs before falling back to a sleeping wait. */ > + *acked = req_mask; > + ret = read_poll_timeout_atomic(READ_ONCE, ack, > + (ack & req_mask) == req, > + 0, 10, 0, > + *ack_ptr); > + if (!ret) > + return 0; > + > + if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req, > + msecs_to_jiffies(timeout_ms))) > + return 0; > + > + /* Check one last time, in case we were not woken up for some reason. */ > + ack = READ_ONCE(*ack_ptr); > + if ((ack & req_mask) == req) > + return 0; > + > + *acked = ~(req ^ ack) & req_mask; > + return -ETIMEDOUT; > +} > + > +/** > + * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged. > + * @ptdev: Device. > + * @req_mask: Mask of requests to wait for. > + * @acked: Pointer to field that's updated with the acked requests. > + * If the function returns 0, *acked == req_mask. > + * @timeout_ms: Timeout expressed in milliseconds. > + * > + * Return: 0 on success, -ETIMEDOUT otherwise. > + */ > +int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, > + u32 req_mask, u32 *acked, > + u32 timeout_ms) > +{ > + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); > + > + /* GLB_HALT doesn't get acked through the FW interface. 
*/ > + if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT))) > + return -EINVAL; > + > + return panthor_fw_wait_acks(&glb_iface->input->req, > + &glb_iface->output->ack, > + &ptdev->fw->req_waitqueue, > + req_mask, acked, timeout_ms); > +} > + > +/** > + * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged. > + * @ptdev: Device. > + * @csg_slot: CSG slot ID. > + * @req_mask: Mask of requests to wait for. > + * @acked: Pointer to field that's updated with the acked requests. > + * If the function returns 0, *acked == req_mask. > + * @timeout_ms: Timeout expressed in milliseconds. > + * > + * Return: 0 on success, -ETIMEDOUT otherwise. > + */ > +int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot, > + u32 req_mask, u32 *acked, u32 timeout_ms) > +{ > + struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot); > + int ret; > + > + if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK)) > + return -EINVAL; > + > + ret = panthor_fw_wait_acks(&csg_iface->input->req, > + &csg_iface->output->ack, > + &ptdev->fw->req_waitqueue, > + req_mask, acked, timeout_ms); > + > + /* > + * Check that all bits in the state field were updated, is any mismatch > + * then clear all bits in the state field. This allows code to do > + * (acked & CSG_STATE_MASK) and get the right value. > + */ > + > + if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK) > + *acked &= ~CSG_STATE_MASK; > + > + return ret; > +} > + > +/** > + * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells. > + * @ptdev: Device. > + * @csg_mask: Bitmask encoding the command stream group doorbells to ring. > + * > + * This function is toggling bits in the doorbell_req and ringing the > + * global doorbell. It doesn't require a user doorbell to be attached to > + * the group. > + */ > +void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask) > +{ > + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); > + > + panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask); > + gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); > +} > + > +static void panthor_fw_ping_work(struct work_struct *work) > +{ > + struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work); > + struct panthor_device *ptdev = fw->irq.ptdev; > + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); > + u32 acked; > + int ret; > + > + if (panthor_device_reset_is_pending(ptdev)) > + return; > + > + panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING); > + gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); > + > + ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100); > + if (ret) { > + panthor_device_schedule_reset(ptdev); > + drm_err(&ptdev->base, "FW ping timeout, scheduling a reset"); > + } else { > + mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work, > + msecs_to_jiffies(PING_INTERVAL_MS)); > + } > +} > + > +/** > + * panthor_fw_init() - Initialize FW related data. > + * @ptdev: Device. > + * > + * Return: 0 on success, a negative error code otherwise. 
> + */ > +int panthor_fw_init(struct panthor_device *ptdev) > +{ > + struct panthor_fw *fw; > + int ret, irq; > + > + fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL); > + if (!fw) > + return -ENOMEM; > + > + ptdev->fw = fw; > + init_waitqueue_head(&fw->req_waitqueue); > + INIT_LIST_HEAD(&fw->sections); > + INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work); > + > + irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job"); > + if (irq <= 0) > + return -ENODEV; > + > + ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0); > + if (ret) { > + drm_err(&ptdev->base, "failed to request job irq"); > + return ret; > + } > + > + ret = panthor_gpu_l2_power_on(ptdev); > + if (ret) > + return ret; > + > + fw->vm = panthor_vm_create(ptdev, true, > + 0, SZ_4G, > + CSF_MCU_SHARED_REGION_START, > + CSF_MCU_SHARED_REGION_SIZE); > + if (IS_ERR(fw->vm)) { > + ret = PTR_ERR(fw->vm); > + fw->vm = NULL; > + goto err_unplug_fw; > + } > + > + ret = panthor_fw_load(ptdev); > + if (ret) > + goto err_unplug_fw; > + > + ret = panthor_vm_active(fw->vm); > + if (ret) > + goto err_unplug_fw; > + > + ret = panthor_fw_start(ptdev); > + if (ret) > + goto err_unplug_fw; > + > + ret = panthor_fw_init_ifaces(ptdev); > + if (ret) > + goto err_unplug_fw; > + > + panthor_fw_init_global_iface(ptdev); > + return 0; > + > +err_unplug_fw: > + panthor_fw_unplug(ptdev); > + return ret; > +} > diff --git a/drivers/gpu/drm/panthor/panthor_fw.h b/drivers/gpu/drm/panthor/panthor_fw.h > new file mode 100644 > index 000000000000..1126b1ea199a > --- /dev/null > +++ b/drivers/gpu/drm/panthor/panthor_fw.h > @@ -0,0 +1,504 @@ > +/* SPDX-License-Identifier: GPL-2.0 or MIT */ > +/* Copyright 2023 Collabora ltd. */ > + > +#ifndef __PANTHOR_MCU_H__ > +#define __PANTHOR_MCU_H__ > + > +#include <linux/types.h> > + > +#include "panthor_device.h" > + > +struct panthor_kernel_bo; > + > +#define MAX_CSGS 31 > +#define MAX_CS_PER_CSG 32 > + > +struct panthor_fw_ringbuf_input_iface { > + u64 insert; > + u64 extract; > +}; > + > +struct panthor_fw_ringbuf_output_iface { > + u64 extract; > + u32 active; > +}; > + > +struct panthor_fw_cs_control_iface { > +#define CS_FEATURES_WORK_REGS(x) (((x) & GENMASK(7, 0)) + 1) > +#define CS_FEATURES_SCOREBOARDS(x) (((x) & GENMASK(15, 8)) >> 8) > +#define CS_FEATURES_COMPUTE BIT(16) > +#define CS_FEATURES_FRAGMENT BIT(17) > +#define CS_FEATURES_TILER BIT(18) > + u32 features; > + u32 input_va; > + u32 output_va; > +}; > + > +struct panthor_fw_cs_input_iface { > +#define CS_STATE_MASK GENMASK(2, 0) > +#define CS_STATE_STOP 0 > +#define CS_STATE_START 1 > +#define CS_EXTRACT_EVENT BIT(4) > +#define CS_IDLE_SYNC_WAIT BIT(8) > +#define CS_IDLE_PROTM_PENDING BIT(9) > +#define CS_IDLE_EMPTY BIT(10) > +#define CS_IDLE_RESOURCE_REQ BIT(11) > +#define CS_TILER_OOM BIT(26) > +#define CS_PROTM_PENDING BIT(27) > +#define CS_FATAL BIT(30) > +#define CS_FAULT BIT(31) > +#define CS_REQ_MASK (CS_STATE_MASK | \ > + CS_EXTRACT_EVENT | \ > + CS_IDLE_SYNC_WAIT | \ > + CS_IDLE_PROTM_PENDING | \ > + CS_IDLE_EMPTY | \ > + CS_IDLE_RESOURCE_REQ) > +#define CS_EVT_MASK (CS_TILER_OOM | \ > + CS_PROTM_PENDING | \ > + CS_FATAL | \ > + CS_FAULT) > + u32 req; > + > +#define CS_CONFIG_PRIORITY(x) ((x) & GENMASK(3, 0)) > +#define CS_CONFIG_DOORBELL(x) (((x) << 8) & GENMASK(15, 8)) > + u32 config; > + u32 reserved1; > + u32 ack_irq_mask; > + u64 ringbuf_base; > + u32 ringbuf_size; > + u32 reserved2; > + u64 heap_start; > + u64 heap_end; > + u64 ringbuf_input; > + u64 ringbuf_output; > + u32 instr_config; > + u32 
instrbuf_size; > + u64 instrbuf_base; > + u64 instrbuf_offset_ptr; > +}; > + > +struct panthor_fw_cs_output_iface { > + u32 ack; > + u32 reserved1[15]; > + u64 status_cmd_ptr; > + > +#define CS_STATUS_WAIT_SB_MASK GENMASK(15, 0) > +#define CS_STATUS_WAIT_SB_SRC_MASK GENMASK(19, 16) > +#define CS_STATUS_WAIT_SB_SRC_NONE (0 << 16) > +#define CS_STATUS_WAIT_SB_SRC_WAIT (8 << 16) > +#define CS_STATUS_WAIT_SYNC_COND_LE (0 << 24) > +#define CS_STATUS_WAIT_SYNC_COND_GT (1 << 24) > +#define CS_STATUS_WAIT_SYNC_COND_MASK GENMASK(27, 24) > +#define CS_STATUS_WAIT_PROGRESS BIT(28) > +#define CS_STATUS_WAIT_PROTM BIT(29) > +#define CS_STATUS_WAIT_SYNC_64B BIT(30) > +#define CS_STATUS_WAIT_SYNC BIT(31) > + u32 status_wait; > + u32 status_req_resource; > + u64 status_wait_sync_ptr; > + u32 status_wait_sync_value; > + u32 status_scoreboards; > + > +#define CS_STATUS_BLOCKED_REASON_UNBLOCKED 0 > +#define CS_STATUS_BLOCKED_REASON_SB_WAIT 1 > +#define CS_STATUS_BLOCKED_REASON_PROGRESS_WAIT 2 > +#define CS_STATUS_BLOCKED_REASON_SYNC_WAIT 3 > +#define CS_STATUS_BLOCKED_REASON_DEFERRED 5 > +#define CS_STATUS_BLOCKED_REASON_RES 6 > +#define CS_STATUS_BLOCKED_REASON_FLUSH 7 > +#define CS_STATUS_BLOCKED_REASON_MASK GENMASK(3, 0) > + u32 status_blocked_reason; > + u32 status_wait_sync_value_hi; > + u32 reserved2[6]; > + > +#define CS_EXCEPTION_TYPE(x) ((x) & GENMASK(7, 0)) > +#define CS_EXCEPTION_DATA(x) (((x) >> 8) & GENMASK(23, 0)) > + u32 fault; > + u32 fatal; > + u64 fault_info; > + u64 fatal_info; > + u32 reserved3[10]; > + u32 heap_vt_start; > + u32 heap_vt_end; > + u32 reserved4; > + u32 heap_frag_end; > + u64 heap_address; > +}; > + > +struct panthor_fw_csg_control_iface { > + u32 features; > + u32 input_va; > + u32 output_va; > + u32 suspend_size; > + u32 protm_suspend_size; > + u32 stream_num; > + u32 stream_stride; > +}; > + > +struct panthor_fw_csg_input_iface { > +#define CSG_STATE_MASK GENMASK(2, 0) > +#define CSG_STATE_TERMINATE 0 > +#define CSG_STATE_START 1 > +#define CSG_STATE_SUSPEND 2 > +#define CSG_STATE_RESUME 3 > +#define CSG_ENDPOINT_CONFIG BIT(4) > +#define CSG_STATUS_UPDATE BIT(5) > +#define CSG_SYNC_UPDATE BIT(28) > +#define CSG_IDLE BIT(29) > +#define CSG_DOORBELL BIT(30) > +#define CSG_PROGRESS_TIMER_EVENT BIT(31) > +#define CSG_REQ_MASK (CSG_STATE_MASK | \ > + CSG_ENDPOINT_CONFIG | \ > + CSG_STATUS_UPDATE) > +#define CSG_EVT_MASK (CSG_SYNC_UPDATE | \ > + CSG_IDLE | \ > + CSG_PROGRESS_TIMER_EVENT) > + u32 req; > + u32 ack_irq_mask; > + > + u32 doorbell_req; > + u32 cs_irq_ack; > + u32 reserved1[4]; > + u64 allow_compute; > + u64 allow_fragment; > + u32 allow_other; > + > +#define CSG_EP_REQ_COMPUTE(x) ((x) & GENMASK(7, 0)) > +#define CSG_EP_REQ_FRAGMENT(x) (((x) << 8) & GENMASK(15, 8)) > +#define CSG_EP_REQ_TILER(x) (((x) << 16) & GENMASK(19, 16)) > +#define CSG_EP_REQ_EXCL_COMPUTE BIT(20) > +#define CSG_EP_REQ_EXCL_FRAGMENT BIT(21) > +#define CSG_EP_REQ_PRIORITY(x) (((x) << 28) & GENMASK(31, 28)) > +#define CSG_EP_REQ_PRIORITY_MASK GENMASK(31, 28) > + u32 endpoint_req; > + u32 reserved2[2]; > + u64 suspend_buf; > + u64 protm_suspend_buf; > + u32 config; > + u32 iter_trace_config; > +}; > + > +struct panthor_fw_csg_output_iface { > + u32 ack; > + u32 reserved1; > + u32 doorbell_ack; > + u32 cs_irq_req; > + u32 status_endpoint_current; > + u32 status_endpoint_req; > + > +#define CSG_STATUS_STATE_IS_IDLE BIT(0) > + u32 status_state; > + u32 resource_dep; > +}; > + > +struct panthor_fw_global_control_iface { > + u32 version; > + u32 features; > + u32 input_va; > + u32 output_va; > + u32 
group_num; > + u32 group_stride; > + u32 perfcnt_size; > + u32 instr_features; > +}; > + > +struct panthor_fw_global_input_iface { > +#define GLB_HALT BIT(0) > +#define GLB_CFG_PROGRESS_TIMER BIT(1) > +#define GLB_CFG_ALLOC_EN BIT(2) > +#define GLB_CFG_POWEROFF_TIMER BIT(3) > +#define GLB_PROTM_ENTER BIT(4) > +#define GLB_PERFCNT_EN BIT(5) > +#define GLB_PERFCNT_SAMPLER BIT(6) > +#define GLB_COUNTER_EN BIT(7) > +#define GLB_PING BIT(8) > +#define GLB_FWCFG_UPDATE BIT(9) > +#define GLB_IDLE_EN BIT(10) > +#define GLB_SLEEP BIT(12) > +#define GLB_INACTIVE_COMPUTE BIT(20) > +#define GLB_INACTIVE_FRAGMENT BIT(21) > +#define GLB_INACTIVE_TILER BIT(22) > +#define GLB_PROTM_EXIT BIT(23) > +#define GLB_PERFCNT_THRESHOLD BIT(24) > +#define GLB_PERFCNT_OVERFLOW BIT(25) > +#define GLB_IDLE BIT(26) > +#define GLB_DBG_CSF BIT(30) > +#define GLB_DBG_HOST BIT(31) > +#define GLB_REQ_MASK GENMASK(10, 0) > +#define GLB_EVT_MASK GENMASK(26, 20) > + u32 req; > + u32 ack_irq_mask; > + u32 doorbell_req; > + u32 reserved1; > + u32 progress_timer; > + > +#define GLB_TIMER_VAL(x) ((x) & GENMASK(30, 0)) > +#define GLB_TIMER_SOURCE_GPU_COUNTER BIT(31) > + u32 poweroff_timer; > + u64 core_en_mask; > + u32 reserved2; > + u32 perfcnt_as; > + u64 perfcnt_base; > + u32 perfcnt_extract; > + u32 reserved3[3]; > + u32 perfcnt_config; > + u32 perfcnt_csg_select; > + u32 perfcnt_fw_enable; > + u32 perfcnt_csg_enable; > + u32 perfcnt_csf_enable; > + u32 perfcnt_shader_enable; > + u32 perfcnt_tiler_enable; > + u32 perfcnt_mmu_l2_enable; > + u32 reserved4[8]; > + u32 idle_timer; > +}; > + > +enum panthor_fw_halt_status { > + PANTHOR_FW_HALT_OK = 0, > + PANTHOR_FW_HALT_ON_PANIC = 0x4e, > + PANTHOR_FW_HALT_ON_WATCHDOG_EXPIRATION = 0x4f, > +}; > + > +struct panthor_fw_global_output_iface { > + u32 ack; > + u32 reserved1; > + u32 doorbell_ack; > + u32 reserved2; > + u32 halt_status; > + u32 perfcnt_status; > + u32 perfcnt_insert; > +}; > + > +/** > + * struct panthor_fw_cs_iface - Firmware command stream slot interface > + */ > +struct panthor_fw_cs_iface { > + /** > + * @lock: Lock protecting access to the panthor_fw_cs_input_iface::req > + * field. > + * > + * Needed so we can update the req field concurrently from the interrupt > + * handler and the scheduler logic. > + * > + * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW > + * interface sections are mapped uncached/write-combined right now, and > + * using cmpxchg() on such mappings leads to SError faults. Revisit when > + * we have 'SHARED' GPU mappings hooked up. > + */ > + spinlock_t lock; > + > + /** > + * @control: Command stream slot control interface. > + * > + * Used to expose command stream slot properties. > + * > + * This interface is read-only. > + */ > + struct panthor_fw_cs_control_iface *control; > + > + /** > + * @input: Command stream slot input interface. > + * > + * Used for host updates/events. > + */ > + struct panthor_fw_cs_input_iface *input; > + > + /** > + * @output: Command stream slot output interface. > + * > + * Used for FW updates/events. > + * > + * This interface is read-only. > + */ > + const struct panthor_fw_cs_output_iface *output; > +}; > + > +/** > + * struct panthor_fw_csg_iface - Firmware command stream group slot interface > + */ > +struct panthor_fw_csg_iface { > + /** > + * @lock: Lock protecting access to the panthor_fw_csg_input_iface::req > + * field. > + * > + * Needed so we can update the req field concurrently from the interrupt > + * handler and the scheduler logic. 
> + * > + * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW > + * interface sections are mapped uncached/write-combined right now, and > + * using cmpxchg() on such mappings leads to SError faults. Revisit when > + * we have 'SHARED' GPU mappings hooked up. > + */ > + spinlock_t lock; > + > + /** > + * @control: Command stream group slot control interface. > + * > + * Used to expose command stream group slot properties. > + * > + * This interface is read-only. > + */ > + const struct panthor_fw_csg_control_iface *control; > + > + /** > + * @input: Command stream slot input interface. > + * > + * Used for host updates/events. > + */ > + struct panthor_fw_csg_input_iface *input; > + > + /** > + * @output: Command stream group slot output interface. > + * > + * Used for FW updates/events. > + * > + * This interface is read-only. > + */ > + const struct panthor_fw_csg_output_iface *output; > +}; > + > +/** > + * struct panthor_fw_global_iface - Firmware global interface > + */ > +struct panthor_fw_global_iface { > + /** > + * @lock: Lock protecting access to the panthor_fw_global_input_iface::req > + * field. > + * > + * Needed so we can update the req field concurrently from the interrupt > + * handler and the scheduler/FW management logic. > + * > + * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW > + * interface sections are mapped uncached/write-combined right now, and > + * using cmpxchg() on such mappings leads to SError faults. Revisit when > + * we have 'SHARED' GPU mappings hooked up. > + */ > + spinlock_t lock; > + > + /** > + * @control: Command stream group slot control interface. > + * > + * Used to expose global FW properties. > + * > + * This interface is read-only. > + */ > + const struct panthor_fw_global_control_iface *control; > + > + /** > + * @input: Global input interface. > + * > + * Used for host updates/events. > + */ > + struct panthor_fw_global_input_iface *input; > + > + /** > + * @output: Global output interface. > + * > + * Used for FW updates/events. > + * > + * This interface is read-only. > + */ > + const struct panthor_fw_global_output_iface *output; > +}; > + > +/** > + * panthor_fw_toggle_reqs() - Toggle acknowledge bits to send an event to the FW > + * @__iface: The interface to operate on. > + * @__in_reg: Name of the register to update in the input section of the interface. > + * @__out_reg: Name of the register to take as a reference in the output section of the > + * interface. > + * @__mask: Mask to apply to the update. > + * > + * The Host -> FW event/message passing was designed to be lockless, with each side of > + * the channel having its writeable section. Events are signaled as a difference between > + * the host and FW side in the req/ack registers (when a bit differs, there's an event > + * pending, when they are the same, nothing needs attention). > + * > + * This helper allows one to update the req register based on the current value of the > + * ack register managed by the FW. Toggling a specific bit will flag an event. In order > + * for events to be re-evaluated, the interface doorbell needs to be rung. > + * > + * Concurrent accesses to the same req register is covered. > + * > + * Anything requiring atomic updates to multiple registers requires a dedicated lock. 
> + */ > +#define panthor_fw_toggle_reqs(__iface, __in_reg, __out_reg, __mask) \ > + do { \ > + u32 __cur_val, __new_val, __out_val; \ > + spin_lock(&(__iface)->lock); \ > + __cur_val = READ_ONCE((__iface)->input->__in_reg); \ > + __out_val = READ_ONCE((__iface)->output->__out_reg); \ > + __new_val = ((__out_val ^ (__mask)) & (__mask)) | (__cur_val & ~(__mask)); \ > + WRITE_ONCE((__iface)->input->__in_reg, __new_val); \ > + spin_unlock(&(__iface)->lock); \ > + } while (0) > + > +/** > + * panthor_fw_update_reqs() - Update bits to reflect a configuration change > + * @__iface: The interface to operate on. > + * @__in_reg: Name of the register to update in the input section of the interface. > + * @__val: Value to set. > + * @__mask: Mask to apply to the update. > + * > + * Some configuration get passed through req registers that are also used to > + * send events to the FW. Those req registers being updated from the interrupt > + * handler, they require special helpers to update the configuration part as well. > + * > + * Concurrent accesses to the same req register is covered. > + * > + * Anything requiring atomic updates to multiple registers requires a dedicated lock. > + */ > +#define panthor_fw_update_reqs(__iface, __in_reg, __val, __mask) \ > + do { \ > + u32 __cur_val, __new_val; \ > + spin_lock(&(__iface)->lock); \ > + __cur_val = READ_ONCE((__iface)->input->__in_reg); \ > + __new_val = (__cur_val & ~(__mask)) | ((__val) & (__mask)); \ > + WRITE_ONCE((__iface)->input->__in_reg, __new_val); \ > + spin_unlock(&(__iface)->lock); \ > + } while (0) > + > +struct panthor_fw_global_iface * > +panthor_fw_get_glb_iface(struct panthor_device *ptdev); > + > +struct panthor_fw_csg_iface * > +panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot); > + > +struct panthor_fw_cs_iface * > +panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot); > + > +int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_id, u32 req_mask, > + u32 *acked, u32 timeout_ms); > + > +int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, u32 req_mask, u32 *acked, > + u32 timeout_ms); > + > +void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_slot); > + > +struct panthor_kernel_bo * > +panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev, > + struct panthor_fw_ringbuf_input_iface **input, > + const struct panthor_fw_ringbuf_output_iface **output, > + u32 *input_fw_va, u32 *output_fw_va); > +struct panthor_kernel_bo * > +panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size); > + > +struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev); > + > +void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang); > +int panthor_fw_post_reset(struct panthor_device *ptdev); > + > +static inline void panthor_fw_suspend(struct panthor_device *ptdev) > +{ > + panthor_fw_pre_reset(ptdev, false); > +} > + > +static inline int panthor_fw_resume(struct panthor_device *ptdev) > +{ > + return panthor_fw_post_reset(ptdev); > +} > + > +int panthor_fw_init(struct panthor_device *ptdev); > +void panthor_fw_unplug(struct panthor_device *ptdev); > + > +#endif > -- > 2.43.0 > -- ==================== | I would like to | | fix the world, | | but they're not | | giving me the | \ source code! / --------------- ¯\_(ツ)_/¯