On 04/12/2023 17:33, Boris Brezillon wrote:
> Contains everything that's FW related, that includes the code dealing
> with the microcontroller unit (MCU) that's running the FW, and anything
> related to allocating memory shared between the FW and the CPU.
>
> A few global FW events are processed in the IRQ handler, the rest is
> forwarded to the scheduler, since scheduling is the primary reason for
> the FW existence, and also the main source of FW <-> kernel
> interactions.
>
> v3:
> - Make the FW path more future-proof (Liviu)
> - Use one waitqueue for all FW events
> - Simplify propagation of FW events to the scheduler logic
> - Drop the panthor_fw_mem abstraction and use panthor_kernel_bo instead
> - Account for the panthor_vm changes
> - Replace magic number with 0x7fffffff with ~0 to better signify that
>   it's the maximum permitted value.
> - More accurate rounding when computing the firmware timeout.
> - Add a 'sub iterator' helper function. This also adds a check that a
>   firmware entry doesn't overflow the firmware image.
> - Drop __packed from FW structures, natural alignment is good enough.
> - Other minor code improvements.
>
> Signed-off-by: Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx>
> Signed-off-by: Steven Price <steven.price@xxxxxxx>
> ---
>  drivers/gpu/drm/panthor/panthor_fw.c | 1332 ++++++++++++++++++++++++++
>  drivers/gpu/drm/panthor/panthor_fw.h |  504 ++++++++++
>  2 files changed, 1836 insertions(+)
>  create mode 100644 drivers/gpu/drm/panthor/panthor_fw.c
>  create mode 100644 drivers/gpu/drm/panthor/panthor_fw.h
>
> diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c
> new file mode 100644
> index 000000000000..85afe769f567
> --- /dev/null
> +++ b/drivers/gpu/drm/panthor/panthor_fw.c

...

> +static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
> +					 const struct firmware *fw,
> +					 struct panthor_fw_binary_iter *iter,
> +					 u32 ehdr)
> +{
> +	struct panthor_fw_binary_section_entry_hdr hdr;
> +	struct panthor_fw_section *section;
> +	u32 section_size;
> +	u32 name_len;
> +	int ret;
> +
> +	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
> +	if (ret)
> +		return ret;
> +
> +	if (hdr.data.end < hdr.data.start) {
> +		drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n",
> +			hdr.data.end, hdr.data.start);
> +		return -EINVAL;
> +	}
> +
> +	if (hdr.va.end < hdr.va.start) {
> +		drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n",
> +			hdr.va.end, hdr.va.start);
> +		return -EINVAL;
> +	}
> +
> +	if (hdr.data.end > fw->size) {
> +		drm_err(&ptdev->base, "Firmware corrupted, file truncated? data_end=0x%x > fw size=0x%zx\n",
> +			hdr.data.end, fw->size);
> +		return -EINVAL;
> +	}
> +
> +	if ((hdr.va.start & ~PAGE_MASK) != 0 ||
> +	    (hdr.va.end & ~PAGE_MASK) != 0) {
> +		drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n",
> +			hdr.va.start, hdr.va.end);
> +		return -EINVAL;
> +	}
> +
> +	if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) {
> +		drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n",
> +			hdr.flags);
> +		return -EINVAL;
> +	}
> +
> +	if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) {
> +		drm_warn(&ptdev->base,
> +			 "Firmware protected mode entry not be supported, ignoring");
> +		return 0;
> +	}
> +
> +	if (hdr.va.start == CSF_MCU_SHARED_REGION_START &&
> +	    !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) {
> +		drm_err(&ptdev->base,
> +			"Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START);
> +		return -EINVAL;
> +	}
> +
> +	name_len = iter->size - iter->offset;
> +
> +	section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL);
> +	if (!section)
> +		return -ENOMEM;
> +
> +	list_add_tail(&section->node, &ptdev->fw->sections);
> +	section->flags = hdr.flags;
> +	section->data.size = hdr.data.end - hdr.data.start;
> +
> +	if (section->data.size > 0) {
> +		void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL);
> +
> +		if (!data)
> +			return -ENOMEM;
> +
> +		memcpy(data, fw->data + hdr.data.start, section->data.size);
> +		section->data.buf = data;
> +	}
> +
> +	if (name_len > 0) {
> +		char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL);
> +
> +		if (!name)
> +			return -ENOMEM;
> +
> +		memcpy(name, iter->data + iter->offset, name_len);
> +		name[name_len] = '\0';
> +		section->name = name;
> +	}
> +
> +	section_size = hdr.va.end - hdr.va.start;
> +	if (section_size) {
> +		u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK;
> +		struct panthor_gem_object *bo;
> +		u32 vm_map_flags = 0;
> +		struct sg_table *sgt;
> +		u64 va = hdr.va.start;
> +
> +		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
> +			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY;
> +
> +		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX))
> +			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC;
> +
> +		/* TODO: CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are mapped to
> +		 * non-cacheable for now. We might want to introduce a new
> +		 * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device
> +		 * memory and is currently not used by our driver) for
> +		 * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit
> +		 * of IO-coherent systems.
> +		 */
> +		if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED)
> +			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED;
> +
> +		section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
> +							section_size,
> +							DRM_PANTHOR_BO_NO_MMAP,
> +							vm_map_flags, va);
> +		if (IS_ERR(section->mem))
> +			return PTR_ERR(section->mem);

There's a small bug here - if panthor_kernel_bo_create() fails, section->mem
will be left containing an error value, not NULL or a valid pointer. This
means that on rmmod, panthor_fw_unplug() will try and free the BO even though
it was never allocated, causing a kernel panic.

The solution is obviously to only assign section->mem if the allocation
actually succeeded. However I also wonder if the list_add_tail() call should
be moved to later in the function, after we know everything else has
succeeded, or if this function needs some more error handling in general.
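
Something along these lines is what I have in mind - an untested sketch only,
assuming section->mem is a struct panthor_kernel_bo * as elsewhere in this
series, and using a temporary so the error value never lands in section->mem:

		struct panthor_kernel_bo *mem;

		mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
					       section_size,
					       DRM_PANTHOR_BO_NO_MMAP,
					       vm_map_flags, va);
		if (IS_ERR(mem))
			return PTR_ERR(mem);

		/* Only publish the BO once we know creation succeeded, so
		 * panthor_fw_unplug() never sees an ERR_PTR() value here.
		 */
		section->mem = mem;

That keeps the existing drmm-managed cleanup of the earlier allocations
untouched while making the unplug path safe.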
> +
> +		if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start))
> +			return -EINVAL;
> +
> +		if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) {
> +			ret = panthor_kernel_bo_vmap(section->mem);
> +			if (ret)
> +				return ret;
> +		}
> +
> +		panthor_fw_init_section_mem(ptdev, section);
> +
> +		bo = to_panthor_bo(section->mem->obj);
> +		sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
> +		if (IS_ERR(sgt))
> +			return PTR_ERR(section->mem);
> +
> +		dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
> +	}
> +
> +	if (hdr.va.start == CSF_MCU_SHARED_REGION_START)
> +		ptdev->fw->shared_section = section;
> +
> +	return 0;
> +}
> +

...

> +/**
> + * panthor_fw_unplug() - Called when the device is unplugged.
> + * @ptdev: Device.
> + *
> + * This function must make sure all pending operations are flushed before
> + * will release device resources, thus preventing any interaction with
> + * the HW.
> + *
> + * If there is still FW-related work running after this function returns,
> + * they must use drm_dev_{enter,exit}() and skip any HW access when
> + * drm_dev_enter() returns false.
> + */
> +void panthor_fw_unplug(struct panthor_device *ptdev)
> +{
> +	struct panthor_fw_section *section;
> +
> +	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
> +
> +	/* Make sure the IRQ handler can be called after that point. */
> +	if (ptdev->fw->irq.irq)
> +		panthor_job_irq_suspend(&ptdev->fw->irq);
> +
> +	panthor_fw_stop(ptdev);
> +
> +	if (ptdev->fw->vm)
> +		panthor_vm_idle(ptdev->fw->vm);
> +
> +	list_for_each_entry(section, &ptdev->fw->sections, node)
> +		panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), section->mem);

Here's where we destroy the potentially invalid section->mem. Note that the
issues are twofold: Firstly, section->mem could be an error pointer as
mentioned earlier. Secondly, panthor_kernel_bo_destroy() doesn't actually
check for error values or even NULL.

It would probably be worth adding NULL checks to panthor_kernel_bo_destroy()
and panthor_kernel_bo_vunmap() to protect against this (rough sketch at the
end of this mail).

> +
> +	panthor_vm_put(ptdev->fw->vm);
> +
> +	panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
> +}
> +

Cheers,
Chris
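
P.S. For the NULL checks mentioned above, the kind of guard I was thinking of
is sketched below - the two-argument signature is taken from the call site in
panthor_fw_unplug(), and I'm assuming the function returns void; the existing
body is elided:

void panthor_kernel_bo_destroy(struct panthor_vm *vm, struct panthor_kernel_bo *bo)
{
	/* Tolerate NULL/ERR_PTR so error and unwind paths can call this
	 * unconditionally.
	 */
	if (IS_ERR_OR_NULL(bo))
		return;

	/* ... existing unmap/vunmap/put sequence ... */
}

with the equivalent IS_ERR_OR_NULL() bail-out at the top of
panthor_kernel_bo_vunmap().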