Hi, On 11.03.2024 17:58, Jeffrey Hugo wrote: > During the boot process of AIC100, the bootloaders (PBL and SBL) log > messages to device RAM. During SBL, if the host opens the QAIC_LOGGING > channel, SBL will offload the contents of the log buffer to the host, > and stream any new messages that SBL logs. > > This log of the boot process can be very useful for an initial triage of > any boot related issues. For example, if SBL rejects one of the runtime > firmware images for a validation failure, SBL will log a reason why. > > Add the ability of the driver to open the logging channel, receive the > messages, and store them. Also define a debugfs entry called "bootlog" > by hooking into the DRM debugfs framework. When the bootlog debugfs > entry is read, the current contents of the log that the host is caching > is displayed to the user. The driver will retain the cache until it > detects that the device has rebooted. At that point, the cache will be > freed, and the driver will wait for a new log. With this scheme, the > driver will only have a cache of the log from the current device boot. > Note that if the driver initializes a device and it is already in the > runtime state (QSM), no bootlog will be available through this mechanism > because the driver and SBL have not communicated. 
> > Signed-off-by: Jeffrey Hugo <quic_jhugo@xxxxxxxxxxx> > Reviewed-by: Carl Vanderlip <quic_carlv@xxxxxxxxxxx> > Reviewed-by: Pranjal Ramajor Asha Kanojiya <quic_pkanojiy@xxxxxxxxxxx> > --- > drivers/accel/qaic/Makefile | 2 + > drivers/accel/qaic/qaic.h | 8 + > drivers/accel/qaic/qaic_debugfs.c | 271 ++++++++++++++++++++++++++++++ > drivers/accel/qaic/qaic_debugfs.h | 20 +++ > drivers/accel/qaic/qaic_drv.c | 16 +- > 5 files changed, 316 insertions(+), 1 deletion(-) > create mode 100644 drivers/accel/qaic/qaic_debugfs.c > create mode 100644 drivers/accel/qaic/qaic_debugfs.h > > diff --git a/drivers/accel/qaic/Makefile b/drivers/accel/qaic/Makefile > index 3f7f6dfde7f2..2cadcc1baa0e 100644 > --- a/drivers/accel/qaic/Makefile > +++ b/drivers/accel/qaic/Makefile > @@ -11,3 +11,5 @@ qaic-y := \ > qaic_data.o \ > qaic_drv.o \ > qaic_timesync.o > + > +qaic-$(CONFIG_DEBUG_FS) += qaic_debugfs.o > diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h > index 9256653b3036..03d9c9fbffb3 100644 > --- a/drivers/accel/qaic/qaic.h > +++ b/drivers/accel/qaic/qaic.h > @@ -153,6 +153,14 @@ struct qaic_device { > struct mhi_device *qts_ch; > /* Work queue for tasks related to MHI "QAIC_TIMESYNC" channel */ > struct workqueue_struct *qts_wq; > + /* Head of list of page allocated by MHI bootlog device */ > + struct list_head bootlog; > + /* MHI bootlog channel device */ > + struct mhi_device *bootlog_ch; > + /* Work queue for tasks related to MHI bootlog device */ > + struct workqueue_struct *bootlog_wq; > + /* Synchronizes access of pages in MHI bootlog device */ > + struct mutex bootlog_mutex; > }; > > struct qaic_drm_device { > diff --git a/drivers/accel/qaic/qaic_debugfs.c b/drivers/accel/qaic/qaic_debugfs.c > new file mode 100644 > index 000000000000..4f87fe29be1a > --- /dev/null > +++ b/drivers/accel/qaic/qaic_debugfs.c > @@ -0,0 +1,271 @@ > +// SPDX-License-Identifier: GPL-2.0-only > + > +/* Copyright (c) 2020, The Linux Foundation. All rights reserved. 
*/ > +/* Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ > + > +#include <linux/debugfs.h> > +#include <linux/device.h> > +#include <linux/fs.h> > +#include <linux/list.h> > +#include <linux/mhi.h> > +#include <linux/mutex.h> > +#include <linux/pci.h> > +#include <linux/seq_file.h> > +#include <linux/string.h> > +#include <linux/types.h> > +#include <linux/workqueue.h> > + > +#include "qaic.h" > +#include "qaic_debugfs.h" > + > +#define BOOTLOG_POOL_SIZE 16 > +#define BOOTLOG_MSG_SIZE 512 > + > +struct bootlog_msg { > + /* Buffer for bootlog messages */ > + char str[BOOTLOG_MSG_SIZE]; > + /* Root struct of device, used to access device resources */ > + struct qaic_device *qdev; > + /* Work struct to schedule work coming on QAIC_LOGGING channel */ > + struct work_struct work; > +}; > + > +struct bootlog_page { > + /* Node in list of bootlog pages maintained by root device struct */ > + struct list_head node; > + /* Total size of the buffer that holds the bootlogs. 
It is PAGE_SIZE */ > + unsigned int size; > + /* Offset for the next bootlog */ > + unsigned int offset; > +}; > + > +static int bootlog_show(struct seq_file *s, void *unused) > +{ > + struct bootlog_page *page; > + struct qaic_device *qdev; > + void *page_end; > + void *log; > + > + qdev = s->private; > + mutex_lock(&qdev->bootlog_mutex); > + list_for_each_entry(page, &qdev->bootlog, node) { > + log = page + 1; > + page_end = (void *)page + page->offset; > + while (log < page_end) { > + seq_printf(s, "%s", (char *)log); > + log += strlen(log) + 1; > + } > + } > + mutex_unlock(&qdev->bootlog_mutex); > + > + return 0; > +} > + > +static int bootlog_fops_open(struct inode *inode, struct file *file) > +{ > + return single_open(file, bootlog_show, inode->i_private); > +} > + > +static const struct file_operations bootlog_fops = { > + .owner = THIS_MODULE, > + .open = bootlog_fops_open, > + .read = seq_read, > + .llseek = seq_lseek, > + .release = single_release, > +}; > + > +void qaic_debugfs_init(struct qaic_drm_device *qddev) > +{ > + struct qaic_device *qdev = qddev->qdev; > + struct dentry *debugfs_root; > + > + debugfs_root = to_drm(qddev)->debugfs_root; > + > + debugfs_create_file("bootlog", 0400, debugfs_root, qdev, &bootlog_fops); > +} > + > +static struct bootlog_page *alloc_bootlog_page(struct qaic_device *qdev) > +{ > + struct bootlog_page *page; > + > + page = (struct bootlog_page *)devm_get_free_pages(&qdev->pdev->dev, GFP_KERNEL, 0); > + if (!page) > + return page; > + > + page->size = PAGE_SIZE; > + page->offset = sizeof(*page); > + list_add_tail(&page->node, &qdev->bootlog); > + > + return page; > +} > + > +static int reset_bootlog(struct qaic_device *qdev) > +{ > + struct bootlog_page *page; > + struct bootlog_page *i; > + > + list_for_each_entry_safe(page, i, &qdev->bootlog, node) { > + list_del(&page->node); > + devm_free_pages(&qdev->pdev->dev, (unsigned long)page); > + } This is currently dead code. reset is only used to init the bootlog. 
You may consider making this init_bootlog() if you are not planning to actually reset the bootlog. > + > + page = alloc_bootlog_page(qdev); > + if (!page) > + return -ENOMEM; > + > + return 0; > +} > + > +static void *bootlog_get_space(struct qaic_device *qdev, unsigned int size) > +{ > + struct bootlog_page *page; > + > + page = list_last_entry(&qdev->bootlog, struct bootlog_page, node); > + > + if (size > page->size - sizeof(*page)) Not critical but would be safer to use this condition: "sizeof(*page) + size > page->size" > + return NULL; > + > + if (page->offset + size > page->size) { > + page = alloc_bootlog_page(qdev); > + if (!page) > + return NULL; > + } > + > + return (void *)page + page->offset; > +} > + > +static void bootlog_commit(struct qaic_device *qdev, unsigned int size) > +{ > + struct bootlog_page *page; > + > + page = list_last_entry(&qdev->bootlog, struct bootlog_page, node); > + > + page->offset += size; > +} > + > +static void bootlog_log(struct work_struct *work) > +{ > + struct bootlog_msg *msg = container_of(work, struct bootlog_msg, work); > + unsigned int len = strlen(msg->str) + 1; > + struct qaic_device *qdev = msg->qdev; > + void *log; > + > + mutex_lock(&qdev->bootlog_mutex); > + log = bootlog_get_space(qdev, len); > + if (log) { > + memcpy(log, msg, len); > + bootlog_commit(qdev, len); > + } > + mutex_unlock(&qdev->bootlog_mutex); > + > + if (mhi_queue_buf(qdev->bootlog_ch, DMA_FROM_DEVICE, msg, BOOTLOG_MSG_SIZE, MHI_EOT)) > + devm_kfree(&qdev->pdev->dev, msg); You are freeing `struct work` while still in the work callback. This is unsafe. See https://elixir.bootlin.com/linux/v6.8/source/kernel/workqueue.c#L2564. The work pointer is kept in busy_hash after the callback has finished and may still be accessed.
> +} > + > +static int qaic_bootlog_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) > +{ > + struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev)); > + struct bootlog_msg *msg; > + int i, ret; > + > + qdev->bootlog_wq = alloc_ordered_workqueue("qaic_bootlog", 0); > + if (!qdev->bootlog_wq) { > + ret = -ENOMEM; > + goto out; > + } > + > + mutex_lock(&qdev->bootlog_mutex); Looks like locking should be inside reset_bootlog(), like in other places. > + ret = reset_bootlog(qdev); > + mutex_unlock(&qdev->bootlog_mutex); > + if (ret) > + goto destroy_workqueue; > + > + ret = mhi_prepare_for_transfer(mhi_dev); > + if (ret) > + goto destroy_workqueue; > + > + for (i = 0; i < BOOTLOG_POOL_SIZE; i++) { > + msg = devm_kzalloc(&qdev->pdev->dev, sizeof(*msg), GFP_KERNEL); > + if (!msg) { > + ret = -ENOMEM; > + goto mhi_unprepare; > + } > + > + msg->qdev = qdev; > + INIT_WORK(&msg->work, bootlog_log); > + > + ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, msg, BOOTLOG_MSG_SIZE, MHI_EOT); > + if (ret) > + goto mhi_unprepare; > + } > + > + dev_set_drvdata(&mhi_dev->dev, qdev); > + qdev->bootlog_ch = mhi_dev; > + return 0; > + > +mhi_unprepare: > + mhi_unprepare_from_transfer(mhi_dev); > +destroy_workqueue: > + flush_workqueue(qdev->bootlog_wq); > + destroy_workqueue(qdev->bootlog_wq); > +out: > + return ret; > +} > + > +static void qaic_bootlog_mhi_remove(struct mhi_device *mhi_dev) > +{ > + struct qaic_device *qdev; > + > + qdev = dev_get_drvdata(&mhi_dev->dev); > + > + mhi_unprepare_from_transfer(qdev->bootlog_ch); > + flush_workqueue(qdev->bootlog_wq); > + destroy_workqueue(qdev->bootlog_wq); > + qdev->bootlog_ch = NULL; > +} > + > +static void qaic_bootlog_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) > +{ > +} > + > +static void qaic_bootlog_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) > +{ > + struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev); > + struct 
bootlog_msg *msg = mhi_result->buf_addr; > + > + if (mhi_result->transaction_status) { > + devm_kfree(&qdev->pdev->dev, msg); > + return; > + } > + > + /* Force a null at the end of the transferred string */ > + msg->str[mhi_result->bytes_xferd - 1] = 0; Is it guaranteed that bytes_xferd will always be within valid range here? > + > + queue_work(qdev->bootlog_wq, &msg->work); > +} > + > +static const struct mhi_device_id qaic_bootlog_mhi_match_table[] = { > + { .chan = "QAIC_LOGGING", }, > + {}, > +}; > + > +static struct mhi_driver qaic_bootlog_mhi_driver = { > + .id_table = qaic_bootlog_mhi_match_table, > + .remove = qaic_bootlog_mhi_remove, > + .probe = qaic_bootlog_mhi_probe, > + .ul_xfer_cb = qaic_bootlog_mhi_ul_xfer_cb, > + .dl_xfer_cb = qaic_bootlog_mhi_dl_xfer_cb, > + .driver = { > + .name = "qaic_bootlog", > + }, > +}; > + > +int qaic_bootlog_register(void) > +{ > + return mhi_driver_register(&qaic_bootlog_mhi_driver); > +} > + > +void qaic_bootlog_unregister(void) > +{ > + mhi_driver_unregister(&qaic_bootlog_mhi_driver); > +} > diff --git a/drivers/accel/qaic/qaic_debugfs.h b/drivers/accel/qaic/qaic_debugfs.h > new file mode 100644 > index 000000000000..ea3fd1a88405 > --- /dev/null > +++ b/drivers/accel/qaic/qaic_debugfs.h > @@ -0,0 +1,20 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > + > +/* Copyright (c) 2020, The Linux Foundation. All rights reserved. */ > +/* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ > + > +#ifndef __QAIC_DEBUGFS_H__ > +#define __QAIC_DEBUGFS_H__ > + > +#include <drm/drm_file.h> > + > +#ifdef CONFIG_DEBUG_FS > +int qaic_bootlog_register(void); > +void qaic_bootlog_unregister(void); > +void qaic_debugfs_init(struct qaic_drm_device *qddev); > +#else > +int qaic_bootlog_register(void) { return 0; } > +void qaic_bootlog_unregister(void) {} > +void qaic_debugfs_init(struct qaic_drm_device *qddev) {} > +#endif /* CONFIG_DEBUG_FS */ > +#endif /* __QAIC_DEBUGFS_H__ */ > diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c > index d1a632dbaec6..f072edb74f22 100644 > --- a/drivers/accel/qaic/qaic_drv.c > +++ b/drivers/accel/qaic/qaic_drv.c > @@ -28,6 +28,7 @@ > > #include "mhi_controller.h" > #include "qaic.h" > +#include "qaic_debugfs.h" > #include "qaic_timesync.h" > > MODULE_IMPORT_NS(DMA_BUF); > @@ -229,8 +230,12 @@ static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id) > qddev->partition_id = partition_id; > > ret = drm_dev_register(drm, 0); > - if (ret) > + if (ret) { > pci_dbg(qdev->pdev, "drm_dev_register failed %d\n", ret); > + return ret; > + } > + > + qaic_debugfs_init(qddev); > > return ret; > } > @@ -380,6 +385,9 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de > if (ret) > return NULL; > ret = drmm_mutex_init(drm, &qdev->cntl_mutex); > + if (ret) > + return NULL; > + ret = drmm_mutex_init(drm, &qdev->bootlog_mutex); > if (ret) > return NULL; > > @@ -399,6 +407,7 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de > qddev->qdev = qdev; > > INIT_LIST_HEAD(&qdev->cntl_xfer_list); > + INIT_LIST_HEAD(&qdev->bootlog); > INIT_LIST_HEAD(&qddev->users); > > for (i = 0; i < qdev->num_dbc; ++i) { > @@ -639,6 +648,10 @@ static int __init qaic_init(void) > if (ret) > pr_debug("qaic: qaic_timesync_init failed %d\n", ret); > > + ret = qaic_bootlog_register(); > + if (ret) > + pr_debug("qaic: qaic_bootlog_register failed %d\n", ret); > + > 
return 0; > > free_pci: > @@ -664,6 +677,7 @@ static void __exit qaic_exit(void) > * reinitializing the link_up state after the cleanup is done. > */ > link_up = true; > + qaic_bootlog_unregister(); > qaic_timesync_deinit(); > mhi_driver_unregister(&qaic_mhi_driver); > pci_unregister_driver(&qaic_pci_driver);