Hi Loic, On 7/27/18 8:14 AM, Loic Pallardy wrote: > Memory entry could be allocated in different ways (ioremap, > dma_alloc_coherent, internal RAM allocator...). > This patch introduces an alloc ops in rproc_mem_entry structure > to associate dedicated allocation mechanism to each memory entry > descriptor in order to do remote core agnostic from memory allocators. > > The introduction of this ops allows to perform allocation of all registered > carveout at the same time, just before calling rproc_start(). > It simplifies and makes uniform carveout management whatever origin. This patch is causing a kernel crash with trace entries. Please see further below for the cause. > > Signed-off-by: Loic Pallardy <loic.pallardy@xxxxxx> > --- > drivers/remoteproc/remoteproc_core.c | 261 ++++++++++++++++++++++------------- > include/linux/remoteproc.h | 7 + > 2 files changed, 175 insertions(+), 93 deletions(-) > > diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c > index 77b39ba..2c51549 100644 > --- a/drivers/remoteproc/remoteproc_core.c > +++ b/drivers/remoteproc/remoteproc_core.c > @@ -642,74 +642,31 @@ static int rproc_handle_devmem(struct rproc *rproc, struct fw_rsc_devmem *rsc, > } > > /** > - * rproc_release_carveout() - release acquired carveout > + * rproc_alloc_carveout() - allocated specified carveout > * @rproc: rproc handle > - * @mem: the memory entry to release > - * > - * This function releases specified memory entry @mem allocated via > - * dma_alloc_coherent() function by @rproc. > - */ > -static int rproc_release_carveout(struct rproc *rproc, > - struct rproc_mem_entry *mem) > -{ > - struct device *dev = &rproc->dev; > - > - /* clean up carveout allocations */ > - dma_free_coherent(dev->parent, mem->len, mem->va, mem->dma); > - return 0; > -} > - > -/** > - * rproc_handle_carveout() - handle phys contig memory allocation requests > - * @rproc: rproc handle > - * @rsc: the resource entry > - * @avail: size of available data (for image validation) > - * > - * This function will handle firmware requests for allocation of physically > - * contiguous memory regions. > - * > - * These request entries should come first in the firmware's resource table, > - * as other firmware entries might request placing other data objects inside > - * these memory regions (e.g. data/code segments, trace resource entries, ...). > + * @mem: the memory entry to allocate > * > - * Allocating memory this way helps utilizing the reserved physical memory > - * (e.g. CMA) more efficiently, and also minimizes the number of TLB entries > - * needed to map it (in case @rproc is using an IOMMU). Reducing the TLB > - * pressure is important; it may have a substantial impact on performance. > + * This function allocate specified memory entry @mem using > + * dma_alloc_coherent() as default allocator > */ > -static int rproc_handle_carveout(struct rproc *rproc, > - struct fw_rsc_carveout *rsc, > - int offset, int avail) > +static int rproc_alloc_carveout(struct rproc *rproc, > + struct rproc_mem_entry *mem) > { > - struct rproc_mem_entry *carveout, *mapping = NULL; > + struct rproc_mem_entry *mapping = NULL; > struct device *dev = &rproc->dev; > dma_addr_t dma; > void *va; > int ret; > > - if (sizeof(*rsc) > avail) { > - dev_err(dev, "carveout rsc is truncated\n"); > - return -EINVAL; > - } > - > - /* make sure reserved bytes are zeroes */ > - if (rsc->reserved) { > - dev_err(dev, "carveout rsc has non zero reserved bytes\n"); > - return -EINVAL; > - } > - > - dev_dbg(dev, "carveout rsc: name: %s, da 0x%x, pa 0x%x, len 0x%x, flags 0x%x\n", > - rsc->name, rsc->da, rsc->pa, rsc->len, rsc->flags); > - > - va = dma_alloc_coherent(dev->parent, rsc->len, &dma, GFP_KERNEL); > + va = dma_alloc_coherent(dev->parent, mem->len, &dma, GFP_KERNEL); > if (!va) { > dev_err(dev->parent, > - "failed to allocate dma memory: len 0x%x\n", rsc->len); > + "failed to allocate dma memory: len 0x%x\n", mem->len); > return -ENOMEM; > } > > dev_dbg(dev, "carveout va %pK, dma %pad, len 0x%x\n", > - va, &dma, rsc->len); > + va, &dma, mem->len); > > /* > * Ok, this is non-standard. > @@ -729,22 +686,22 @@ static int rproc_handle_carveout(struct rproc *rproc, > * physical address in this case. > */ > > - if (rsc->da != FW_RSC_ADDR_ANY && !rproc->domain) { > - dev_err(dev->parent, > - "Bad carveout rsc configuration\n"); > - ret = -ENOMEM; > - goto dma_free; > - } > + if (mem->da != FW_RSC_ADDR_ANY) { > + if (!rproc->domain) { > + dev_err(dev->parent, > + "Bad carveout rsc configuration\n"); > + ret = -ENOMEM; > + goto dma_free; > + } Same comment from Patch 1. > > - if (rsc->da != FW_RSC_ADDR_ANY && rproc->domain) { > mapping = kzalloc(sizeof(*mapping), GFP_KERNEL); > if (!mapping) { > ret = -ENOMEM; > goto dma_free; > } > > - ret = iommu_map(rproc->domain, rsc->da, dma, rsc->len, > - rsc->flags); > + ret = iommu_map(rproc->domain, mem->da, dma, mem->len, > + mem->flags); > if (ret) { > dev_err(dev, "iommu_map failed: %d\n", ret); > goto free_mapping; > @@ -757,52 +714,102 @@ static int rproc_handle_carveout(struct rproc *rproc, > * We can't trust the remote processor not to change the > * resource table, so we must maintain this info independently. > */ > - mapping->da = rsc->da; > - mapping->len = rsc->len; > + mapping->da = mem->da; > + mapping->len = mem->len; > list_add_tail(&mapping->node, &rproc->mappings); > > dev_dbg(dev, "carveout mapped 0x%x to %pad\n", > - rsc->da, &dma); > + mem->da, &dma); > + } else { > + mem->da = (u32)dma; Hmm, what was the purpose of this? So, this appears to be handling the missing implementation for the comment in the fw_rsc_carveout about FW_RSC_ADDR_ANY. > } > > - /* > - * Some remote processors might need to know the pa > - * even though they are behind an IOMMU. E.g., OMAP4's > - * remote M3 processor needs this so it can control > - * on-chip hardware accelerators that are not behind > - * the IOMMU, and therefor must know the pa. > - * > - * Generally we don't want to expose physical addresses > - * if we don't have to (remote processors are generally > - * _not_ trusted), so we might want to do this only for > - * remote processor that _must_ have this (e.g. OMAP4's > - * dual M3 subsystem). > - * > - * Non-IOMMU processors might also want to have this info. > - * In this case, the device address and the physical address > - * are the same. > - */ > - rsc->pa = (u32)rproc_va_to_pa(va); > - > - carveout = rproc_mem_entry_init(dev, va, dma, rsc->len, rsc->da, > - rproc_release_carveout, rsc->name); > - if (!carveout) > - goto free_carv; > - > - rproc_add_carveout(rproc, carveout); > + mem->dma = (u32)dma; We don't need the typecast, mem->dma is already of type dma_addr_t. Same comment above on the else part as well. > + mem->va = va; > > return 0; > > -free_carv: > - kfree(carveout); > free_mapping: > kfree(mapping); > dma_free: > - dma_free_coherent(dev->parent, rsc->len, va, dma); > + dma_free_coherent(dev->parent, mem->len, va, dma); > return ret; > } > > /** > + * rproc_release_carveout() - release acquired carveout > + * @rproc: rproc handle > + * @mem: the memory entry to release > + * > + * This function releases specified memory entry @mem allocated via > + * rproc_alloc_carveout() function by @rproc. > + */ > +static int rproc_release_carveout(struct rproc *rproc, > + struct rproc_mem_entry *mem) > +{ > + struct device *dev = &rproc->dev; > + > + /* clean up carveout allocations */ > + dma_free_coherent(dev->parent, mem->len, mem->va, mem->dma); > + return 0; > +} > + > +/** > + * rproc_handle_carveout() - handle phys contig memory allocation requests > + * @rproc: rproc handle > + * @rsc: the resource entry > + * @avail: size of available data (for image validation) > + * > + * This function will handle firmware requests for allocation of physically > + * contiguous memory regions. > + * > + * These request entries should come first in the firmware's resource table, > + * as other firmware entries might request placing other data objects inside > + * these memory regions (e.g. data/code segments, trace resource entries, ...). > + * > + * Allocating memory this way helps utilizing the reserved physical memory > + * (e.g. CMA) more efficiently, and also minimizes the number of TLB entries > + * needed to map it (in case @rproc is using an IOMMU). Reducing the TLB > + * pressure is important; it may have a substantial impact on performance. > + */ > +static int rproc_handle_carveout(struct rproc *rproc, > + struct fw_rsc_carveout *rsc, > + int offset, int avail) > +{ > + struct rproc_mem_entry *carveout; > + struct device *dev = &rproc->dev; > + > + if (sizeof(*rsc) > avail) { > + dev_err(dev, "carveout rsc is truncated\n"); > + return -EINVAL; > + } > + > + /* make sure reserved bytes are zeroes */ > + if (rsc->reserved) { > + dev_err(dev, "carveout rsc has non zero reserved bytes\n"); > + return -EINVAL; > + } > + > + dev_dbg(dev, "carveout rsc: name: %s, da 0x%x, pa 0x%x, len 0x%x, flags 0x%x\n", > + rsc->name, rsc->da, rsc->pa, rsc->len, rsc->flags); > + > + /* Register carveout in in list */ > + carveout = rproc_mem_entry_init(dev, 0, 0, rsc->len, rsc->da, > + rproc_alloc_carveout, > + rproc_release_carveout, rsc->name); > + if (!carveout) { > + dev_err(dev, "Can't allocate memory entry structure\n"); > + return -ENOMEM; > + } > + > + carveout->flags = rsc->flags; > + carveout->rsc_offset = offset; > + rproc_add_carveout(rproc, carveout); Once we get rid of rproc_add_carveout, the list addition will mostly be handled in rproc_mem_entry_init itself. > + > + return 0; > +} > + > +/** > * rproc_add_carveout() - register an allocated carveout region > * @rproc: rproc handle > * @mem: memory entry to register > @@ -832,6 +839,7 @@ void rproc_add_carveout(struct rproc *rproc, struct rproc_mem_entry *mem) > struct rproc_mem_entry * > rproc_mem_entry_init(struct device *dev, > void *va, dma_addr_t dma, int len, u32 da, > + int (*alloc)(struct rproc *, struct rproc_mem_entry *), > int (*release)(struct rproc *, struct rproc_mem_entry *), > const char *name, ...) > { > @@ -846,7 +854,9 @@ struct rproc_mem_entry * > mem->dma = dma; > mem->da = da; > mem->len = len; > + mem->alloc = alloc; > mem->release = release; > + mem->rsc_offset = FW_RSC_ADDR_ANY; > > va_start(args, name); > vsnprintf(mem->name, sizeof(mem->name), name, args); > @@ -978,6 +988,63 @@ static void rproc_unprepare_subdevices(struct rproc *rproc) > } > > /** > + * rproc_alloc_registered_carveouts() - allocate all carveouts registered > + * in the list > + * @rproc: the remote processor handle > + * > + * This function parses registered carveout list, performs allocation > + * if alloc() ops registered and updates resource table information > + * if rsc_offset set. > + * > + * Return: 0 on success > + */ > +static int rproc_alloc_registered_carveouts(struct rproc *rproc) > +{ > + struct rproc_mem_entry *entry, *tmp; > + struct fw_rsc_carveout *rsc; > + struct device *dev = &rproc->dev; > + int ret; > + > + list_for_each_entry_safe(entry, tmp, &rproc->carveouts, node) { > + if (entry->alloc) { > + ret = entry->alloc(rproc, entry); > + if (ret) { > + dev_err(dev, "Unable to allocate carveout %s: %d\n", > + entry->name, ret); > + return -ENOMEM; > + } > + } > + > + if (entry->rsc_offset != FW_RSC_ADDR_ANY) { > + /* update resource table */ > + rsc = (void *)rproc->table_ptr + entry->rsc_offset; > + > + /* > + * Some remote processors might need to know the pa > + * even though they are behind an IOMMU. E.g., OMAP4's > + * remote M3 processor needs this so it can control > + * on-chip hardware accelerators that are not behind > + * the IOMMU, and therefor must know the pa. > + * > + * Generally we don't want to expose physical addresses > + * if we don't have to (remote processors are generally > + * _not_ trusted), so we might want to do this only for > + * remote processor that _must_ have this (e.g. OMAP4's > + * dual M3 subsystem). > + * > + * Non-IOMMU processors might also want to have this info. > + * In this case, the device address and the physical address > + * are the same. > + */ > + if (entry->va) > + rsc->pa = (u32)rproc_va_to_pa(entry->va); > + } > + } > + > + return 0; > +} > + > +/** > * rproc_coredump_cleanup() - clean up dump_segments list > * @rproc: the remote processor handle > */ > @@ -1148,6 +1215,14 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw) > goto clean_up_resources; > } > > + /* Allocate carveout resources associated to rproc */ > + ret = rproc_alloc_registered_carveouts(rproc); > + if (ret) { > + dev_err(dev, "Failed to allocate associated carveouts: %d\n", > + ret); > + goto clean_up_resources; > + } This is causing an issue with RSC_TRACE on where the trace region on the remote processor is actually backed by a DDR carveout address. The allocations are now being done after processing the resources from the rproc_loading_handlers, which causes the RSC_TRACE to be configured with an incorrect kernel va, and accessing it through debugfs then results in a kernel crash. regards Suman > + > ret = rproc_start(rproc, fw); > if (ret) > goto clean_up_resources; > diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h > index 55f30fc..ea95b04 100644 > --- a/include/linux/remoteproc.h > +++ b/include/linux/remoteproc.h > @@ -317,6 +317,9 @@ struct fw_rsc_vdev { > * @priv: associated data > * @name: associated memory region name (optional) > * @node: list node > + * @rsc_offset: offset in resource table > + * @flags: iommu protection flags > + * @alloc: specific memory allocator function > */ > struct rproc_mem_entry { > void *va; > @@ -326,6 +329,9 @@ struct rproc_mem_entry { > void *priv; > char name[32]; > struct list_head node; > + u32 rsc_offset; > + u32 flags; > + int (*alloc)(struct rproc *rproc, struct rproc_mem_entry *mem); > int (*release)(struct rproc *rproc, struct rproc_mem_entry *mem); > }; > > @@ -563,6 +569,7 @@ struct rproc *rproc_alloc(struct device *dev, const char *name, > struct rproc_mem_entry * > rproc_mem_entry_init(struct device *dev, > void *va, dma_addr_t dma, int len, u32 da, > + int (*alloc)(struct rproc *, struct rproc_mem_entry *), > int (*release)(struct rproc *, struct rproc_mem_entry *), > const char *name, ...); > >