This is a note to let you know that I've just added the patch titled remoteproc: imx_dsp_rproc: Add custom memory copy implementation for i.MX DSP Cores to the 6.1-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: remoteproc-imx_dsp_rproc-add-custom-memory-copy-impl.patch and it can be found in the queue-6.1 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. commit dfde6790f1bdbbcfb555ba29a7ae750aa929a7c0 Author: Iuliana Prodan <iuliana.prodan@xxxxxxx> Date: Tue Feb 21 19:03:56 2023 +0200 remoteproc: imx_dsp_rproc: Add custom memory copy implementation for i.MX DSP Cores [ Upstream commit 408ec1ff0caa340c57eecf4cbd14ef0132036a50 ] The IRAM is part of the HiFi DSP. According to hardware specification only 32-bits write are allowed otherwise we get a Kernel panic. Therefore add a custom memory copy and memset functions to deal with the above restriction. Signed-off-by: Iuliana Prodan <iuliana.prodan@xxxxxxx> Link: https://lore.kernel.org/r/20230221170356.27923-1-iuliana.prodan@xxxxxxxxxxx Signed-off-by: Mathieu Poirier <mathieu.poirier@xxxxxxxxxx> Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx> diff --git a/drivers/remoteproc/imx_dsp_rproc.c b/drivers/remoteproc/imx_dsp_rproc.c index 506ec9565716b..e8e23f6b85563 100644 --- a/drivers/remoteproc/imx_dsp_rproc.c +++ b/drivers/remoteproc/imx_dsp_rproc.c @@ -721,6 +721,191 @@ static void imx_dsp_rproc_kick(struct rproc *rproc, int vqid) dev_err(dev, "%s: failed (%d, err:%d)\n", __func__, vqid, err); } +/* + * Custom memory copy implementation for i.MX DSP Cores + * + * The IRAM is part of the HiFi DSP. + * According to hw specs only 32-bits writes are allowed. + */ +static int imx_dsp_rproc_memcpy(void *dest, const void *src, size_t size) +{ + const u8 *src_byte = src; + const u32 *source = src; + u32 affected_mask; + u32 *dst = dest; + int i, q, r; + u32 tmp; + + /* destination must be 32bit aligned */ + if (!IS_ALIGNED((uintptr_t)dest, 4)) + return -EINVAL; + + q = size / 4; + r = size % 4; + + /* copy data in units of 32 bits at a time */ + for (i = 0; i < q; i++) + writel(source[i], &dst[i]); + + if (r) { + affected_mask = GENMASK(8 * r, 0); + + /* + * first read the 32bit data of dest, then change affected + * bytes, and write back to dest. + * For unaffected bytes, it should not be changed + */ + tmp = readl(dest + q * 4); + tmp &= ~affected_mask; + + /* avoid reading after end of source */ + for (i = 0; i < r; i++) + tmp |= (src_byte[q * 4 + i] << (8 * i)); + + writel(tmp, dest + q * 4); + } + + return 0; +} + +/* + * Custom memset implementation for i.MX DSP Cores + * + * The IRAM is part of the HiFi DSP. + * According to hw specs only 32-bits writes are allowed. + */ +static int imx_dsp_rproc_memset(void *addr, u8 value, size_t size) +{ + u32 tmp_val = value; + u32 *tmp_dst = addr; + u32 affected_mask; + int q, r; + u32 tmp; + + /* destination must be 32bit aligned */ + if (!IS_ALIGNED((uintptr_t)addr, 4)) + return -EINVAL; + + tmp_val |= tmp_val << 8; + tmp_val |= tmp_val << 16; + + q = size / 4; + r = size % 4; + + while (q--) + writel(tmp_val, tmp_dst++); + + if (r) { + affected_mask = GENMASK(8 * r, 0); + + /* + * first read the 32bit data of addr, then change affected + * bytes, and write back to addr. + * For unaffected bytes, it should not be changed + */ + tmp = readl(tmp_dst); + tmp &= ~affected_mask; + + tmp |= (tmp_val & affected_mask); + writel(tmp, tmp_dst); + } + + return 0; +} + +/* + * imx_dsp_rproc_elf_load_segments() - load firmware segments to memory + * @rproc: remote processor which will be booted using these fw segments + * @fw: the ELF firmware image + * + * This function loads the firmware segments to memory, where the remote + * processor expects them. + * + * Return: 0 on success and an appropriate error code otherwise + */ +static int imx_dsp_rproc_elf_load_segments(struct rproc *rproc, const struct firmware *fw) +{ + struct device *dev = &rproc->dev; + const void *ehdr, *phdr; + int i, ret = 0; + u16 phnum; + const u8 *elf_data = fw->data; + u8 class = fw_elf_get_class(fw); + u32 elf_phdr_get_size = elf_size_of_phdr(class); + + ehdr = elf_data; + phnum = elf_hdr_get_e_phnum(class, ehdr); + phdr = elf_data + elf_hdr_get_e_phoff(class, ehdr); + + /* go through the available ELF segments */ + for (i = 0; i < phnum; i++, phdr += elf_phdr_get_size) { + u64 da = elf_phdr_get_p_paddr(class, phdr); + u64 memsz = elf_phdr_get_p_memsz(class, phdr); + u64 filesz = elf_phdr_get_p_filesz(class, phdr); + u64 offset = elf_phdr_get_p_offset(class, phdr); + u32 type = elf_phdr_get_p_type(class, phdr); + void *ptr; + + if (type != PT_LOAD || !memsz) + continue; + + dev_dbg(dev, "phdr: type %d da 0x%llx memsz 0x%llx filesz 0x%llx\n", + type, da, memsz, filesz); + + if (filesz > memsz) { + dev_err(dev, "bad phdr filesz 0x%llx memsz 0x%llx\n", + filesz, memsz); + ret = -EINVAL; + break; + } + + if (offset + filesz > fw->size) { + dev_err(dev, "truncated fw: need 0x%llx avail 0x%zx\n", + offset + filesz, fw->size); + ret = -EINVAL; + break; + } + + if (!rproc_u64_fit_in_size_t(memsz)) { + dev_err(dev, "size (%llx) does not fit in size_t type\n", + memsz); + ret = -EOVERFLOW; + break; + } + + /* grab the kernel address for this device address */ + ptr = rproc_da_to_va(rproc, da, memsz, NULL); + if (!ptr) { + dev_err(dev, "bad phdr da 0x%llx mem 0x%llx\n", da, + memsz); + ret = -EINVAL; + break; + } + + /* put the segment where the remote processor expects it */ + if (filesz) { + ret = imx_dsp_rproc_memcpy(ptr, elf_data + offset, filesz); + if (ret) { + dev_err(dev, "memory copy failed for da 0x%llx memsz 0x%llx\n", + da, memsz); + break; + } + } + + /* zero out remaining memory for this segment */ + if (memsz > filesz) { + ret = imx_dsp_rproc_memset(ptr + filesz, 0, memsz - filesz); + if (ret) { + dev_err(dev, "memset failed for da 0x%llx memsz 0x%llx\n", + da, memsz); + break; + } + } + } + + return ret; +} + static int imx_dsp_rproc_parse_fw(struct rproc *rproc, const struct firmware *fw) { if (rproc_elf_load_rsc_table(rproc, fw)) @@ -735,7 +920,7 @@ static const struct rproc_ops imx_dsp_rproc_ops = { .start = imx_dsp_rproc_start, .stop = imx_dsp_rproc_stop, .kick = imx_dsp_rproc_kick, - .load = rproc_elf_load_segments, + .load = imx_dsp_rproc_elf_load_segments, .parse_fw = imx_dsp_rproc_parse_fw, .sanity_check = rproc_elf_sanity_check, .get_boot_addr = rproc_elf_get_boot_addr,