On the AX45MP core, cache coherency is a specification option, so it may not be supported. In that case DMA will fail. As a workaround, we first allocate a global DMA coherent pool from which DMA allocations are taken; this pool is marked as non-cacheable in the PMA region as specified in the device tree. Cache synchronization callbacks are then implemented to keep the caches in sync when doing DMA transactions. This feature is based on the work posted [0] by Vincent Chen <vincentc@xxxxxxxxxxxxx> for the Andes AndeStar RISC-V CPU. [0] https://lore.kernel.org/lkml/1540982130-28248-1-git-send-email-vincentc@xxxxxxxxxxxxx/ Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@xxxxxxxxxxxxxx> --- arch/riscv/vendors/andes/Makefile | 1 + arch/riscv/vendors/andes/ax45mp_cache.c | 296 ++++++++++++++++++ arch/riscv/vendors/andes/ax45mp_nocache_dma.c | 65 ++++ arch/riscv/vendors/andes/include/proc.h | 9 + 4 files changed, 371 insertions(+) create mode 100644 arch/riscv/vendors/andes/ax45mp_cache.c create mode 100644 arch/riscv/vendors/andes/ax45mp_nocache_dma.c create mode 100644 arch/riscv/vendors/andes/include/proc.h diff --git a/arch/riscv/vendors/andes/Makefile b/arch/riscv/vendors/andes/Makefile index 60fa8226c4a3..f9ec92b92455 100644 --- a/arch/riscv/vendors/andes/Makefile +++ b/arch/riscv/vendors/andes/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_ARCH_R9A07G043) += ax45mp.o +obj-$(CONFIG_ARCH_R9A07G043) += ax45mp_cache.o ax45mp_nocache_dma.o diff --git a/arch/riscv/vendors/andes/ax45mp_cache.c b/arch/riscv/vendors/andes/ax45mp_cache.c new file mode 100644 index 000000000000..6941821e7e4a --- /dev/null +++ b/arch/riscv/vendors/andes/ax45mp_cache.c @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * non-coherent cache functions for AX45MP + * + * Copyright (C) 2022 Renesas Electronics Corp.
+ *
+ */
+
+#include <linux/cacheinfo.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/irqflags.h>
+#include <linux/log2.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/percpu.h>
+#include <linux/sizes.h>
+#include <linux/smp.h>
+#include <linux/string.h>
+
+#include <asm/csr.h>
+#include <asm/sbi.h>
+
+/* proc.h declares the functions exported below; keep prototypes in sync. */
+#include "include/proc.h"
+#include "include/sbi.h"
+
+/* D-cache operation */
+#define CCTL_L1D_VA_INVAL		0
+#define CCTL_L1D_VA_WB			1
+
+/* L2 cache */
+#define L2_CACHE_CTL_CEN_MASK		1
+
+/* L2 cache registers */
+#define L2C_REG_CTL_OFFSET		0x8
+#define L2C_REG_C0_CMD_OFFSET		0x40
+#define L2C_REG_C0_ACC_OFFSET		0x48
+#define L2C_REG_STATUS_OFFSET		0x80
+
+/* L2 CCTL status */
+#define CCTL_L2_STATUS_IDLE		0
+
+/* L2 CCTL status cores mask */
+#define CCTL_L2_STATUS_C0_MASK		0xf
+
+/* L2 cache operation */
+#define CCTL_L2_PA_INVAL		0x8
+#define CCTL_L2_PA_WB			0x9
+
+#define L2C_HPM_PER_CORE_OFFSET		0x8
+#define L2C_REG_PER_CORE_OFFSET		0x10
+#define CCTL_L2_STATUS_PER_CORE_OFFSET	4
+
+/* Per-core variants of the core-0 registers/masks above, indexed by n. */
+#define L2C_REG_CN_CMD_OFFSET(n)	\
+	(L2C_REG_C0_CMD_OFFSET + ((n) * L2C_REG_PER_CORE_OFFSET))
+#define L2C_REG_CN_ACC_OFFSET(n)	\
+	(L2C_REG_C0_ACC_OFFSET + ((n) * L2C_REG_PER_CORE_OFFSET))
+#define CCTL_L2_STATUS_CN_MASK(n)	\
+	(CCTL_L2_STATUS_C0_MASK << ((n) * CCTL_L2_STATUS_PER_CORE_OFFSET))
+
+#define MICM_CFG_ISZ_OFFSET		6
+#define MICM_CFG_ISZ_MASK		(0x7 << MICM_CFG_ISZ_OFFSET)
+
+#define MDCM_CFG_DSZ_OFFSET		6
+#define MDCM_CFG_DSZ_MASK		(0x7 << MDCM_CFG_DSZ_OFFSET)
+
+#define CCTL_REG_UCCTLBEGINADDR_NUM	0x80b
+#define CCTL_REG_UCCTLCOMMAND_NUM	0x80c
+
+#define MCACHE_CTL_CCTL_SUEN_OFFSET	8
+#define MMSC_CFG_CCTLCSR_OFFSET		16
+#define MISA_20_OFFSET			20
+
+#define MCACHE_CTL_CCTL_SUEN_MASK	(0x1 << MCACHE_CTL_CCTL_SUEN_OFFSET)
+#define MMSC_CFG_CCTLCSR_MASK		(0x1 << MMSC_CFG_CCTLCSR_OFFSET)
+#define MISA_20_MASK			(0x1 << MISA_20_OFFSET)
+
+/*
+ * Upper bound on the D-cache line size this file supports. It sizes the
+ * on-stack bounce buffers in cpu_dma_inval_range(), so any line size taken
+ * from cacheinfo must not exceed it.
+ */
+#define MAX_CACHE_LINE_SIZE		256
+
+/**
+ * struct ax45mp_cache_info - per-CPU cached D-cache geometry
+ * @init_done: set once fill_cpu_cache_info() has run on this CPU
+ * @dcache_line_size: D-cache line size in bytes used for range operations
+ */
+struct ax45mp_cache_info {
+	bool init_done;
+	int dcache_line_size;
+};
+
+/* Defaults to a 64-byte line until cacheinfo has been consulted. */
+static DEFINE_PER_CPU(struct ax45mp_cache_info, cpu_cache_info) = {
+	.init_done = false,
+	.dcache_line_size = SZ_64,
+};
+
+/* Mapping of the L2 cache controller registers; NULL until init succeeds. */
+static void __iomem *l2c_base;
+
+/*
+ * Issue an argument-less call to the Andes SBI extension and return the
+ * value field of the reply. The error field of the reply is not inspected.
+ */
+static uint32_t andes_sbi_read(int fid)
+{
+	struct sbiret ret;
+
+	ret = sbi_ecall(SBI_EXT_ANDES, fid, 0, 0, 0, 0, 0, 0);
+	return ret.value;
+}
+
+static uint32_t cpu_get_mcache_ctl_status(void)
+{
+	return andes_sbi_read(SBI_EXT_ANDES_GET_MCACHE_CTL_STATUS);
+}
+
+static uint32_t cpu_get_micm_cfg_status(void)
+{
+	return andes_sbi_read(SBI_EXT_ANDES_GET_MICM_CTL_STATUS);
+}
+
+static uint32_t cpu_get_mdcm_cfg_status(void)
+{
+	return andes_sbi_read(SBI_EXT_ANDES_GET_MDCM_CTL_STATUS);
+}
+
+static uint32_t cpu_get_mmsc_cfg_status(void)
+{
+	return andes_sbi_read(SBI_EXT_ANDES_GET_MMSC_CTL_STATUS);
+}
+
+static uint32_t cpu_get_misa_cfg_status(void)
+{
+	return andes_sbi_read(SBI_EXT_ANDES_GET_MISA_CTL_STATUS);
+}
+
+/*
+ * Populate @cpu_ci from the current CPU's cacheinfo leaves.
+ *
+ * Only a data-cache leaf reporting a power-of-two line size no larger than
+ * MAX_CACHE_LINE_SIZE is accepted. Anything else (including 0, which would
+ * make the range loops below never advance) leaves the previous value, i.e.
+ * the SZ_64 default, in place. init_done is set unconditionally, so the
+ * lookup happens only once per CPU.
+ */
+static void fill_cpu_cache_info(struct ax45mp_cache_info *cpu_ci)
+{
+	struct cpu_cacheinfo *this_cpu_ci =
+			get_cpu_cacheinfo(smp_processor_id());
+	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
+	unsigned int i;
+
+	for (i = 0; i < this_cpu_ci->num_leaves; i++, this_leaf++) {
+		unsigned int sz = this_leaf->coherency_line_size;
+
+		if (this_leaf->type != CACHE_TYPE_DATA)
+			continue;
+		if (!is_power_of_2(sz) || sz > MAX_CACHE_LINE_SIZE)
+			continue;
+
+		cpu_ci->dcache_line_size = sz;
+	}
+
+	cpu_ci->init_done = true;
+}
+
+/* Return the D-cache line size for the current CPU, filling it on first use. */
+static inline int get_cache_line_size(void)
+{
+	struct ax45mp_cache_info *cpu_ci =
+			&per_cpu(cpu_cache_info, smp_processor_id());
+
+	if (unlikely(!cpu_ci->init_done))
+		fill_cpu_cache_info(cpu_ci);
+	return cpu_ci->dcache_line_size;
+}
+
+static uint32_t cpu_l2c_get_cctl_status(void)
+{
+	return readl(l2c_base + L2C_REG_STATUS_OFFSET);
+}
+
+static uint32_t cpu_l2c_ctl_status(void)
+{
+	return readl(l2c_base + L2C_REG_CTL_OFFSET);
+}
+
+/*
+ * Report whether the L1 range operations below may use the UCCTL CSRs:
+ * true when the I-cache or D-cache size field is non-zero and the MISA bit
+ * 20, MMSC_CFG CCTLCSR and MCACHE_CTL CCTL_SUEN bits are all set.
+ */
+static bool cpu_cache_controlable(void)
+{
+	return (((cpu_get_micm_cfg_status() & MICM_CFG_ISZ_MASK) ||
+		 (cpu_get_mdcm_cfg_status() & MDCM_CFG_DSZ_MASK)) &&
+		(cpu_get_misa_cfg_status() & MISA_20_MASK) &&
+		(cpu_get_mmsc_cfg_status() & MMSC_CFG_CCTLCSR_MASK) &&
+		(cpu_get_mcache_ctl_status() & MCACHE_CTL_CCTL_SUEN_MASK));
+}
+
+/*
+ * Apply one L1 command and one L2 command to every cache line in
+ * [@start, @end), stepping by @line_size.
+ *
+ * The L1 command is issued by virtual address through the UCCTL CSRs when
+ * cpu_cache_controlable() allows it. The L2 command is issued by physical
+ * address through this CPU's L2 controller registers when the controller is
+ * mapped and its enable bit is set; the loop then busy-waits until this
+ * CPU's status field reads idle before moving to the next line.
+ */
+static void cpu_dcache_range_op(unsigned long start, unsigned long end,
+				int line_size, unsigned long l1_cmd,
+				uint32_t l2_cmd)
+{
+	bool ucctl_ok = cpu_cache_controlable();
+	unsigned long pa;
+	int mhartid = 0;
+
+#ifdef CONFIG_SMP
+	mhartid = smp_processor_id();
+#endif
+
+	while (end > start) {
+		if (ucctl_ok) {
+			csr_write(CCTL_REG_UCCTLBEGINADDR_NUM, start);
+			csr_write(CCTL_REG_UCCTLCOMMAND_NUM, l1_cmd);
+		}
+
+		if (l2c_base && (cpu_l2c_ctl_status() & L2_CACHE_CTL_CEN_MASK)) {
+			pa = virt_to_phys((void *)start);
+			writel(pa, l2c_base + L2C_REG_CN_ACC_OFFSET(mhartid));
+			writel(l2_cmd, l2c_base + L2C_REG_CN_CMD_OFFSET(mhartid));
+			while ((cpu_l2c_get_cctl_status() &
+				CCTL_L2_STATUS_CN_MASK(mhartid)) != CCTL_L2_STATUS_IDLE)
+				;
+		}
+
+		start += line_size;
+	}
+}
+
+/* Write back the lines covering [@start, @end) from L1 D-cache and L2. */
+static void cpu_dcache_wb_range(unsigned long start,
+				unsigned long end,
+				int line_size)
+{
+	cpu_dcache_range_op(start, end, line_size,
+			    CCTL_L1D_VA_WB, CCTL_L2_PA_WB);
+}
+
+/* Invalidate the lines covering [@start, @end) in L1 D-cache and L2. */
+static void cpu_dcache_inval_range(unsigned long start,
+				   unsigned long end,
+				   int line_size)
+{
+	cpu_dcache_range_op(start, end, line_size,
+			    CCTL_L1D_VA_INVAL, CCTL_L2_PA_INVAL);
+}
+
+/**
+ * cpu_dma_inval_range() - invalidate the caches for a virtual address range
+ * @start: first byte of the range
+ * @end: one past the last byte of the range (callers pass start + size)
+ *
+ * The range is widened to whole cache lines. When @start or @end is not
+ * line aligned, the affected edge line is copied to a stack buffer before
+ * the invalidate and the bytes lying outside [@start, @end) are copied back
+ * afterwards, so that neighbouring data sharing the line is kept. Runs with
+ * local interrupts disabled. Does nothing for an empty range.
+ */
+void cpu_dma_inval_range(unsigned long start, unsigned long end)
+{
+	unsigned long line_size = get_cache_line_size();
+	unsigned long line_mask = line_size - 1;
+	char cache_buf[2][MAX_CACHE_LINE_SIZE] = { 0 };
+	unsigned long old_start = start;
+	unsigned long old_end = end;
+	unsigned long flags;
+
+	if (unlikely(start == end))
+		return;
+
+	start &= ~line_mask;
+	end = (end + line_mask) & ~line_mask;
+
+	local_irq_save(flags);
+
+	/* Snapshot the partially covered head and tail lines. */
+	if (unlikely(start != old_start))
+		memcpy(cache_buf[0], (void *)start, line_size);
+
+	if (unlikely(end != old_end))
+		memcpy(cache_buf[1], (void *)(old_end & ~line_mask), line_size);
+
+	cpu_dcache_inval_range(start, end, line_size);
+
+	/* Head: bytes [start, old_start) precede the requested range. */
+	if (unlikely(start != old_start))
+		memcpy((void *)start, cache_buf[0], old_start & line_mask);
+
+	/*
+	 * Tail: @old_end is exclusive, so the bytes to keep are
+	 * [old_end, end), starting at old_end itself rather than old_end + 1.
+	 */
+	if (unlikely(end != old_end))
+		memcpy((void *)old_end,
+		       &cache_buf[1][old_end & line_mask],
+		       end - old_end);
+
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(cpu_dma_inval_range);
+
+/**
+ * cpu_dma_wb_range() - write back the caches for a virtual address range
+ * @start: first byte of the range; rounded down to a cache line boundary
+ * @end: one past the last byte of the range
+ *
+ * Runs with local interrupts disabled.
+ */
+void cpu_dma_wb_range(unsigned long start, unsigned long end)
+{
+	unsigned long flags;
+	unsigned long line_size = get_cache_line_size();
+
+	local_irq_save(flags);
+	start = start & (~(line_size - 1));
+	cpu_dcache_wb_range(start, end, line_size);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(cpu_dma_wb_range);
+
+static const struct of_device_id ax45mp_cache_ids[] = {
+	{ .compatible = "andestech,ax45mp-cache" },
+	{ /* sentinel */ }
+};
+
+/*
+ * Locate the L2 cache controller node in the device tree and map its first
+ * register region into l2c_base.
+ *
+ * Return: 0 on success, -ENODEV if no matching node exists, -ENOMEM if the
+ * registers cannot be mapped.
+ */
+static int __init ax45mp_cache_init(void)
+{
+	struct device_node *np;
+
+	np = of_find_matching_node(NULL, ax45mp_cache_ids);
+	if (!np)
+		return -ENODEV;
+
+	l2c_base = of_iomap(np, 0);
+	/* The node is only needed for the mapping; drop the lookup reference. */
+	of_node_put(np);
+	if (!l2c_base)
+		return -ENOMEM;
+
+	return 0;
+}
+arch_initcall(ax45mp_cache_init);
diff --git a/arch/riscv/vendors/andes/ax45mp_nocache_dma.c b/arch/riscv/vendors/andes/ax45mp_nocache_dma.c
new file mode 100644
index 000000000000..263012f89d0b
--- /dev/null
+++ b/arch/riscv/vendors/andes/ax45mp_nocache_dma.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DMA callbacks implemented for the non-coherent cache
+ * on AX45MP
+ *
+ * Copyright (C) 2022 Renesas Electronics Corp.
+ *
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
+#include <linux/dma-map-ops.h>
+#include <linux/mm.h>
+
+#include "include/proc.h"
+
+/*
+ * Run cache maintenance routine @fn over the kernel virtual range that
+ * corresponds to @size bytes of physical memory starting at @paddr.
+ * @fn receives the range as [start, start + size), i.e. an exclusive end.
+ */
+static inline void cache_op(phys_addr_t paddr, size_t size,
+			    void (*fn)(unsigned long start, unsigned long end))
+{
+	unsigned long start;
+
+	start = (unsigned long)phys_to_virt(paddr);
+	fn(start, start + size);
+}
+
+/*
+ * Prepare a buffer for access by the device: invalidate the caches for
+ * DMA_FROM_DEVICE, write them back for DMA_TO_DEVICE and DMA_BIDIRECTIONAL.
+ * Any other direction is treated as a bug.
+ */
+void arch_sync_dma_for_device(phys_addr_t paddr,
+			      size_t size, enum dma_data_direction dir)
+{
+	switch (dir) {
+	case DMA_FROM_DEVICE:
+		cache_op(paddr, size, cpu_dma_inval_range);
+		break;
+	case DMA_TO_DEVICE:
+	case DMA_BIDIRECTIONAL:
+		cache_op(paddr, size, cpu_dma_wb_range);
+		break;
+	default:
+		BUG();
+	}
+}
+
+/*
+ * Hand a buffer back to the CPU: nothing to do for DMA_TO_DEVICE, invalidate
+ * the caches for DMA_FROM_DEVICE and DMA_BIDIRECTIONAL. Any other direction
+ * is treated as a bug.
+ */
+void arch_sync_dma_for_cpu(phys_addr_t paddr,
+			   size_t size, enum dma_data_direction dir)
+{
+	switch (dir) {
+	case DMA_TO_DEVICE:
+		break;
+	case DMA_FROM_DEVICE:
+	case DMA_BIDIRECTIONAL:
+		cache_op(paddr, size, cpu_dma_inval_range);
+		break;
+	default:
+		BUG();
+	}
+}
+
+/*
+ * Allocate @size bytes from the global coherent DMA pool. @gfp and @attrs
+ * are not used.
+ */
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+		     gfp_t gfp, unsigned long attrs)
+{
+	return dma_alloc_from_global_coherent(dev, size, handle);
+}
+
+/*
+ * Return an allocation to the global coherent DMA pool.
+ *
+ * The release helper takes a page order rather than a byte count, so it is
+ * derived from @size; passing a constant 0 would release only part of any
+ * allocation larger than one page.
+ */
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
+		   dma_addr_t handle, unsigned long attrs)
+{
+	dma_release_from_global_coherent(get_order(size), vaddr);
+}
diff --git a/arch/riscv/vendors/andes/include/proc.h b/arch/riscv/vendors/andes/include/proc.h
new file mode 100644
index 000000000000..ace9bcfccf6b
--- /dev/null
+++ b/arch/riscv/vendors/andes/include/proc.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef __RISCV_ANDES_PROC_H
+#define __RISCV_ANDES_PROC_H
+
+/* Cache maintenance over the virtual range [start, end). */
+void cpu_dma_inval_range(unsigned long start, unsigned long end);
+void cpu_dma_wb_range(unsigned long start, unsigned long end);
+
+#endif
-- 
2.25.1