Am Dienstag, dem 02.03.2021 um 09:37 +0100 schrieb Ahmad Fatoum: > Hello Jules, Yann, > > On 01.03.21 16:58, Jules Maselbas wrote: > > From: Yann Sionneau <ysionneau@xxxxxxxxx> > > Some comments inline. I am not a cache coherency expert, so take > it with a grain of salt. > > > > > Signed-off-by: Yann Sionneau <ysionneau@xxxxxxxxx> > > Signed-off-by: Jules Maselbas <jmaselbas@xxxxxxxxx> > > --- > > > --- /dev/null > > +++ b/arch/kvx/include/asm/dma.h > > @@ -0,0 +1,35 @@ > > +/* SPDX-License-Identifier: GPL-2.0-only */ > > +/* SPDX-FileCopyrightText: 2021 Yann Sionneau <ysionneau@xxxxxxxxx>, Kalray Inc. */ > > + > > +#ifndef __ASM_DMA_H > > +#define __ASM_DMA_H > > + > > +#include <common.h> > > + > > +#define KVX_DDR_32BIT_RAM_WINDOW_BA (0x80000000ULL) > > +#define KVX_DDR_64BIT_RAM_WINDOW_BA (0x100000000ULL) > > +#define MAX_32BIT_ADDR (0xffffffffULL) > > + > > +#define dma_alloc dma_alloc > > +static inline void *dma_alloc(size_t size) > > +{ > > + return xmemalign(64, ALIGN(size, 64)); > > +} > > + > > +static inline void *dma_alloc_coherent(size_t size, dma_addr_t *dma_handle) > > +{ > > + void *ret = xmemalign(PAGE_SIZE, size); > > + > > + if (dma_handle) > > + *dma_handle = (dma_addr_t)(uintptr_t)ret; > > + > > + return ret; > > +} > > This would imply that the CPU barebox is booting is coherent with all > > devices that barebox needs to access. Is that the case? 
> > (See below) > > > + > > +static inline void dma_free_coherent(void *mem, dma_addr_t dma_handle, > > + size_t size) > > +{ > > + free(mem); > > +} > > + > > +#endif /* __ASM_DMA_H */ > > diff --git a/arch/kvx/include/asm/sys_arch.h b/arch/kvx/include/asm/sys_arch.h > > index 9df32c4e7..ce07a5598 100644 > > --- a/arch/kvx/include/asm/sys_arch.h > > +++ b/arch/kvx/include/asm/sys_arch.h > > @@ -11,6 +11,9 @@ > > #define EXCEPTION_STRIDE 0x40 > > #define EXCEPTION_ALIGNMENT 0x100 > > > > > > > > > > > > > > > > > > +#define kvx_cluster_id() ((int) \ > > + ((kvx_sfr_get(PCR) & KVX_SFR_PCR_CID_MASK) \ > > + >> KVX_SFR_PCR_CID_SHIFT)) > > #define KVX_SFR_START(__sfr_reg) \ > > (KVX_SFR_## __sfr_reg ## _SHIFT) > > > > > > > > > > > > > > > > > > diff --git a/arch/kvx/lib/Makefile b/arch/kvx/lib/Makefile > > index d271ebccf..c730e1c23 100644 > > --- a/arch/kvx/lib/Makefile > > +++ b/arch/kvx/lib/Makefile > > @@ -3,4 +3,4 @@ > > # Copyright (C) 2019 Kalray Inc. > > # > > > > > > > > > > > > > > > > > > -obj-y += cpuinfo.o board.o dtb.o poweroff.o bootm.o setjmp.o cache.o > > +obj-y += cpuinfo.o board.o dtb.o poweroff.o bootm.o setjmp.o cache.o dma-default.o > > diff --git a/arch/kvx/lib/dma-default.c b/arch/kvx/lib/dma-default.c > > new file mode 100644 > > index 000000000..755a8c66f > > --- /dev/null > > +++ b/arch/kvx/lib/dma-default.c > > @@ -0,0 +1,91 @@ > > +// SPDX-License-Identifier: GPL-2.0-only > > +// SPDX-FileCopyrightText: 2021 Yann Sionneau <ysionneau@xxxxxxxxx>, Kalray Inc. 
> > + > > +#include <dma.h> > > +#include <asm/barrier.h> > > +#include <asm/io.h> > > +#include <asm/cache.h> > > +#include <asm/sfr.h> > > +#include <asm/sys_arch.h> > > + > > +/* > > + * The implementation of arch should follow the following rules: > > + * map for_cpu for_device unmap > > + * TO_DEV writeback none writeback none > > + * FROM_DEV invalidate invalidate(*) invalidate invalidate(*) > > + * BIDIR writeback invalidate writeback invalidate > > + * > > + * (*) - only necessary if the CPU speculatively prefetches. > > + * > > + * (see https://lkml.org/lkml/2018/5/18/979) > > + */ > > + > > +void dma_sync_single_for_device(dma_addr_t addr, size_t size, > > + enum dma_data_direction dir) > > +{ > > + switch (dir) { > > + case DMA_FROM_DEVICE: > > + kvx_dcache_invalidate_mem_area(addr, size); Why do you need to explicitly invalidate, but not flush? Even if the CPU speculatively prefetches, the coherency protocol should make sure to invalidate the speculatively loaded lines, right? > > + break; > > + case DMA_TO_DEVICE: > > + case DMA_BIDIRECTIONAL: > > + /* allow device to read buffer written by CPU */ > > + wmb(); > > If the interconnect was indeed coherent, like dma_alloc_coherent > above hints, you wouldn't need any barriers here..? Coherency does not imply strict ordering, so the barriers are in fact correct, as the CPU write buffers and/or the interconnect can still change the ordering of the writes as seen by a remote observer. 
> > + break; > > + default: > > + BUG(); > > + } > > +} > > + > > +void dma_sync_single_for_cpu(dma_addr_t addr, size_t size, > > + enum dma_data_direction dir) > > +{ > > + switch (dir) { > > + case DMA_FROM_DEVICE: > > + case DMA_TO_DEVICE: > > + break; > > + case DMA_BIDIRECTIONAL: > > + kvx_dcache_invalidate_mem_area(addr, size); > > + break; > > + default: > > + BUG(); > > + } > > +} > > + > > +#define KVX_DDR_ALIAS_OFFSET \ > > + (KVX_DDR_64BIT_RAM_WINDOW_BA - KVX_DDR_32BIT_RAM_WINDOW_BA) > > +#define KVX_DDR_ALIAS_WINDOW \ > > + (KVX_DDR_64BIT_RAM_WINDOW_BA + KVX_DDR_ALIAS_OFFSET) > > + > > +/* Local smem is aliased between 0 and 16MB */ > > +#define KVX_SMEM_LOCAL_ALIAS 0x1000000ULL > > + > > +dma_addr_t dma_map_single(struct device_d *dev, void *ptr, size_t size, > > + enum dma_data_direction dir) > > +{ > > + uintptr_t addr = (uintptr_t) ptr; > > + > > + dma_sync_single_for_device(addr, size, dir); > > + > > + /* Local smem alias should never be used for dma */ > > + if (addr < KVX_SMEM_LOCAL_ALIAS) > > + return addr + (1 + kvx_cluster_id()) * KVX_SMEM_LOCAL_ALIAS; > > + > > + if (dev->dma_mask && addr <= dev->dma_mask) > > + return addr; > > + > > + if (addr >= KVX_DDR_ALIAS_WINDOW) > > + return DMA_ERROR_CODE; > > + > > + addr -= KVX_DDR_ALIAS_OFFSET; > > + if (dev->dma_mask && addr > dev->dma_mask) > > + return DMA_ERROR_CODE; > > + > > + return addr; > > +} > > + > > +void dma_unmap_single(struct device_d *dev, dma_addr_t addr, size_t size, > > + enum dma_data_direction dir) > > +{ > > + dma_sync_single_for_cpu(addr, size, dir); > > +} > > > _______________________________________________ barebox mailing list barebox@xxxxxxxxxxxxxxxxxxx http://lists.infradead.org/mailman/listinfo/barebox