The width of HT-bus is only 40-bit, but Loongson-3 has 48-bit physical address. This implies only node-0's memory is DMAable because high bits (Node ID) will lost. Fortunately, by configuring address windows in firmware, we can extract 2bit Node ID (bit 44~47, only bit 44~45 used now) from Loongson-3's 48-bit address space and embed it into 40-bit (bit 37~38). Every NUMA node can do DMA now (however, maximum memory of each node is reduced to 2^37 = 128GB). Signed-off-by: Huacai Chen <chenhc@xxxxxxxxxx> --- .../mips/include/asm/mach-loongson/dma-coherence.h | 30 ++++++++++++++++++- arch/mips/loongson/Kconfig | 5 +++ arch/mips/loongson/common/dma-swiotlb.c | 10 ++++++ 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/arch/mips/include/asm/mach-loongson/dma-coherence.h b/arch/mips/include/asm/mach-loongson/dma-coherence.h index 6a90275..aff3261 100644 --- a/arch/mips/include/asm/mach-loongson/dma-coherence.h +++ b/arch/mips/include/asm/mach-loongson/dma-coherence.h @@ -23,7 +23,17 @@ static inline dma_addr_t plat_map_dma_mem(struct device *dev, void *addr, size_t size) { #ifdef CONFIG_CPU_LOONGSON3 - return virt_to_phys(addr); + long nid; + dma_addr_t daddr; + + daddr = virt_to_phys(addr); +#ifdef CONFIG_PHYS48_TO_HT40 + /* We extract 2bit node id (bit 44~47, only bit 44~45 used now) from + * Loongson-3's 48bit address space and embed it into 40bit */ + nid = (daddr >> 44) & 0x3; + daddr = ((nid << 44) ^ daddr) | (nid << 37); +#endif + return daddr; #else return virt_to_phys(addr) | 0x80000000; #endif @@ -33,7 +43,17 @@ static inline dma_addr_t plat_map_dma_mem_page(struct device *dev, struct page *page) { #ifdef CONFIG_CPU_LOONGSON3 - return page_to_phys(page); + long nid; + dma_addr_t daddr; + + daddr = page_to_phys(page); +#ifdef CONFIG_PHYS48_TO_HT40 + /* We extract 2bit node id (bit 44~47, only bit 44~45 used now) from + * Loongson-3's 48bit address space and embed it into 40bit */ + nid = (daddr >> 44) & 0x3; + daddr = ((nid << 44) ^ daddr) | (nid << 37); +#endif + return daddr; #else return page_to_phys(page) | 0x80000000; #endif @@ -43,6 +63,12 @@ static inline unsigned long plat_dma_addr_to_phys(struct device *dev, dma_addr_t dma_addr) { #if defined(CONFIG_CPU_LOONGSON3) && defined(CONFIG_64BIT) + long nid; + +#ifdef CONFIG_PHYS48_TO_HT40 + nid = (dma_addr >> 37) & 0x3; + dma_addr = ((nid << 37) ^ dma_addr) | (nid << 44); +#endif return dma_addr; #elif defined(CONFIG_CPU_LOONGSON2F) && defined(CONFIG_64BIT) return (dma_addr > 0x8fffffff) ? dma_addr : (dma_addr & 0x0fffffff); diff --git a/arch/mips/loongson/Kconfig b/arch/mips/loongson/Kconfig index 1b91fc6..72f830ac 100644 --- a/arch/mips/loongson/Kconfig +++ b/arch/mips/loongson/Kconfig @@ -86,6 +86,7 @@ config LOONGSON_MACH3X select LOONGSON_MC146818 select ZONE_DMA32 select LEFI_FIRMWARE_INTERFACE + select PHYS48_TO_HT40 help Generic Loongson 3 family machines utilize the 3A/3B revision of Loongson processor and RS780/SBX00 chipset. @@ -131,6 +132,10 @@ config SWIOTLB select NEED_SG_DMA_LENGTH select NEED_DMA_MAP_STATE +config PHYS48_TO_HT40 + bool + default y if CPU_LOONGSON3 + config LOONGSON_MC146818 bool default n diff --git a/arch/mips/loongson/common/dma-swiotlb.c b/arch/mips/loongson/common/dma-swiotlb.c index c2be01f..d0d43ad 100644 --- a/arch/mips/loongson/common/dma-swiotlb.c +++ b/arch/mips/loongson/common/dma-swiotlb.c @@ -105,11 +105,21 @@ static int loongson_dma_set_mask(struct device *dev, u64 mask) dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { + long nid; +#ifdef CONFIG_PHYS48_TO_HT40 + nid = (paddr >> 44) & 0x3; + paddr = ((nid << 44) ^ paddr) | (nid << 37); +#endif return paddr; } phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) { + long nid; +#ifdef CONFIG_PHYS48_TO_HT40 + nid = (daddr >> 37) & 0x3; + daddr = ((nid << 37) ^ daddr) | (nid << 44); +#endif return daddr; } -- 1.7.7.3