> On Sep 11, 2019, at 2:25 PM, Nicolas Saenz Julienne <nsaenzjulienne@xxxxxxx> wrote:
>
> So far all arm64 devices have supported 32 bit DMA masks for their
> peripherals. This is not true anymore for the Raspberry Pi 4, as most of
> its peripherals can only address the first GB of memory out of a total
> of up to 4 GB.
>
> This goes against ZONE_DMA32's intent, as it's expected for ZONE_DMA32
> to be addressable with a 32 bit mask. So it was decided to re-introduce
> ZONE_DMA on arm64.
>
> ZONE_DMA will contain the lower 1G of memory, which is currently the
> memory area addressable by any peripheral on an arm64 device.
> ZONE_DMA32 will contain the rest of the 32 bit addressable memory.
>
> Signed-off-by: Nicolas Saenz Julienne <nsaenzjulienne@xxxxxxx>
> Reviewed-by: Catalin Marinas <catalin.marinas@xxxxxxx>
>
> ---
>
> I kept the Reviewed-by, as the fix for the last bug was proposed by
> Catalin.
>
> Changes in v6:
> - Fixed bug in max_zone_phys
>
> Changes in v5:
> - Fixed swiotlb initialization
>
> Changes in v4:
> - Fixed issue when NUMA=n and ZONE_DMA=n
> - Merged the two max_zone_dma*_phys() functions
>
> Changes in v3:
> - Used fixed size ZONE_DMA
> - Fixed check before swiotlb_init()
>
> Changes in v2:
> - Updated comment to reflect the new zone split
> - ZONE_DMA will never be left empty
>
>  arch/arm64/Kconfig            |  4 +++
>  arch/arm64/include/asm/page.h |  2 ++
>  arch/arm64/mm/init.c          | 54 +++++++++++++++++++++++++----------
>  3 files changed, 45 insertions(+), 15 deletions(-)
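
To make the proposed 1 GB / 4 GB split concrete before the diff: below is a
minimal userspace sketch of the patch's max_zone_phys() logic, assuming DRAM
starts at physical address 0 and spans 4 GB (roughly a Raspberry Pi 4). The
names mirror the patch, but none of this is kernel code.

/* Userspace sketch of max_zone_phys(); assumes DRAM at [0, 4 GB). */
#include <stdio.h>
#include <stdint.h>

#define GENMASK_ULL(h, l) \
        ((~0ULL << (l)) & (~0ULL >> (63 - (h))))

static const uint64_t dram_start = 0;          /* memblock_start_of_DRAM() */
static const uint64_t dram_end   = 4ULL << 30; /* memblock_end_of_DRAM()   */

static uint64_t max_zone_phys(unsigned int zone_bits)
{
        /* If DRAM starts above 2^zone_bits, assume devices use a DMA offset. */
        uint64_t offset = dram_start & GENMASK_ULL(63, zone_bits);
        uint64_t limit = offset + (1ULL << zone_bits);

        return limit < dram_end ? limit : dram_end;
}

int main(void)
{
        /* ARCH_ZONE_DMA_BITS == 30, so ZONE_DMA tops out at 1 GB. */
        printf("ZONE_DMA   limit: %#llx\n",
               (unsigned long long)max_zone_phys(30)); /* 0x40000000  */
        printf("ZONE_DMA32 limit: %#llx\n",
               (unsigned long long)max_zone_phys(32)); /* 0x100000000 */
        return 0;
}

On this layout ZONE_DMA comes out as [0, 1 GB) and ZONE_DMA32 as
[1 GB, 4 GB), matching the commit message.
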
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 6b6362b83004..2dbe0165bd15 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -267,6 +267,10 @@ config GENERIC_CSUM
>  config GENERIC_CALIBRATE_DELAY
>  	def_bool y
>  
> +config ZONE_DMA
> +	bool "Support DMA zone" if EXPERT
> +	default y
> +
>  config ZONE_DMA32
>  	bool "Support DMA32 zone" if EXPERT
>  	default y
> diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
> index d39ddb258a04..7b8c98830101 100644
> --- a/arch/arm64/include/asm/page.h
> +++ b/arch/arm64/include/asm/page.h
> @@ -38,4 +38,6 @@ extern int pfn_valid(unsigned long);
>  
>  #include <asm-generic/getorder.h>
>  
> +#define ARCH_ZONE_DMA_BITS	30
> +
>  #endif
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 8e9bc64c5878..44f07fdf7a59 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -56,6 +56,13 @@ EXPORT_SYMBOL(physvirt_offset);
>  struct page *vmemmap __ro_after_init;
>  EXPORT_SYMBOL(vmemmap);
>  
> +/*
> + * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of
> + * memory as some devices, namely the Raspberry Pi 4, have peripherals with
> + * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32
> + * bit addressable memory area.
> + */
> +phys_addr_t arm64_dma_phys_limit __ro_after_init;
>  phys_addr_t arm64_dma32_phys_limit __ro_after_init;
>  
>  #ifdef CONFIG_KEXEC_CORE
> @@ -169,15 +176,16 @@ static void __init reserve_elfcorehdr(void)
>  {
>  }
>  #endif /* CONFIG_CRASH_DUMP */
> +
>  /*
> - * Return the maximum physical address for ZONE_DMA32 (DMA_BIT_MASK(32)). It
> - * currently assumes that for memory starting above 4G, 32-bit devices will
> - * use a DMA offset.
> + * Return the maximum physical address for a zone with a given address size
> + * limit. It currently assumes that for memory starting above 4G, 32-bit
> + * devices will use a DMA offset.
>   */
> -static phys_addr_t __init max_zone_dma32_phys(void)
> +static phys_addr_t __init max_zone_phys(unsigned int zone_bits)
>  {
> -	phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32);
> -	return min(offset + (1ULL << 32), memblock_end_of_DRAM());
> +	phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, zone_bits);
> +	return min(offset + (1ULL << zone_bits), memblock_end_of_DRAM());
>  }
>  
>  #ifdef CONFIG_NUMA
> @@ -186,6 +194,9 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
>  {
>  	unsigned long max_zone_pfns[MAX_NR_ZONES] = {0};
>  
> +#ifdef CONFIG_ZONE_DMA
> +	max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
> +#endif
>  #ifdef CONFIG_ZONE_DMA32
>  	max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit);
>  #endif
> @@ -201,13 +212,18 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
>  	struct memblock_region *reg;
>  	unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
>  	unsigned long max_dma32 = min;
> +	unsigned long max_dma = min;
>  
>  	memset(zone_size, 0, sizeof(zone_size));
>  
> -	/* 4GB maximum for 32-bit only capable devices */
> +#ifdef CONFIG_ZONE_DMA
> +	max_dma = PFN_DOWN(arm64_dma_phys_limit);
> +	zone_size[ZONE_DMA] = max_dma - min;
> +	max_dma32 = max_dma;
> +#endif
>  #ifdef CONFIG_ZONE_DMA32
>  	max_dma32 = PFN_DOWN(arm64_dma32_phys_limit);
> -	zone_size[ZONE_DMA32] = max_dma32 - min;
> +	zone_size[ZONE_DMA32] = max_dma32 - max_dma;
>  #endif
>  	zone_size[ZONE_NORMAL] = max - max_dma32;
>  
> @@ -219,11 +235,17 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
>  
>  		if (start >= max)
>  			continue;
> -
> +#ifdef CONFIG_ZONE_DMA
> +		if (start < max_dma) {
> +			unsigned long dma_end = min_not_zero(end, max_dma);
> +			zhole_size[ZONE_DMA] -= dma_end - start;
> +		}
> +#endif
>  #ifdef CONFIG_ZONE_DMA32
>  		if (start < max_dma32) {
> -			unsigned long dma_end = min(end, max_dma32);
> -			zhole_size[ZONE_DMA32] -= dma_end - start;
> +			unsigned long dma32_end = min(end, max_dma32);
> +			unsigned long dma32_start = max(start, max_dma);
> +			zhole_size[ZONE_DMA32] -= dma32_end - dma32_start;
>  		}
>  #endif
>  		if (end > max_dma32) {
> @@ -418,9 +440,11 @@ void __init arm64_memblock_init(void)
>  
>  	early_init_fdt_scan_reserved_mem();
>  
> -	/* 4GB maximum for 32-bit only capable devices */
> +	if (IS_ENABLED(CONFIG_ZONE_DMA))
> +		arm64_dma_phys_limit = max_zone_phys(ARCH_ZONE_DMA_BITS);
> +
>  	if (IS_ENABLED(CONFIG_ZONE_DMA32))
> -		arm64_dma32_phys_limit = max_zone_dma32_phys();
> +		arm64_dma32_phys_limit = max_zone_phys(32);
>  	else
>  		arm64_dma32_phys_limit = PHYS_MASK + 1;
>  
> @@ -430,7 +454,7 @@ void __init arm64_memblock_init(void)
>  
>  	high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
>  
> -	dma_contiguous_reserve(arm64_dma32_phys_limit);
> +	dma_contiguous_reserve(arm64_dma_phys_limit ? : arm64_dma32_phys_limit);
>  }
>  
>  void __init bootmem_init(void)
> @@ -534,7 +558,7 @@ static void __init free_unused_memmap(void)
>  void __init mem_init(void)
>  {
>  	if (swiotlb_force == SWIOTLB_FORCE ||
> -	    max_pfn > (arm64_dma32_phys_limit >> PAGE_SHIFT))
> +	    max_pfn > PFN_DOWN(arm64_dma_phys_limit ? : arm64_dma32_phys_limit))
>  		swiotlb_init(1);
>  	else
>  		swiotlb_force = SWIOTLB_NO_FORCE;
> -- 
> 2.23.0
>
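One practical consequence of the dma_contiguous_reserve() hunk above, which
bears on the question below: with CONFIG_ZONE_DMA=y the CMA ceiling drops
from the 32-bit boundary to arm64_dma_phys_limit, i.e. the first 1 GB. A
minimal sketch of the "? :" selection follows; the values are illustrative,
not taken from a real boot.

/* Sketch of the new CMA limit selection; values are illustrative only. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t arm64_dma_phys_limit   = 1ULL << 30; /* nonzero iff ZONE_DMA=y */
        uint64_t arm64_dma32_phys_limit = 1ULL << 32;

        /* GNU "x ? : y" means "x ? x : y", so ZONE_DMA wins when present. */
        uint64_t cma_limit = arm64_dma_phys_limit ?
                        arm64_dma_phys_limit : arm64_dma32_phys_limit;

        /* The default CMA area must now fit below this ceiling. */
        printf("dma_contiguous_reserve() limit: %#llx\n",
               (unsigned long long)cma_limit);
        return 0;
}
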
With ZONE_DMA=y, this config will fail to reserve 512M CMA on a server:
https://raw.githubusercontent.com/cailca/linux-mm/master/arm64.config

CONFIG_DMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=64
CONFIG_CMA_SIZE_SEL_MBYTES=y
CONFIG_CMA_ALIGNMENT=8
CONFIG_CMA=y
CONFIG_CMA_DEBUGFS=y
CONFIG_CMA_AREAS=7

Is this expected?
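
For what it's worth, the arithmetic that makes such a reservation fragile,
as a rough, hypothetical fit check: the reserved-memory figure below is made
up, and this is not the kernel's actual placement logic, just the sizes.

/* Hypothetical fit check: can a CMA request fit below the DMA limit? */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t limit    = 1ULL << 30;   /* arm64_dma_phys_limit (1 GB)    */
        uint64_t cma_size = 512ULL << 20; /* requested CMA region           */
        uint64_t reserved = 600ULL << 20; /* made-up total already sitting
                                           * below 1 GB: kernel, initrd,
                                           * firmware carve-outs, ...       */
        uint64_t free_below = limit - reserved;

        if (cma_size > free_below)
                printf("fails: need %llu MB, only %llu MB free below the limit\n",
                       (unsigned long long)(cma_size >> 20),
                       (unsigned long long)(free_below >> 20));
        else
                printf("fits\n");
        return 0;
}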