On Thu, Jan 23, 2014 at 11:22 PM, Santosh Shilimkar <santosh.shilimkar@xxxxxx> wrote: > On Friday 24 January 2014 02:04 AM, Yinghai Lu wrote: >> On Thu, Jan 23, 2014 at 10:56 PM, Santosh Shilimkar >> <santosh.shilimkar@xxxxxx> wrote: >>> On Friday 24 January 2014 01:38 AM, Santosh Shilimkar wrote: >> >>> The patch which is now commit 457ff1d {lib/swiotlb.c: use >>> memblock apis for early memory allocations} was breaking the >>> boot on Andrew's machine. Now if I look back at the patch, based on your >>> above description, I believe below hunk was/is the culprit. >>> >>> @@ -172,8 +172,9 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) >>> /* >>> * Get the overflow emergency buffer >>> */ >>> - v_overflow_buffer = alloc_bootmem_low_pages_nopanic( >>> - PAGE_ALIGN(io_tlb_overflow)); >>> + v_overflow_buffer = memblock_virt_alloc_nopanic( >>> + PAGE_ALIGN(io_tlb_overflow), >>> + PAGE_SIZE); >>> if (!v_overflow_buffer) >>> return -ENOMEM; >>> >>> >>> Looks like 'v_overflow_buffer' must be allocated from low memory in this >>> case. Is that correct? >> >> yes. >> >> but should the change like following >> >> commit 457ff1de2d247d9b8917c4664c2325321a35e313 >> Author: Santosh Shilimkar <santosh.shilimkar@xxxxxx> >> Date: Tue Jan 21 15:50:30 2014 -0800 >> >> lib/swiotlb.c: use memblock apis for early memory allocations >> >> >> @@ -215,13 +220,13 @@ swiotlb_init(int verbose) >> bytes = io_tlb_nslabs << IO_TLB_SHIFT; >> >> /* Get IO TLB memory from the low pages */ >> - vstart = alloc_bootmem_low_pages_nopanic(PAGE_ALIGN(bytes)); >> + vstart = memblock_virt_alloc_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); >> if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) >> return; >> > OK. So we need '__alloc_bootmem_low()' equivalent memblock API. We will try > to come up with a patch for the same. Thanks for inputs. Yes, Andrew, can you try the two attached patches in your setup? I assume your system does not have Intel IOMMU support? 
Thanks Yinghai
Subject: [PATCH] x86: Fix NUMA by reverting wrong memblock setting. Dave reported that NUMA on x86 is broken on systems with 1T of memory. It turns out that | commit 5b6e529521d35e1bcaa0fe43456d1bbb335cae5d | Author: Santosh Shilimkar <santosh.shilimkar@xxxxxx> | Date: Tue Jan 21 15:50:03 2014 -0800 | | x86: memblock: set current limit to max low memory address wrongly sets the limit to low memory. max_low_pfn_mapped is different from max_pfn_mapped: max_low_pfn_mapped is always under 4G, so that commit makes all memblock_alloc_nid allocations go under 4G. Revert that offending patch. Reported-by: Dave Hansen <dave.hansen@xxxxxxxxx> Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx> --- arch/x86/include/asm/page_types.h | 4 ++-- arch/x86/kernel/setup.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) Index: linux-2.6/arch/x86/include/asm/page_types.h =================================================================== --- linux-2.6.orig/arch/x86/include/asm/page_types.h +++ linux-2.6/arch/x86/include/asm/page_types.h @@ -51,9 +51,9 @@ extern int devmem_is_allowed(unsigned lo extern unsigned long max_low_pfn_mapped; extern unsigned long max_pfn_mapped; -static inline phys_addr_t get_max_low_mapped(void) +static inline phys_addr_t get_max_mapped(void) { - return (phys_addr_t)max_low_pfn_mapped << PAGE_SHIFT; + return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT; } bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn); Index: linux-2.6/arch/x86/kernel/setup.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/setup.c +++ linux-2.6/arch/x86/kernel/setup.c @@ -1173,7 +1173,7 @@ void __init setup_arch(char **cmdline_p) setup_real_mode(); - memblock_set_current_limit(get_max_low_mapped()); + memblock_set_current_limit(get_max_mapped()); dma_contiguous_reserve(0); /*
--- arch/arm/kernel/setup.c | 2 +- include/linux/bootmem.h | 37 +++++++++++++++++++++++++++++++++++++ lib/swiotlb.c | 4 ++-- 3 files changed, 40 insertions(+), 3 deletions(-) Index: linux-2.6/include/linux/bootmem.h =================================================================== --- linux-2.6.orig/include/linux/bootmem.h +++ linux-2.6/include/linux/bootmem.h @@ -175,6 +175,27 @@ static inline void * __init memblock_vir NUMA_NO_NODE); } +#ifndef ARCH_LOW_ADDRESS_LIMIT +#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL +#endif + +static inline void * __init memblock_virt_alloc_low( + phys_addr_t size, phys_addr_t align) +{ + return memblock_virt_alloc_try_nid(size, align, + BOOTMEM_LOW_LIMIT, + ARCH_LOW_ADDRESS_LIMIT, + NUMA_NO_NODE); +} +static inline void * __init memblock_virt_alloc_low_nopanic( + phys_addr_t size, phys_addr_t align) +{ + return memblock_virt_alloc_try_nid_nopanic(size, align, + BOOTMEM_LOW_LIMIT, + ARCH_LOW_ADDRESS_LIMIT, + NUMA_NO_NODE); +} + static inline void * __init memblock_virt_alloc_from_nopanic( phys_addr_t size, phys_addr_t align, phys_addr_t min_addr) { @@ -238,6 +259,22 @@ static inline void * __init memblock_vir return __alloc_bootmem_nopanic(size, align, BOOTMEM_LOW_LIMIT); } +static inline void * __init memblock_virt_alloc_low( + phys_addr_t size, phys_addr_t align) +{ + if (!align) + align = SMP_CACHE_BYTES; + return __alloc_bootmem_low(size, align, BOOTMEM_LOW_LIMIT); +} + +static inline void * __init memblock_virt_alloc_low_nopanic( + phys_addr_t size, phys_addr_t align) +{ + if (!align) + align = SMP_CACHE_BYTES; + return __alloc_bootmem_low_nopanic(size, align, BOOTMEM_LOW_LIMIT); +} + static inline void * __init memblock_virt_alloc_from_nopanic( phys_addr_t size, phys_addr_t align, phys_addr_t min_addr) { Index: linux-2.6/lib/swiotlb.c =================================================================== --- linux-2.6.orig/lib/swiotlb.c +++ linux-2.6/lib/swiotlb.c @@ -172,7 +172,7 @@ int __init swiotlb_init_with_tbl(char *t /* * 
Get the overflow emergency buffer */ - v_overflow_buffer = memblock_virt_alloc_nopanic( + v_overflow_buffer = memblock_virt_alloc_low_nopanic( PAGE_ALIGN(io_tlb_overflow), PAGE_SIZE); if (!v_overflow_buffer) @@ -220,7 +220,7 @@ swiotlb_init(int verbose) bytes = io_tlb_nslabs << IO_TLB_SHIFT; /* Get IO TLB memory from the low pages */ - vstart = memblock_virt_alloc_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); + vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) return; Index: linux-2.6/arch/arm/kernel/setup.c =================================================================== --- linux-2.6.orig/arch/arm/kernel/setup.c +++ linux-2.6/arch/arm/kernel/setup.c @@ -717,7 +717,7 @@ static void __init request_standard_reso kernel_data.end = virt_to_phys(_end - 1); for_each_memblock(memory, region) { - res = memblock_virt_alloc(sizeof(*res), 0); + res = memblock_virt_alloc_low(sizeof(*res), 0); res->name = "System RAM"; res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;