Re: Panic on 8-node system in memblock_virt_alloc_try_nid()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Jan 23, 2014 at 11:22 PM, Santosh Shilimkar
<santosh.shilimkar@xxxxxx> wrote:
> On Friday 24 January 2014 02:04 AM, Yinghai Lu wrote:
>> On Thu, Jan 23, 2014 at 10:56 PM, Santosh Shilimkar
>> <santosh.shilimkar@xxxxxx> wrote:
>>> On Friday 24 January 2014 01:38 AM, Santosh Shilimkar wrote:
>>
>>> The patch which is now commit 457ff1d {lib/swiotlb.c: use
>>> memblock apis for early memory allocations} was breaking the
>>> boot on Andrew's machine. Now if I look back at the patch, based on your
>>> above description, I believe the hunk below was/is the culprit.
>>>
>>> @@ -172,8 +172,9 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
>>>         /*
>>>          * Get the overflow emergency buffer
>>>          */
>>> -       v_overflow_buffer = alloc_bootmem_low_pages_nopanic(
>>> -                                               PAGE_ALIGN(io_tlb_overflow));
>>> +       v_overflow_buffer = memblock_virt_alloc_nopanic(
>>> +                                               PAGE_ALIGN(io_tlb_overflow),
>>> +                                               PAGE_SIZE);
>>>         if (!v_overflow_buffer)
>>>                 return -ENOMEM;
>>>
>>>
>>> Looks like 'v_overflow_buffer' must be allocated from low memory in this
>>> case. Is that correct ?
>>
>> yes.
>>
>> but should the change like following
>>
>> commit 457ff1de2d247d9b8917c4664c2325321a35e313
>> Author: Santosh Shilimkar <santosh.shilimkar@xxxxxx>
>> Date:   Tue Jan 21 15:50:30 2014 -0800
>>
>>     lib/swiotlb.c: use memblock apis for early memory allocations
>>
>>
>> @@ -215,13 +220,13 @@ swiotlb_init(int verbose)
>>         bytes = io_tlb_nslabs << IO_TLB_SHIFT;
>>
>>         /* Get IO TLB memory from the low pages */
>> -       vstart = alloc_bootmem_low_pages_nopanic(PAGE_ALIGN(bytes));
>> +       vstart = memblock_virt_alloc_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE);
>>         if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
>>                 return;
>>
> OK. So we need a memblock API equivalent to '__alloc_bootmem_low()'. We will
> try to come up with a patch for the same. Thanks for the inputs.

Yes,

Andrew, can you try the two attached patches in your setup?

I assume your system does not have Intel IOMMU support?

Thanks

Yinghai
Subject: [PATCH] x86: Fix numa with reverting wrong memblock setting.

Dave reported that NUMA on x86 is broken on a system with 1T of memory.

It turns out
| commit 5b6e529521d35e1bcaa0fe43456d1bbb335cae5d
| Author: Santosh Shilimkar <santosh.shilimkar@xxxxxx>
| Date:   Tue Jan 21 15:50:03 2014 -0800
|
|    x86: memblock: set current limit to max low memory address

wrongly sets the memblock current limit to the low memory limit.

max_low_pfn_mapped is different from max_pfn_mapped.
max_low_pfn_mapped is always under 4G.

That makes all memblock_alloc_nid allocations go under 4G.

Revert that offending patch.

Reported-by: Dave Hansen <dave.hansen@xxxxxxxxx>
Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>


---
 arch/x86/include/asm/page_types.h |    4 ++--
 arch/x86/kernel/setup.c           |    2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

Index: linux-2.6/arch/x86/include/asm/page_types.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/page_types.h
+++ linux-2.6/arch/x86/include/asm/page_types.h
@@ -51,9 +51,9 @@ extern int devmem_is_allowed(unsigned lo
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
-static inline phys_addr_t get_max_low_mapped(void)
+static inline phys_addr_t get_max_mapped(void)
 {
-	return (phys_addr_t)max_low_pfn_mapped << PAGE_SHIFT;
+	return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
 }
 
 bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn);
Index: linux-2.6/arch/x86/kernel/setup.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup.c
+++ linux-2.6/arch/x86/kernel/setup.c
@@ -1173,7 +1173,7 @@ void __init setup_arch(char **cmdline_p)
 
 	setup_real_mode();
 
-	memblock_set_current_limit(get_max_low_mapped());
+	memblock_set_current_limit(get_max_mapped());
 	dma_contiguous_reserve(0);
 
 	/*
---
 arch/arm/kernel/setup.c |    2 +-
 include/linux/bootmem.h |   37 +++++++++++++++++++++++++++++++++++++
 lib/swiotlb.c           |    4 ++--
 3 files changed, 40 insertions(+), 3 deletions(-)

Index: linux-2.6/include/linux/bootmem.h
===================================================================
--- linux-2.6.orig/include/linux/bootmem.h
+++ linux-2.6/include/linux/bootmem.h
@@ -175,6 +175,27 @@ static inline void * __init memblock_vir
 						    NUMA_NO_NODE);
 }
 
+#ifndef ARCH_LOW_ADDRESS_LIMIT
+#define ARCH_LOW_ADDRESS_LIMIT  0xffffffffUL
+#endif
+
+static inline void * __init memblock_virt_alloc_low(
+                                        phys_addr_t size, phys_addr_t align)
+{
+        return memblock_virt_alloc_try_nid(size, align,
+						   BOOTMEM_LOW_LIMIT,
+						   ARCH_LOW_ADDRESS_LIMIT,
+						   NUMA_NO_NODE);
+}
+static inline void * __init memblock_virt_alloc_low_nopanic(
+                                        phys_addr_t size, phys_addr_t align)
+{
+        return memblock_virt_alloc_try_nid_nopanic(size, align,
+						   BOOTMEM_LOW_LIMIT,
+						   ARCH_LOW_ADDRESS_LIMIT,
+						   NUMA_NO_NODE);
+}
+
 static inline void * __init memblock_virt_alloc_from_nopanic(
 		phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
 {
@@ -238,6 +259,22 @@ static inline void * __init memblock_vir
 	return __alloc_bootmem_nopanic(size, align, BOOTMEM_LOW_LIMIT);
 }
 
+static inline void * __init memblock_virt_alloc_low(
+                                        phys_addr_t size, phys_addr_t align)
+{
+	if (!align)
+		align = SMP_CACHE_BYTES;
+	return __alloc_bootmem_low(size, align, BOOTMEM_LOW_LIMIT);
+}
+
+static inline void * __init memblock_virt_alloc_low_nopanic(
+                                        phys_addr_t size, phys_addr_t align)
+{
+	if (!align)
+		align = SMP_CACHE_BYTES;
+	return __alloc_bootmem_low_nopanic(size, align, BOOTMEM_LOW_LIMIT);
+}
+
 static inline void * __init memblock_virt_alloc_from_nopanic(
 		phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
 {
Index: linux-2.6/lib/swiotlb.c
===================================================================
--- linux-2.6.orig/lib/swiotlb.c
+++ linux-2.6/lib/swiotlb.c
@@ -172,7 +172,7 @@ int __init swiotlb_init_with_tbl(char *t
 	/*
 	 * Get the overflow emergency buffer
 	 */
-	v_overflow_buffer = memblock_virt_alloc_nopanic(
+	v_overflow_buffer = memblock_virt_alloc_low_nopanic(
 						PAGE_ALIGN(io_tlb_overflow),
 						PAGE_SIZE);
 	if (!v_overflow_buffer)
@@ -220,7 +220,7 @@ swiotlb_init(int verbose)
 	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 
 	/* Get IO TLB memory from the low pages */
-	vstart = memblock_virt_alloc_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE);
+	vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE);
 	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
 		return;
 
Index: linux-2.6/arch/arm/kernel/setup.c
===================================================================
--- linux-2.6.orig/arch/arm/kernel/setup.c
+++ linux-2.6/arch/arm/kernel/setup.c
@@ -717,7 +717,7 @@ static void __init request_standard_reso
 	kernel_data.end     = virt_to_phys(_end - 1);
 
 	for_each_memblock(memory, region) {
-		res = memblock_virt_alloc(sizeof(*res), 0);
+		res = memblock_virt_alloc_low(sizeof(*res), 0);
 		res->name  = "System RAM";
 		res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
 		res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;

[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]