Re: mmotm boot panic bootmem-avoid-dma32-zone-by-default.patch

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 03/05/2010 02:26 AM, Jiri Slaby wrote:
> On 03/05/2010 10:04 AM, Yinghai Lu wrote:
>> according to context
>> http://patchwork.kernel.org/patch/73893/
>>
>> Jiri, 
>> please check whether the current Linus tree still has the problem of mem_map using that much low memory?
> 
> Hi!
> 
> Sorry, I don't have direct access to the machine. I might try to ask the
> owners to do so.
> 
>> On my 1024G system the first node has 128G of RAM, and [2G, 4G) is an MMIO range.
> 
> So where gets your mem_map allocated (I suppose you're running flat model)?
> 
> Note that the failure we were seeing was with different amount of memory
> on different machines. Obviously because of different e820 reservations
> and driver requirements at boot time. So the required memory to trigger
> the error oscillated around 128G, sometimes being 130G.
> 
> It triggered when mem_map fit exactly into 0-2G (and 2-4G was reserved)
> and no more space was there. If RAM was more than 130G, mem_map was
> above 4G boundary implicitly, so that there was enough space in the
> first 4G of memory for others with specific bootmem limitations.
> 
>> with NO_BOOTMEM
>> [    0.000000]  a - 11
>> [    0.000000]  19 40 - 80 95
>> [    0.000000]  702 740 - 1000 1000
>> [    0.000000]  331f 3340 - 3400 3400
>> [    0.000000]  35dd - 3600
>> [    0.000000]  37dd - 3800
>> [    0.000000]  39dd - 3a00
>> [    0.000000]  3bdd - 3c00
>> [    0.000000]  3ddd - 3e00
>> [    0.000000]  3fdd - 4000
>> [    0.000000]  41dd - 4200
>> [    0.000000]  43dd - 4400
>> [    0.000000]  45dd - 4600
>> [    0.000000]  47dd - 4800
>> [    0.000000]  49dd - 4a00
>> [    0.000000]  4bdd - 4c00
>> [    0.000000]  4ddd - 4e00
>> [    0.000000]  4fdd - 5000
>> [    0.000000]  51dd - 5200
>> [    0.000000]  93dd 9400 - 7d500 7d53b
>> [    0.000000]  7f730 - 7f750
>> [    0.000000]  100012 100040 - 100200 100200
>> [    0.000000]  170200 170200 - 2080000 2080000
>> [    0.000000]  2080065 2080080 - 2080200 2080200
>>
>> so PFN: 9400 - 7d500 are free.
> 
> Could you explain more the dmesg output?

It lists the free pfn ranges that will be used for slab...

Attached is a debug patch that prints this out when CONFIG_NO_BOOTMEM is not set.

YH
Subject: [PATCH -v3] x86: print bootmem free before and free_all_bootmem

So that we can later double-check whether we have enough low pages.

-v2: fix errors checkpatch.pl reported
-v3: move the call after pci_iommu_alloc(), so the output can be compared with NO_BOOTMEM

Signed-off-by: Yinghai Lu <yinghai@xxxxxxxxxx>

---
 arch/x86/mm/init_64.c   |    2 +
 include/linux/bootmem.h |    3 +
 mm/bootmem.c            |   91 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 96 insertions(+)

Index: linux-2.6/mm/bootmem.c
===================================================================
--- linux-2.6.orig/mm/bootmem.c
+++ linux-2.6/mm/bootmem.c
@@ -335,6 +335,97 @@ static void __init __free(bootmem_data_t
 			BUG();
 }
 
+static void __init print_all_bootmem_free_core(bootmem_data_t *bdata)
+{	/* Debug aid: printk each run of pfns whose bit is clear in bdata's bitmap, then the total. */
+	int aligned;	/* nonzero when node_min_pfn is a multiple of BITS_PER_LONG */
+	unsigned long *map;
+	unsigned long start, end, count = 0;	/* count: pfns reported free so far */
+	unsigned long free_start = -1UL, free_end = 0;	/* pending run as offsets from node_min_pfn; -1UL = none, free_end is one past the run */
+
+	if (!bdata->node_bootmem_map)
+		return;	/* no bitmap on this bdata: nothing to report */
+
+	start = bdata->node_min_pfn;
+	end = bdata->node_low_pfn;
+
+	/*
+	 * If the start is aligned to the machine's word size, we might
+	 * be able to count it in bulks of that order (whole bitmap words).
+	 */
+	aligned = !(start & (BITS_PER_LONG - 1));
+
+	printk(KERN_DEBUG "nid=%td start=0x%010lx end=0x%010lx aligned=%d\n",
+		bdata - bootmem_node_data, start, end, aligned);	/* nid is bdata's index in bootmem_node_data[] */
+	map = bdata->node_bootmem_map;
+
+	while (start < end) {	/* each pass covers one bitmap word, i.e. BITS_PER_LONG pfns */
+		unsigned long idx, vec;
+
+		idx = start - bdata->node_min_pfn;	/* offset of this word's first pfn within the node */
+		vec = ~map[idx / BITS_PER_LONG];	/* inverted word: set bits are the pfns reported as free */
+
+		if (aligned && vec == ~0UL && start + BITS_PER_LONG < end) {	/* bulk path: whole word free and word ends strictly before end */
+			if (free_start == -1UL) {	/* no pending run yet: open one covering this word */
+				free_start = idx;
+				free_end = free_start + BITS_PER_LONG;
+			} else {
+				if (free_end == idx) {	/* contiguous with the pending run: extend it */
+					free_end += BITS_PER_LONG;
+				} else {
+					/* there is a gap: print the pending run, then start a new one */
+					printk(KERN_DEBUG "  free [0x%010lx - 0x%010lx]\n",
+							free_start + bdata->node_min_pfn,
+							free_end + bdata->node_min_pfn);
+					free_start = idx;
+					free_end = idx + BITS_PER_LONG;
+				}
+			}
+			count += BITS_PER_LONG;
+		} else {	/* otherwise walk the word one bit at a time */
+			unsigned long off = 0;
+
+			while (vec && off < BITS_PER_LONG) {	/* stops early once no set bits remain in vec */
+				if (vec & 1) {	/* pfn at offset idx + off is reported as free */
+					if (free_start == -1UL) {
+						free_start = idx + off;
+						free_end = free_start + 1;
+					} else {
+						if (free_end == (idx + off)) {	/* adjacent to the pending run: grow it by one pfn */
+							free_end++;
+						} else {
+							/* there is a gap: print the pending run, then start a new one */
+							printk(KERN_DEBUG "  free [0x%010lx - 0x%010lx]\n",
+								free_start + bdata->node_min_pfn,
+								free_end + bdata->node_min_pfn);
+							free_start = idx + off;
+							free_end = free_start + 1;
+						}
+					}
+					count++;
+				}
+				vec >>= 1;
+				off++;
+			}
+		}
+		start += BITS_PER_LONG;
+	}
+
+	/* print the run still pending when the scan ended, if any */
+	if (free_start != -1UL)
+		printk(KERN_DEBUG "  free [0x%010lx - 0x%010lx]\n",
+			free_start + bdata->node_min_pfn,
+			free_end + bdata->node_min_pfn);
+	printk(KERN_DEBUG "  total free 0x%010lx\n", count);
+}
+
+void __init print_bootmem_free(void)
+{	/* Run print_all_bootmem_free_core() on every bootmem_data_t linked on bdata_list. */
+	bootmem_data_t *bdata;	/* list cursor */
+
+	list_for_each_entry(bdata, &bdata_list, list)
+		print_all_bootmem_free_core(bdata);
+}
+
 static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
 			unsigned long eidx, int flags)
 {
Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -679,6 +679,8 @@ void __init mem_init(void)
 
 	pci_iommu_alloc();
 
+	print_bootmem_free();
+
 	/* clear_bss() already clear the empty_zero_page */
 
 	reservedpages = 0;
Index: linux-2.6/include/linux/bootmem.h
===================================================================
--- linux-2.6.orig/include/linux/bootmem.h
+++ linux-2.6/include/linux/bootmem.h
@@ -38,6 +38,9 @@ typedef struct bootmem_data {
 } bootmem_data_t;
 
 extern bootmem_data_t bootmem_node_data[];
+void print_bootmem_free(void);
+#else
+static inline void print_bootmem_free(void) {}
 #endif
 
 extern unsigned long bootmem_bootmap_pages(unsigned long);

[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]