Re: x86 remap allocator in kernel 3.0

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Dne St 11. ledna 2012 15:37:50 Dave Anderson napsal(a):
> ----- Original Message -----
> 
> > Dne St 11. ledna 2012 00:37:50 Petr Tesarik napsal(a):
> > > [...]
> > > I can see now that this is unnecessarily complicated, because the
> > > node_remap_* variables are static arrays of MAX_NUMNODES elements, so I
> > > can get their size from the debuginfo at POST_GDB init and initialize a
> > > machine-specific data type with it. I'll post another patch tomorrow.
> > 
> > And here we go. Tested on my system and seems to work just fine.
> > 
> > Petr Tesarik
> > SUSE Linux
> 
> Hi Petr,
> 
> This looks pretty good to me.  However, just to clarify the
> chicken-and-egg situation here...
> 
> When remap_init() does these 3 readmem() calls, they will pass
> through x86_kvtop_remap() -- which I guess would fail because
> the arrays would still be at least partially uninitialized?:

Hi Dave,

First of all, it's not a real issue, because kernel static data is always 
below the remapped addresses. But I agree that it's ugly and makes non-obvious 
assumptions about the memory layout. To make things clean, I can simply set 
machdep->machspec->max_numnodes as the last thing, so the loop in 
x86_kvtop_remap() cannot be executed until everything is initialized.

> +	readmem(start_vaddr, KVADDR, ms->remap_start_vaddr,
> +		ms->max_numnodes * sizeof(ulong), "node_remap_start_vaddr",
> +		FAULT_ON_ERROR);
> +	readmem(end_vaddr, KVADDR, ms->remap_end_vaddr,
> +		ms->max_numnodes * sizeof(ulong), "node_remap_end_vaddr",
> +		FAULT_ON_ERROR);
> +	readmem(start_pfn, KVADDR, ms->remap_start_pfn,
> +		ms->max_numnodes * sizeof(ulong), "node_remap_end_vaddr",
> +		FAULT_ON_ERROR);
> 
> To fortify this (and put my mind at ease), can we also set a "remap_state"
> flag in the machdep structure that describes the remap data structures in
> maybe two states:
> 
>   UNINITIALIZED - set at SETUP_ENV time, then cleared in x86_init() before
>                   remap_init() is called

Denoted by max_numnodes == 0

>   NOT_USED      - if remap_init() fails

Denoted by max_numnodes < 0

> Then have x86_kvtop_remap() -- which will get called on every readmem() or
> kvtop() for the rest of time, check for (UNINITIALIZED|NOT_USED), and
> return FALSE immediately if either are set?
> 
> What do you think?

Patch attached.

Petr Tesarik
SUSE Linux
From: Petr Tesarik <ptesarik@xxxxxxx>
Subject: [x86] Add correct handling of regions allocated with the remap allocator
References: bnc#738742
Patch-mainline: no

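For NUMA x86, the pgdat is remapped into the node's physical memory. Since
that physical memory may not be reachable through the identity mapping, a
small part of the identity mapping's virtual address range is used to map it
instead, so addresses in that range cannot be translated with plain VTOP().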

This special case has never been handled properly by crash.

Signed-off-by: Petr Tesarik <ptesarik@xxxxxxx>

---
 defs.h |    4 ++
 x86.c  |  126 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 122 insertions(+), 8 deletions(-)

--- a/defs.h
+++ b/defs.h
@@ -4253,6 +4253,10 @@ struct machine_specific {
 	ulonglong last_pmd_read_PAE;
 	ulonglong last_ptbl_read_PAE;
 	ulong page_protnone;
+	int max_numnodes;
+	ulong *remap_start_vaddr;
+	ulong *remap_end_vaddr;
+	ulong *remap_start_pfn;
 };
 
 struct syment *x86_is_entry_tramp_address(ulong, ulong *); 
--- a/x86.c
+++ b/x86.c
@@ -996,6 +996,7 @@ static int x86_uvtop_xen_wpt(struct task
 static int x86_kvtop_xen_wpt(struct task_context *, ulong, physaddr_t *, int);
 static int x86_uvtop_xen_wpt_PAE(struct task_context *, ulong, physaddr_t *, int);
 static int x86_kvtop_xen_wpt_PAE(struct task_context *, ulong, physaddr_t *, int);
+static int x86_kvtop_remap(ulong, physaddr_t *);
 static ulong x86_get_task_pgd(ulong);
 static ulong x86_processor_speed(void);
 static ulong x86_get_pc(struct bt_info *);
@@ -1006,6 +1007,7 @@ static uint64_t x86_memory_size(void);
 static ulong x86_vmalloc_start(void);
 static ulong *read_idt_table(int);
 static void eframe_init(void);
+static int remap_init(void);
 #define READ_IDT_INIT     1
 #define READ_IDT_RUNTIME  2
 static char *extract_idt_function(ulong *, char *, ulong *);
@@ -2014,6 +2016,9 @@ x86_init(int when)
 				   "pr_reg");
 		STRUCT_SIZE_INIT(percpu_data, "percpu_data");
 
+		if (!remap_init())
+			machdep->machspec->max_numnodes = -1;
+
 		break;
 
 	case POST_INIT:
@@ -2084,6 +2089,74 @@ eframe_init(void)
 }
 
 /*
+ *  Locate regions remapped by the remap allocator
+ */
+static int
+remap_init(void)
+{
+	ulong start_vaddr, end_vaddr, start_pfn;
+	int max_numnodes;
+	struct machine_specific *ms;
+	struct syment *sp;
+
+	if (! (sp = symbol_search("node_remap_start_vaddr")) )
+		return FALSE;
+	start_vaddr = sp->value;
+
+	if (! (sp = symbol_search("node_remap_end_vaddr")) )
+		return FALSE;
+	end_vaddr = sp->value;
+
+	if (! (sp = symbol_search("node_remap_start_pfn")) )
+		return FALSE;
+	start_pfn = sp->value;
+
+	max_numnodes = get_array_length("node_remap_start_pfn", NULL,
+					sizeof(ulong));
+	if (max_numnodes < 1)
+		max_numnodes = 1;
+
+	ms = machdep->machspec;
+	ms->remap_start_vaddr = calloc(3 * max_numnodes, sizeof(ulong));
+	if (!ms->remap_start_vaddr)
+		error(FATAL, "cannot malloc remap array");
+	ms->remap_end_vaddr = ms->remap_start_vaddr + max_numnodes;
+	ms->remap_start_pfn = ms->remap_end_vaddr + max_numnodes;
+
+	readmem(start_vaddr, KVADDR, ms->remap_start_vaddr,
+		max_numnodes * sizeof(ulong), "node_remap_start_vaddr",
+		FAULT_ON_ERROR);
+	readmem(end_vaddr, KVADDR, ms->remap_end_vaddr,
+		max_numnodes * sizeof(ulong), "node_remap_end_vaddr",
+		FAULT_ON_ERROR);
+	readmem(start_pfn, KVADDR, ms->remap_start_pfn,
+		max_numnodes * sizeof(ulong), "node_remap_end_vaddr",
+		FAULT_ON_ERROR);
+	ms->max_numnodes = max_numnodes;
+
+	return TRUE;
+}
+
+/* Remap-allocator lookup: on a hit, set *paddr and return TRUE. */
+static int
+x86_kvtop_remap(ulong kvaddr, physaddr_t *paddr)
+{
+	struct machine_specific *ms = machdep->machspec;
+	int node;
+
+	/* No scan while max_numnodes <= 0 (not yet set up, or unused). */
+	for (node = 0; node < ms->max_numnodes; node++) {
+		ulong base = ms->remap_start_vaddr[node];
+
+		if (kvaddr < base || kvaddr >= ms->remap_end_vaddr[node])
+			continue;
+		*paddr = PTOB(ms->remap_start_pfn[node]) + kvaddr - base;
+		return TRUE;
+	}
+	return FALSE;
+}
+
+/*
  *  Needs to be done this way because of potential 4G/4G split.
  */
 static int 
@@ -2768,12 +2841,13 @@ x86_kvtop(struct task_context *tc, ulong
 		}
 		pgd = (ulong *)symbol_value("idle_pg_table_l2");
 	} else {
-		if (!vt->vmalloc_start) {
+		if (x86_kvtop_remap(kvaddr, paddr)) {
+			if (!verbose)
+				return TRUE;
+		} else if (!vt->vmalloc_start) {
 			*paddr = VTOP(kvaddr);
 			return TRUE;
-		}
-
-		if (!IS_VMALLOC_ADDR(kvaddr)) { 
+		} else if (!IS_VMALLOC_ADDR(kvaddr)) { 
 			*paddr = VTOP(kvaddr);
 			if (!verbose)
 				return TRUE;
@@ -3023,12 +3097,13 @@ x86_kvtop_PAE(struct task_context *tc, u
 		else
 			pgd = (ulonglong *)symbol_value("idle_pg_table");
 	} else {
-		if (!vt->vmalloc_start) {
+		if (x86_kvtop_remap(kvaddr, paddr)) {
+			if (!verbose)
+				return TRUE;
+		} else if (!vt->vmalloc_start) {
 			*paddr = VTOP(kvaddr);
 			return TRUE;
-		}
-
-		if (!IS_VMALLOC_ADDR(kvaddr)) { 
+		} else if (!IS_VMALLOC_ADDR(kvaddr)) { 
 			*paddr = VTOP(kvaddr);
 			if (!verbose)
 				return TRUE;
@@ -3354,6 +3429,8 @@ x86_dump_machdep_table(ulong arg)
         int others;
 	ulong xen_wpt;
 	char buf[BUFSIZE];
+	struct machine_specific *ms;
+	int i, j, max_numnodes;
 
 	switch (arg) {
 	default:
@@ -3489,6 +3566,39 @@ x86_dump_machdep_table(ulong arg)
 		machdep->machspec->last_ptbl_read_PAE);
 	fprintf(fp, "                 page_protnone: %lx\n",
 		machdep->machspec->page_protnone);
+
+	ms = machdep->machspec;
+	max_numnodes = ms->max_numnodes;
+	fprintf(fp, "                  MAX_NUMNODES: ");
+	if (max_numnodes < 0) {	/* -1: x86_init() saw remap_init() fail */
+		fprintf(fp, "(unused)\n");
+	} else {
+		fprintf(fp, "%d\n", max_numnodes);
+		/* Each table below is printed eight entries per row. */
+		fprintf(fp, "             remap_start_vaddr:");
+		for (i = 0; i < max_numnodes; ++i) {
+			if ((i % 8) == 0)
+				fprintf(fp, "\n        ");
+			fprintf(fp, "%08lx ", ms->remap_start_vaddr[i]);
+		}
+		fprintf(fp, "\n");
+
+		fprintf(fp, "               remap_end_vaddr:");
+		for (i = 0; i < max_numnodes; ++i) {
+			if ((i % 8) == 0)
+				fprintf(fp, "\n        ");
+			fprintf(fp, "%08lx ", ms->remap_end_vaddr[i]);
+		}
+		fprintf(fp, "\n");
+
+		fprintf(fp, "               remap_start_pfn:");
+		for (i = 0; i < max_numnodes; ++i) {
+			if ((i % 8) == 0)
+				fprintf(fp, "\n        ");
+			fprintf(fp, "%08lx ", ms->remap_start_pfn[i]);
+		}
+		fprintf(fp, "\n");
+	}
 }
 
 /*
--
Crash-utility mailing list
Crash-utility@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/crash-utility

[Index of Archives]     [Fedora Development]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [KDE Users]     [Fedora Tools]

 

Powered by Linux