----- "Chandru" <chandru@xxxxxxxxxx> wrote: > This thread relates to an old issue discussed earlier here ... > https://www.redhat.com/archives/crash-utility/2008-April/msg00007.html. > The following patch currently fixes the issue. The kernel cpu possible,present > and online cpu map is not available until cpu_maps_init() initializes them. Hence > we remap the nd->nt_prstatus_percpu array to online cpus right after a call to > this function. A couple points: > Signed-off-by: Chandru Siddalingappa <chandru@xxxxxxxxxxxxxxxxxx> > Cc: Haren Myneni <haren@xxxxxxxxxx> > --- > > --- crash-4.0-8.10/ppc64.c.orig 2009-06-08 16:08:09.000000000 +0530 > +++ crash-4.0-8.10/ppc64.c 2009-06-08 18:47:04.000000000 +0530 > @@ -2407,13 +2407,11 @@ ppc64_paca_init(void) > if (!symbol_exists("paca")) > error(FATAL, "PPC64: Could not find 'paca' symbol\n"); > > - if (cpu_map_addr("present")) > - map = PRESENT; > - else if (cpu_map_addr("online")) > - map = ONLINE; > + if (cpu_map_addr("possible")) > + map = POSSIBLE; > else > error(FATAL, > - "PPC64: cannot find 'cpu_present_map' or 'cpu_online_map' > symbols\n"); > + "PPC64: cannot find 'cpu_possible_map' symbol\n"); > > if (!MEMBER_EXISTS("paca_struct", "data_offset")) > return; Depending upon "cpu_possible_map" breaks backwards-compatibility for old kernels that don't even have a "cpu_possible_map". The function will still need to fall back to *something* that exists instead of killing the whole crash session. > @@ -2424,7 +2422,7 @@ ppc64_paca_init(void) > cpu_paca_buf = GETBUF(SIZE(ppc64_paca)); > > if (!(nr_paca = get_array_length("paca", NULL, 0))) > - nr_paca = NR_CPUS; > + nr_paca = kt->kernel_NR_CPUS; > > if (nr_paca > NR_CPUS) { > error(WARNING, It is possible that kt->kernel_NR_CPUS may not even be initialized at this point in time -- and for that matter, it is possible that kt->kernel_NR_CPUS may *never* be initialized. So for that reason, whenever it is used, the code first checks for a non-zero value. 
And if it is zero, the code defaults to the compiled-in, equal-to-or-higher value of NR_CPUS. > @@ -2435,7 +2433,7 @@ ppc64_paca_init(void) > > for (i = cpus = 0; i < nr_paca; i++) { > /* > - * CPU present (or online)? > + * CPU in possible map ? > */ > if (!in_cpu_map(map, i)) > continue; > --- crash-4.0-8.10/kernel.c.orig 2009-06-08 16:07:53.000000000 +0530 > +++ crash-4.0-8.10/kernel.c 2009-06-08 16:48:53.000000000 +0530 > @@ -74,6 +74,9 @@ kernel_init() > > cpu_maps_init(); > > + if (KDUMP_DUMPFILE()) > + map_prstatus_array(); > + > kt->stext = symbol_value("_stext"); > kt->etext = symbol_value("_etext"); > get_text_init_space(); > --- crash-4.0-8.10/netdump.c.orig 2009-06-08 16:07:58.000000000 +0530 > +++ crash-4.0-8.10/netdump.c 2009-06-08 17:40:36.000000000 +0530 > @@ -45,6 +45,35 @@ static void check_dumpfile_size(char *); > (machine_type("IA64") || machine_type("PPC64")) > > /* > + * kdump installs NT_PRSTATUS elf sections only to the cpus > + * that were online during dumping. Hence we call into > + * this function after reading the cpu map from the kernel, > + * to remap the NT_PRSTATUS sections only to the online cpus > + */ > +void map_prstatus_array(void) > +{ > + void *nt_ptr; > + int i, j; > + > + /* temporary buffer to hold the prstatus_percpu array */ > + if ((nt_ptr = (void *)calloc(nd->num_prstatus_notes, > + sizeof(void *))) == NULL) > + error(FATAL, > + "cannot allocate a buffer to hold prstatus_percpu array\n"); > + > + memcpy((void *)nt_ptr, nd->nt_prstatus_percpu, > + nd->num_prstatus_notes * sizeof(void *)); > + memset(nd->nt_prstatus_percpu, 0, nd->num_prstatus_notes); > + > + /* re-populate the array with the sections mapping to online cpus > */ > + for (i = 0, j = 0; i < kt->kernel_NR_CPUS; i++) > + if (in_cpu_map(ONLINE, i)) > + ((unsigned long *)nd->nt_prstatus_percpu)[i] = > + ((unsigned long *)nt_ptr)[j++]; > + free(nt_ptr); > +} Same thing with kt->kernel_NR_CPUS usage above... 
> + > +/* > * Determine whether a file is a netdump/diskdump/kdump creation, > * and if TRUE, initialize the vmcore_data structure. > */ > @@ -618,7 +647,7 @@ get_netdump_panic_task(void) > crashing_cpu = -1; > if (kernel_symbol_exists("crashing_cpu")) { > get_symbol_data("crashing_cpu", sizeof(int), &i); > - if ((i >= 0) && (i < nd->num_prstatus_notes)) { > + if ((i >= 0) && in_cpu_map(ONLINE, i)) { > crashing_cpu = i; > if (CRASHDEBUG(1)) > error(INFO, > @@ -2236,7 +2265,7 @@ get_netdump_regs_ppc64(struct bt_info *b > * CPUs if they responded to an IPI. > */ > if (nd->num_prstatus_notes > 1) { > - if (bt->tc->processor >= nd->num_prstatus_notes) > + if (!nd->nt_prstatus_percpu[bt->tc->processor]) > error(FATAL, > "cannot determine NT_PRSTATUS ELF note " > "for %s task: %lx\n", > And lastly, when I run a kernel with this patch against a set of x86_64-only dumpfiles, I get a segmentation violation like this on certain kdump kernels: ... please wait... (determining panic task) Program received signal SIGSEGV, Segmentation fault. 0x000000000051c79c in get_netdump_panic_task () at netdump.c:719 719 len = roundup(len + note64->n_namesz, 4); (gdb) bt #0 0x000000000051c79c in get_netdump_panic_task () at netdump.c:719 #1 0x0000000000521ae5 in get_kdump_panic_task () at netdump.c:2316 #2 0x00000000004a5550 in get_dumpfile_panic_task () at task.c:5493 #3 0x00000000004a51b1 in panic_search () at task.c:5386 #4 0x00000000004a2ef6 in get_panic_context () at task.c:4574 #5 0x00000000004974ee in task_init () at task.c:456 #6 0x0000000000449e3a in main_loop () at main.c:536 ... And if I remove the call to map_prstatus_array(), it works OK again. I haven't dug into what changed to cause the problem though... Dave -- Crash-utility mailing list Crash-utility@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/crash-utility