Ken'ichi Ohmichi wrote: > Hello Dave, > > Dave Anderson wrote: > >>>I think that a new virtualization_init() function is a good idea. >>> >>>How about checking whether "Xen" exists in PT_NOTE section of an vmcore >>>at virtualization_init()? I feel that it is safer than reading a xen_start_info >>>pointer. >>> >>>$ readelf -n vmcore >>>Notes at offset 0x00000120 with length 0x00001388: >>> Owner Data size Description >>> CORE 0x00000150 NT_PRSTATUS (prstatus structure) >>> Xen 0x00000020 Unknown note type: (0x01000002) <----- HERE >>> CORE 0x00000150 NT_PRSTATUS (prstatus structure) >>> Xen 0x00000020 Unknown note type: (0x01000002) <----- HERE >>> Xen 0x00000050 Unknown note type: (0x01000001) <----- HERE >>> VMCOREINFO_XEN 0x00000fe4 Unknown note type: (0x00000000) >>>$ >> >>Well there may be a couple issues with that approach. First, we need >>a way to handle/recognize it on live systems. And second, I believe we'll need >>a way to differentiate a pv_ops/xen kernel from a "dedicated" xen kernel. >>And my guess is that the vmcore notes will not differentiate between the >>two "flavors" of xen kernel (unless there's an entry in the VMCOREINFO_XEN >>that will somehow differentiate the two). > > > Thank you for the good comment. I understand your plan, and I think it is > better than mine. Yeah, the more I look into it, the more it appears that support for pv_ops/xen (as well as the other pv_ops clients) is going to require significant changes. For now, only "traditional xen" will be supported, and when pv_ops/xen comes online, we'll take a look at how to merge support for it. Here's my initial update to your patch -- it simply recognizes pv_ops kernels, and disallows the setting of ARCH_XEN in that case. Dave
Index: defs.h =================================================================== RCS file: /nfs/projects/cvs/crash/defs.h,v retrieving revision 1.349 diff -u -r1.349 defs.h --- defs.h 18 Aug 2008 20:38:04 -0000 1.349 +++ defs.h 3 Sep 2008 17:12:46 -0000 @@ -470,11 +470,13 @@ #define RELOC_SET (0x2000000) #define RELOC_FORCE (0x4000000) #define ARCH_OPENVZ (0x8000000) +#define ARCH_PVOPS (0x10000000) #define GCC_VERSION_DEPRECATED (GCC_3_2|GCC_3_2_3|GCC_2_96|GCC_3_3_2|GCC_3_3_3) #define XEN() (kt->flags & ARCH_XEN) #define OPENVZ() (kt->flags & ARCH_OPENVZ) +#define PVOPS() (kt->flags & ARCH_PVOPS) #define XEN_MACHINE_TO_MFN(m) ((ulonglong)(m) >> PAGESHIFT()) #define XEN_PFN_TO_PSEUDO(p) ((ulonglong)(p) << PAGESHIFT()) @@ -2124,6 +2126,8 @@ #define VMEMMAP_VADDR_2_6_24 0xffffe20000000000 #define VMEMMAP_END_2_6_24 0xffffe2ffffffffff +#define PAGE_OFFSET_2_6_27 0xffff880000000000 + #define USERSPACE_TOP_XEN 0x0000800000000000 #define PAGE_OFFSET_XEN 0xffff880000000000 #define VMALLOC_START_ADDR_XEN 0xffffc20000000000 @@ -2221,6 +2225,17 @@ #define PAGEBASE(X) (((ulong)(X)) & (ulong)machdep->pagemask) +#define _CPU_PDA_READ2(CPU, BUFFER) \ + ((readmem(symbol_value("_cpu_pda"), \ + KVADDR, &cpu_pda_addr, sizeof(unsigned long), \ + "_cpu_pda addr", FAULT_ON_ERROR)) && \ + (readmem(cpu_pda_addr + ((CPU) * sizeof(void *)), \ + KVADDR, &cpu_pda_addr, sizeof(unsigned long), \ + "_cpu_pda addr", FAULT_ON_ERROR)) && \ + (cpu_pda_addr) && \ + (readmem(cpu_pda_addr, KVADDR, (BUFFER), SIZE(x8664_pda), \ + "cpu_pda entry", FAULT_ON_ERROR))) + #define _CPU_PDA_READ(CPU, BUFFER) \ ((STRNEQ("_cpu_pda", closest_symbol((symbol_value("_cpu_pda") + \ ((CPU) * sizeof(unsigned long)))))) && \ @@ -3329,6 +3344,7 @@ long SIZE_option(long, long, char *, char *, int, char *, char *); void dump_trace(ulong *); int enumerator_value(char *, long *); +void paravirt_init(void); /* * memory.c Index: kernel.c =================================================================== RCS file: 
/nfs/projects/cvs/crash/kernel.c,v retrieving revision 1.186 diff -u -r1.186 kernel.c --- kernel.c 6 Aug 2008 20:53:33 -0000 1.186 +++ kernel.c 3 Sep 2008 17:32:09 -0000 @@ -79,7 +79,8 @@ kt->end = symbol_value("_end"); /* - * For the Xen architecture, default to writable page tables unless: + * For the traditional (non-pv_ops) Xen architecture, default to writable + * page tables unless: * * (1) it's an "xm save" CANONICAL_PAGE_TABLES dumpfile, or * (2) the --shadow_page_tables option was explicitly entered. @@ -88,7 +89,7 @@ * it's not an "xm save" canonical dumpfile, then we have no choice * but to presume shadow page tables. */ - if (symbol_exists("xen_start_info")) { + if (!PVOPS() && symbol_exists("xen_start_info")) { kt->flags |= ARCH_XEN; if (!(kt->xen_flags & (SHADOW_PAGE_TABLES|CANONICAL_PAGE_TABLES))) kt->xen_flags |= WRITABLE_PAGE_TABLES; @@ -3887,6 +3888,8 @@ fprintf(fp, "%sARCH_XEN", others++ ? "|" : ""); if (kt->flags & ARCH_OPENVZ) fprintf(fp, "%sARCH_OPENVZ", others++ ? "|" : ""); + if (kt->flags & ARCH_PVOPS) + fprintf(fp, "%sARCH_PVOPS", others++ ? "|" : ""); if (kt->flags & NO_IKCONFIG) fprintf(fp, "%sNO_IKCONFIG", others++ ? "|" : ""); if (kt->flags & DWARF_UNWIND) @@ -6290,3 +6293,24 @@ break; } } + +/* + * With the evidence available, attempt to pre-determine whether + * this is a paravirt-capable kernel running as bare-metal, xen, + * kvm, etc. + * + * NOTE: Only bare-metal pv_ops kernels are supported so far. + */ +void +paravirt_init(void) +{ + /* + * pv_init_ops appears to be (as of 2.6.27) an arch-common + * symbol. This may have to change. 
+ */ + if (kernel_symbol_exists("pv_init_ops")) { + if (CRASHDEBUG(1)) + error(INFO, "pv_init_ops exists: ARCH_PVOPS\n"); + kt->flags |= ARCH_PVOPS; + } +} Index: main.c =================================================================== RCS file: /nfs/projects/cvs/crash/main.c,v retrieving revision 1.87 diff -u -r1.87 main.c --- main.c 18 Aug 2008 20:38:05 -0000 1.87 +++ main.c 3 Sep 2008 17:04:59 -0000 @@ -467,6 +467,7 @@ hq_init(); machdep_init(PRE_SYMTAB); symtab_init(); + paravirt_init(); machdep_init(PRE_GDB); datatype_init(); Index: x86_64.c =================================================================== RCS file: /nfs/projects/cvs/crash/x86_64.c,v retrieving revision 1.102 diff -u -r1.102 x86_64.c --- x86_64.c 23 Apr 2008 19:54:00 -0000 1.102 +++ x86_64.c 3 Sep 2008 17:19:26 -0000 @@ -147,7 +147,9 @@ case PRE_GDB: if (!(machdep->flags & VM_FLAGS)) { if (symbol_exists("xen_start_info")) { - if (symbol_exists("low_pml4") && + if (PVOPS()) + machdep->flags |= VM_2_6_11; + else if (symbol_exists("low_pml4") && symbol_exists("swap_low_mappings")) machdep->flags |= VM_XEN_RHEL4; else @@ -178,7 +180,6 @@ case VM_2_6_11: /* 2.6.11 layout */ machdep->machspec->userspace_top = USERSPACE_TOP_2_6_11; - machdep->machspec->page_offset = PAGE_OFFSET_2_6_11; machdep->machspec->vmalloc_start_addr = VMALLOC_START_ADDR_2_6_11; machdep->machspec->vmalloc_end = VMALLOC_END_2_6_11; machdep->machspec->modules_vaddr = MODULES_VADDR_2_6_11; @@ -190,6 +191,13 @@ if (symbol_exists("vmemmap_populate")) machdep->flags |= VMEMMAP; + if (kernel_symbol_exists("end_pfn")) + /* 2.6.11 layout */ + machdep->machspec->page_offset = PAGE_OFFSET_2_6_11; + else + /* 2.6.27 layout */ + machdep->machspec->page_offset = PAGE_OFFSET_2_6_27; + machdep->uvtop = x86_64_uvtop_level4; break; @@ -534,7 +542,7 @@ static void x86_64_cpu_pda_init(void) { - int i, cpus, nr_pda, cpunumber, _cpu_pda; + int i, cpus, nr_pda, cpunumber, _cpu_pda, _boot_cpu_pda; char *cpu_pda_buf; ulong level4_pgt, data_offset, 
cpu_pda_addr; struct syment *sp, *nsp; @@ -569,11 +577,21 @@ _cpu_pda = FALSE; } } - + if (_cpu_pda) { + if (symbol_exists("_boot_cpu_pda")) + _boot_cpu_pda = TRUE; + else + _boot_cpu_pda = FALSE; + } for (i = cpus = 0; i < nr_pda; i++) { if (_cpu_pda) { - if (!_CPU_PDA_READ(i, cpu_pda_buf)) - break; + if (_boot_cpu_pda) { + if (!_CPU_PDA_READ2(i, cpu_pda_buf)) + break; + } else { + if (!_CPU_PDA_READ(i, cpu_pda_buf)) + break; + } } else { if (!CPU_PDA_READ(i, cpu_pda_buf)) break; @@ -3914,7 +3932,7 @@ int x86_64_get_smp_cpus(void) { - int i, cpus, nr_pda, cpunumber, _cpu_pda; + int i, cpus, nr_pda, cpunumber, _cpu_pda, _boot_cpu_pda; char *cpu_pda_buf; ulong level4_pgt, cpu_pda_addr; @@ -3940,10 +3958,21 @@ _cpu_pda = FALSE; } } + if (_cpu_pda) { + if (symbol_exists("_boot_cpu_pda")) + _boot_cpu_pda = TRUE; + else + _boot_cpu_pda = FALSE; + } for (i = cpus = 0; i < nr_pda; i++) { if (_cpu_pda) { - if (!_CPU_PDA_READ(i, cpu_pda_buf)) - break; + if (_boot_cpu_pda) { + if (!_CPU_PDA_READ2(i, cpu_pda_buf)) + break; + } else { + if (!_CPU_PDA_READ(i, cpu_pda_buf)) + break; + } } else { if (!CPU_PDA_READ(i, cpu_pda_buf)) break;
-- Crash-utility mailing list Crash-utility@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/crash-utility