On Thu, 2010-03-04 at 09:00 +0800, Zhang, Yanmin wrote: > On Wed, 2010-03-03 at 11:15 +0100, Peter Zijlstra wrote: > > On Wed, 2010-03-03 at 17:27 +0800, Zhang, Yanmin wrote: > > > -#ifndef perf_misc_flags > > > -#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ > > > - PERF_RECORD_MISC_KERNEL) > > > -#define perf_instruction_pointer(regs) instruction_pointer(regs) > > > -#endif > > > > Ah, that #ifndef is for powerpc, which I think you just broke. > Thanks for the reminder. I deleted powerpc codes when building cscope > lib. > > It seems perf_save_virt_ip/perf_reset_virt_ip interfaces are ugly. I plan to > change them to a callback function struct and kvm registers its version to perf. > > Such like: > struct perf_guest_info_callbacks { > int (*is_in_guest)(); > u64 (*get_guest_ip)(); > int (*copy_guest_stack)(); > int (*reset_in_guest)(); > ... > }; > int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *); > int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *); > > It's more scalable and neater. In case you are losing patience, I have worked out a new patch against 2.6.34-rc1. It works with: #perf kvm --guest --guestkallsyms /guest/os/kernel/proc/kallsyms --guestmodules /guest/os/proc/modules top It also supports collecting both host-side and guest-side data at the same time: #perf kvm --host --guest --guestkallsyms /guest/os/kernel/proc/kallsyms --guestmodules /guest/os/proc/modules top The first output line of top shows the guest kernel/user space percentages. Or just the host side: #perf kvm --host As the perf tool source code has changed a lot, I am still working on perf kvm record and report. 
--- diff -Nraup linux-2.6.34-rc1/arch/x86/include/asm/ptrace.h linux-2.6.34-rc1_work/arch/x86/include/asm/ptrace.h --- linux-2.6.34-rc1/arch/x86/include/asm/ptrace.h 2010-03-09 13:04:20.730596079 +0800 +++ linux-2.6.34-rc1_work/arch/x86/include/asm/ptrace.h 2010-03-10 17:06:34.228953260 +0800 @@ -167,6 +167,15 @@ static inline int user_mode(struct pt_re #endif } +static inline int user_mode_cs(u16 cs) +{ +#ifdef CONFIG_X86_32 + return (cs & SEGMENT_RPL_MASK) == USER_RPL; +#else + return !!(cs & 3); +#endif +} + static inline int user_mode_vm(struct pt_regs *regs) { #ifdef CONFIG_X86_32 diff -Nraup linux-2.6.34-rc1/arch/x86/kvm/vmx.c linux-2.6.34-rc1_work/arch/x86/kvm/vmx.c --- linux-2.6.34-rc1/arch/x86/kvm/vmx.c 2010-03-09 13:04:20.758593132 +0800 +++ linux-2.6.34-rc1_work/arch/x86/kvm/vmx.c 2010-03-10 17:11:49.709019136 +0800 @@ -26,6 +26,7 @@ #include <linux/sched.h> #include <linux/moduleparam.h> #include <linux/ftrace_event.h> +#include <linux/perf_event.h> #include "kvm_cache_regs.h" #include "x86.h" @@ -3632,6 +3633,43 @@ static void update_cr8_intercept(struct vmcs_write32(TPR_THRESHOLD, irr); } +DEFINE_PER_CPU(int, kvm_in_guest) = {0}; + +static void kvm_set_in_guest(void) +{ + percpu_write(kvm_in_guest, 1); +} + +static int kvm_is_in_guest(void) +{ + return percpu_read(kvm_in_guest); +} + +static int kvm_is_user_mode(void) +{ + int user_mode; + user_mode = user_mode_cs(vmcs_read16(GUEST_CS_SELECTOR)); + return user_mode; +} + +static u64 kvm_get_guest_ip(void) +{ + return vmcs_readl(GUEST_RIP); +} + +static void kvm_reset_in_guest(void) +{ + if (percpu_read(kvm_in_guest)) + percpu_write(kvm_in_guest, 0); +} + +static struct perf_guest_info_callbacks kvm_guest_cbs = { + .is_in_guest = kvm_is_in_guest, + .is_user_mode = kvm_is_user_mode, + .get_guest_ip = kvm_get_guest_ip, + .reset_in_guest = kvm_reset_in_guest +}; + static void vmx_complete_interrupts(struct vcpu_vmx *vmx) { u32 exit_intr_info; @@ -3653,8 +3691,11 @@ static void vmx_complete_interrupts(stru 
/* We need to handle NMIs before interrupts are enabled */ if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && - (exit_intr_info & INTR_INFO_VALID_MASK)) + (exit_intr_info & INTR_INFO_VALID_MASK)) { + kvm_set_in_guest(); asm("int $2"); + kvm_reset_in_guest(); + } idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; @@ -4251,6 +4292,8 @@ static int __init vmx_init(void) if (bypass_guest_pf) kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); + perf_register_guest_info_callbacks(&kvm_guest_cbs); + return 0; out3: @@ -4266,6 +4309,8 @@ out: static void __exit vmx_exit(void) { + perf_unregister_guest_info_callbacks(&kvm_guest_cbs); + free_page((unsigned long)vmx_msr_bitmap_legacy); free_page((unsigned long)vmx_msr_bitmap_longmode); free_page((unsigned long)vmx_io_bitmap_b); diff -Nraup linux-2.6.34-rc1/include/linux/perf_event.h linux-2.6.34-rc1_work/include/linux/perf_event.h --- linux-2.6.34-rc1/include/linux/perf_event.h 2010-03-09 13:04:28.905944253 +0800 +++ linux-2.6.34-rc1_work/include/linux/perf_event.h 2010-03-10 17:06:34.228953260 +0800 @@ -287,11 +287,13 @@ struct perf_event_mmap_page { __u64 data_tail; /* user-space written tail */ }; -#define PERF_RECORD_MISC_CPUMODE_MASK (3 << 0) +#define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0) #define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) #define PERF_RECORD_MISC_KERNEL (1 << 0) #define PERF_RECORD_MISC_USER (2 << 0) #define PERF_RECORD_MISC_HYPERVISOR (3 << 0) +#define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0) +#define PERF_RECORD_MISC_GUEST_USER (5 << 0) struct perf_event_header { __u32 type; @@ -439,6 +441,13 @@ enum perf_callchain_context { # include <asm/perf_event.h> #endif +struct perf_guest_info_callbacks { + int (*is_in_guest) (void); + int (*is_user_mode) (void); + u64 (*get_guest_ip) (void); + void (*reset_in_guest) (void); +}; + #ifdef CONFIG_HAVE_HW_BREAKPOINT #include <asm/hw_breakpoint.h> #endif @@ -849,6 +858,10 @@ static inline void perf_event_mmap(struc 
__perf_event_mmap(vma); } +extern u64 perf_instruction_pointer(struct pt_regs *regs); +int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *); +int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *); + extern void perf_event_comm(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); @@ -862,12 +875,6 @@ extern void perf_event_init(void); extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size); extern void perf_bp_event(struct perf_event *event, void *data); -#ifndef perf_misc_flags -#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ - PERF_RECORD_MISC_KERNEL) -#define perf_instruction_pointer(regs) instruction_pointer(regs) -#endif - extern int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size, int nmi, int sample); @@ -902,6 +909,13 @@ perf_sw_event(u32 event_id, u64 nr, int static inline void perf_bp_event(struct perf_event *event, void *data) { } +static inline int perf_register_guest_info_callbacks +(struct perf_guest_info_callbacks *) {return 0; } +static inline int perf_unregister_guest_info_callbacks +(struct perf_guest_info_callbacks *) {return 0; } + +#define perf_instruction_pointer(event, regs) instruction_pointer(regs) + static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } diff -Nraup linux-2.6.34-rc1/kernel/perf_event.c linux-2.6.34-rc1_work/kernel/perf_event.c --- linux-2.6.34-rc1/kernel/perf_event.c 2010-03-09 13:04:30.085942017 +0800 +++ linux-2.6.34-rc1_work/kernel/perf_event.c 2010-03-10 17:06:34.232905199 +0800 @@ -2807,6 +2807,50 @@ __weak struct perf_callchain_entry *perf } /* + * We assume there is only KVM supporting the callbacks. 
+ * Later on, we might change it to a list if there is + * another virtualization implementation supporting the callbacks. + */ +static struct perf_guest_info_callbacks *perf_guest_cbs; + +int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks * cbs) +{ + perf_guest_cbs = cbs; + return 0; +} +EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks); + +int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks * cbs) +{ + perf_guest_cbs = NULL; + return 0; +} +EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); + +u64 perf_instruction_pointer(struct pt_regs *regs) +{ + u64 ip; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + ip = perf_guest_cbs->get_guest_ip(); + } else + ip = instruction_pointer(regs); + return ip; +} + +#ifndef perf_misc_flags +static inline unsigned int perf_misc_flags(struct pt_regs *regs) +{ + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + return perf_guest_cbs->is_user_mode() ? + PERF_RECORD_MISC_GUEST_USER : + PERF_RECORD_MISC_GUEST_KERNEL; + } else + return user_mode(regs) ? 
PERF_RECORD_MISC_USER : + PERF_RECORD_MISC_KERNEL; +} +#endif + +/* * Output */ static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail, diff -Nraup linux-2.6.34-rc1/tools/perf/builtin-diff.c linux-2.6.34-rc1_work/tools/perf/builtin-diff.c --- linux-2.6.34-rc1/tools/perf/builtin-diff.c 2010-03-09 13:04:31.373942905 +0800 +++ linux-2.6.34-rc1_work/tools/perf/builtin-diff.c 2010-03-10 17:06:34.232905199 +0800 @@ -222,6 +222,9 @@ int cmd_diff(int argc, const char **argv input_new = argv[1]; } else input_new = argv[0]; + } else if (symbol_conf.guest_vmlinux_name || symbol_conf.guest_kallsyms) { + input_old = "perf.data.host"; + input_new = "perf.data.guest"; } symbol_conf.exclude_other = false; diff -Nraup linux-2.6.34-rc1/tools/perf/builtin.h linux-2.6.34-rc1_work/tools/perf/builtin.h --- linux-2.6.34-rc1/tools/perf/builtin.h 2010-03-09 13:04:31.377861392 +0800 +++ linux-2.6.34-rc1_work/tools/perf/builtin.h 2010-03-10 17:06:34.232905199 +0800 @@ -32,5 +32,6 @@ extern int cmd_version(int argc, const c extern int cmd_probe(int argc, const char **argv, const char *prefix); extern int cmd_kmem(int argc, const char **argv, const char *prefix); extern int cmd_lock(int argc, const char **argv, const char *prefix); +extern int cmd_kvm(int argc, const char **argv, const char *prefix); #endif diff -Nraup linux-2.6.34-rc1/tools/perf/builtin-kvm.c linux-2.6.34-rc1_work/tools/perf/builtin-kvm.c --- linux-2.6.34-rc1/tools/perf/builtin-kvm.c 1970-01-01 08:00:00.000000000 +0800 +++ linux-2.6.34-rc1_work/tools/perf/builtin-kvm.c 2010-03-10 17:06:34.232905199 +0800 @@ -0,0 +1,123 @@ +#include "builtin.h" +#include "perf.h" + +#include "util/util.h" +#include "util/cache.h" +#include "util/symbol.h" +#include "util/thread.h" +#include "util/header.h" +#include "util/session.h" + +#include "util/parse-options.h" +#include "util/trace-event.h" + +#include "util/debug.h" + +#include <sys/prctl.h> + +#include <semaphore.h> +#include <pthread.h> +#include <math.h> + 
+static char *file_name = NULL; +static char name_buffer[256]; + +int perf_host = 1; +int perf_guest = 0; + +static const char * const kvm_usage[] = { + "perf kvm [<options>] {top|record|report|diff}", + NULL +}; + +static const struct option kvm_options[] = { + OPT_STRING('i', "input", &file_name, "file", + "Input file name"), + OPT_STRING('o', "output", &file_name, "file", + "Output file name"), + OPT_BOOLEAN(0, "guest", &perf_guest, + "Collect guest os data"), + OPT_BOOLEAN(0, "host", &perf_host, + "Collect guest os data"), + OPT_STRING(0, "guestvmlinux", &symbol_conf.guest_vmlinux_name, "file", + "file saving guest os vmlinux"), + OPT_STRING(0, "guestkallsyms", &symbol_conf.guest_kallsyms, "file", + "file saving guest os /proc/kallsyms"), + OPT_STRING(0, "guestmodules", &symbol_conf.guest_modules, "file", + "file saving guest os /proc/modules"), + OPT_END() +}; + +static int __cmd_record(int argc, const char **argv) +{ + int rec_argc, i = 0, j; + const char **rec_argv; + + rec_argc = argc + 2; + rec_argv = calloc(rec_argc + 1, sizeof(char *)); + rec_argv[i++] = strdup("record"); + rec_argv[i++] = strdup("-o"); + rec_argv[i++] = strdup(file_name); + for (j = 1; j < argc; j++, i++) + rec_argv[i] = argv[j]; + + BUG_ON(i != rec_argc); + + return cmd_record(i, rec_argv, NULL); +} + +static int __cmd_report(int argc, const char **argv) +{ + int rec_argc, i = 0, j; + const char **rec_argv; + + rec_argc = argc + 2; + rec_argv = calloc(rec_argc + 1, sizeof(char *)); + rec_argv[i++] = strdup("report"); + rec_argv[i++] = strdup("-i"); + rec_argv[i++] = strdup(file_name); + for (j = 1; j < argc; j++, i++) + rec_argv[i] = argv[j]; + + BUG_ON(i != rec_argc); + + return cmd_report(i, rec_argv, NULL); +} + +int cmd_kvm(int argc, const char **argv, const char *prefix __used) +{ + perf_host = perf_guest = 0; + + argc = parse_options(argc, argv, kvm_options, kvm_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + if (!argc) + usage_with_options(kvm_usage, kvm_options); + + if (!perf_host) 
+ perf_guest = 1; + + if (!file_name) { + if (perf_host && !perf_guest) + sprintf(name_buffer, "perf.data.host"); + else if (!perf_host && perf_guest) + sprintf(name_buffer, "perf.data.guest"); + else + sprintf(name_buffer, "perf.data.kvm"); + file_name = name_buffer; + } + + if (!strncmp(argv[0], "rec", 3)) { + return __cmd_record(argc, argv); + } else if (!strncmp(argv[0], "rep", 3)) { + return __cmd_report(argc, argv); + } else if (!strncmp(argv[0], "diff", 4)) { + return cmd_diff(argc, argv, NULL); + } else if (!strncmp(argv[0], "top", 3)) { + return cmd_top(argc, argv, NULL); + } else { + usage_with_options(kvm_usage, kvm_options); + } + + return 0; +} + diff -Nraup linux-2.6.34-rc1/tools/perf/builtin-top.c linux-2.6.34-rc1_work/tools/perf/builtin-top.c --- linux-2.6.34-rc1/tools/perf/builtin-top.c 2010-03-09 13:04:31.377861392 +0800 +++ linux-2.6.34-rc1_work/tools/perf/builtin-top.c 2010-03-10 17:06:34.232905199 +0800 @@ -409,7 +409,8 @@ static double sym_weight(const struct sy } static long samples; -static long userspace_samples; +static long kernel_samples, userspace_samples; +static long guest_us_samples, guest_kernel_samples; static const char CONSOLE_CLEAR[] = "[H[2J"; static void __list_insert_active_sym(struct sym_entry *syme) @@ -449,7 +450,10 @@ static void print_sym_table(void) int printed = 0, j; int counter, snap = !display_weighted ? 
sym_counter : 0; float samples_per_sec = samples/delay_secs; - float ksamples_per_sec = (samples-userspace_samples)/delay_secs; + float ksamples_per_sec = (kernel_samples)/delay_secs; + float userspace_samples_per_sec = (userspace_samples)/delay_secs; + float guest_kernel_samples_per_sec = (guest_kernel_samples)/delay_secs; + float guest_us_samples_per_sec = (guest_us_samples)/delay_secs; float sum_ksamples = 0.0; struct sym_entry *syme, *n; struct rb_root tmp = RB_ROOT; @@ -457,7 +461,8 @@ static void print_sym_table(void) int sym_width = 0, dso_width = 0, max_dso_width; const int win_width = winsize.ws_col - 1; - samples = userspace_samples = 0; + samples = kernel_samples = userspace_samples = 0; + guest_kernel_samples = guest_us_samples = 0; /* Sort the active symbols */ pthread_mutex_lock(&active_symbols_lock); @@ -488,9 +493,19 @@ static void print_sym_table(void) puts(CONSOLE_CLEAR); printf("%-*.*s\n", win_width, win_width, graph_dotted_line); - printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% [", - samples_per_sec, - 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); + if (!perf_guest) { + printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% [", + samples_per_sec, + 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); + } else { + printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% user:%4.1f%% guest kernel:%4.1f%% guest user:%4.1f%% [", + samples_per_sec, + 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)), + 100.0 - (100.0*((samples_per_sec-userspace_samples_per_sec)/samples_per_sec)), + 100.0 - (100.0*((samples_per_sec-guest_kernel_samples_per_sec)/samples_per_sec)), + 100.0 - (100.0*((samples_per_sec-guest_us_samples_per_sec)/samples_per_sec)) + ); + } if (nr_counters == 1 || !display_weighted) { printf("%Ld", (u64)attrs[0].sample_period); @@ -947,9 +962,17 @@ static void event__process_sample(const return; break; case PERF_RECORD_MISC_KERNEL: + ++kernel_samples; if (hide_kernel_symbols) return; break; + 
case PERF_RECORD_MISC_GUEST_KERNEL: + ++guest_kernel_samples; + break; + case PERF_RECORD_MISC_GUEST_USER: + ++guest_us_samples; + /* TODO: we don't process guest user from host side. */ + return; default: return; } diff -Nraup linux-2.6.34-rc1/tools/perf/Makefile linux-2.6.34-rc1_work/tools/perf/Makefile --- linux-2.6.34-rc1/tools/perf/Makefile 2010-03-09 13:04:31.341942020 +0800 +++ linux-2.6.34-rc1_work/tools/perf/Makefile 2010-03-10 17:06:34.232905199 +0800 @@ -458,6 +458,7 @@ BUILTIN_OBJS += builtin-trace.o BUILTIN_OBJS += builtin-probe.o BUILTIN_OBJS += builtin-kmem.o BUILTIN_OBJS += builtin-lock.o +BUILTIN_OBJS += builtin-kvm.o PERFLIBS = $(LIB_FILE) diff -Nraup linux-2.6.34-rc1/tools/perf/perf.c linux-2.6.34-rc1_work/tools/perf/perf.c --- linux-2.6.34-rc1/tools/perf/perf.c 2010-03-09 13:04:31.377861392 +0800 +++ linux-2.6.34-rc1_work/tools/perf/perf.c 2010-03-10 17:06:34.232905199 +0800 @@ -304,6 +304,7 @@ static void handle_internal_command(int { "probe", cmd_probe, 0 }, { "kmem", cmd_kmem, 0 }, { "lock", cmd_lock, 0 }, + { "kvm", cmd_kvm, 0 }, }; unsigned int i; static const char ext[] = STRIP_EXTENSION; diff -Nraup linux-2.6.34-rc1/tools/perf/perf.h linux-2.6.34-rc1_work/tools/perf/perf.h --- linux-2.6.34-rc1/tools/perf/perf.h 2010-03-09 13:04:16.357945701 +0800 +++ linux-2.6.34-rc1_work/tools/perf/perf.h 2010-03-10 17:06:34.236904596 +0800 @@ -131,4 +131,6 @@ struct ip_callchain { u64 ips[0]; }; +extern int perf_host, perf_guest; + #endif diff -Nraup linux-2.6.34-rc1/tools/perf/util/event.c linux-2.6.34-rc1_work/tools/perf/util/event.c --- linux-2.6.34-rc1/tools/perf/util/event.c 2010-03-09 13:04:31.381941876 +0800 +++ linux-2.6.34-rc1_work/tools/perf/util/event.c 2010-03-10 17:06:34.236904596 +0800 @@ -442,12 +442,16 @@ void thread__find_addr_map(struct thread al->thread = self; al->addr = addr; - if (cpumode == PERF_RECORD_MISC_KERNEL) { + if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) { al->level = 'k'; mg = &session->kmaps; - } else if 
(cpumode == PERF_RECORD_MISC_USER) + } else if (cpumode == PERF_RECORD_MISC_USER && perf_host) { al->level = '.'; - else { + } else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) { + al->level = 'g'; + mg = &session->guest_kmaps; + } else { + /* TODO: We don't support guest user space. Might support late */ al->level = 'H'; al->map = NULL; return; @@ -464,10 +468,18 @@ try_again: * "[vdso]" dso, but for now lets use the old trick of looking * in the whole kernel symbol list. */ - if ((long long)al->addr < 0 && mg != &session->kmaps) { + if ((long long)al->addr < 0 && + mg != &session->kmaps && + cpumode == PERF_RECORD_MISC_KERNEL) { mg = &session->kmaps; goto try_again; } + if ((long long)al->addr < 0 && + mg != &session->guest_kmaps && + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) { + mg = &session->guest_kmaps; + goto try_again; + } } else al->addr = al->map->map_ip(al->map, al->addr); } diff -Nraup linux-2.6.34-rc1/tools/perf/util/session.c linux-2.6.34-rc1_work/tools/perf/util/session.c --- linux-2.6.34-rc1/tools/perf/util/session.c 2010-03-09 13:04:31.385942104 +0800 +++ linux-2.6.34-rc1_work/tools/perf/util/session.c 2010-03-10 17:06:34.236904596 +0800 @@ -54,7 +54,12 @@ out_close: static inline int perf_session__create_kernel_maps(struct perf_session *self) { - return map_groups__create_kernel_maps(&self->kmaps, self->vmlinux_maps); + int ret; + ret = map_groups__create_kernel_maps(&self->kmaps, self->vmlinux_maps); + if (ret >= 0) + ret = map_groups__create_guest_kernel_maps(&self->guest_kmaps, + self->guest_vmlinux_maps); + return ret; } struct perf_session *perf_session__new(const char *filename, int mode, bool force) @@ -76,6 +81,7 @@ struct perf_session *perf_session__new(c self->cwdlen = 0; self->unknown_events = 0; map_groups__init(&self->kmaps); + map_groups__init(&self->guest_kmaps); if (mode == O_RDONLY) { if (perf_session__open(self, force) < 0) diff -Nraup linux-2.6.34-rc1/tools/perf/util/session.h 
linux-2.6.34-rc1_work/tools/perf/util/session.h --- linux-2.6.34-rc1/tools/perf/util/session.h 2010-03-09 13:04:31.385942104 +0800 +++ linux-2.6.34-rc1_work/tools/perf/util/session.h 2010-03-10 17:06:34.236904596 +0800 @@ -16,9 +16,11 @@ struct perf_session { unsigned long size; unsigned long mmap_window; struct map_groups kmaps; + struct map_groups guest_kmaps; struct rb_root threads; struct thread *last_match; struct map *vmlinux_maps[MAP__NR_TYPES]; + struct map *guest_vmlinux_maps[MAP__NR_TYPES]; struct events_stats events_stats; unsigned long event_total[PERF_RECORD_MAX]; unsigned long unknown_events; @@ -83,6 +85,6 @@ static inline struct map * perf_session__new_module_map(struct perf_session *self, u64 start, const char *filename) { - return map_groups__new_module(&self->kmaps, start, filename); + return map_groups__new_module(&self->kmaps, start, filename, 0); } #endif /* __PERF_SESSION_H */ diff -Nraup linux-2.6.34-rc1/tools/perf/util/symbol.c linux-2.6.34-rc1_work/tools/perf/util/symbol.c --- linux-2.6.34-rc1/tools/perf/util/symbol.c 2010-03-09 13:04:31.385942104 +0800 +++ linux-2.6.34-rc1_work/tools/perf/util/symbol.c 2010-03-10 17:06:34.236904596 +0800 @@ -27,6 +27,8 @@ enum dso_origin { DSO__ORIG_BUILDID, DSO__ORIG_DSO, DSO__ORIG_KMODULE, + DSO__ORIG_GUEST_KERNEL, + DSO__ORIG_GUEST_KMODULE, DSO__ORIG_NOT_FOUND, }; @@ -34,6 +36,8 @@ static void dsos__add(struct list_head * static struct map *map__new2(u64 start, struct dso *dso, enum map_type type); static int dso__load_kernel_sym(struct dso *self, struct map *map, symbol_filter_t filter); +static int dso__load_guest_kernel_sym(struct dso *self, struct map *map, + symbol_filter_t filter); static int vmlinux_path__nr_entries; static char **vmlinux_path; @@ -184,6 +188,7 @@ struct dso *dso__new(const char *name) self->loaded = 0; self->sorted_by_name = 0; self->has_build_id = 0; + self->kernel = DSO_TYPE_USER; } return self; @@ -523,13 +528,19 @@ static int dso__split_kallsyms(struct ds char 
dso_name[PATH_MAX]; struct dso *dso; - snprintf(dso_name, sizeof(dso_name), "[kernel].%d", - kernel_range++); + if (self->kernel == DSO_TYPE_GUEST_KERNEL) + snprintf(dso_name, sizeof(dso_name), "[guest.kernel].%d", + kernel_range++); + else + snprintf(dso_name, sizeof(dso_name), "[kernel].%d", + kernel_range++); dso = dso__new(dso_name); if (dso == NULL) return -1; + dso->kernel = self->kernel; + curr_map = map__new2(pos->start, dso, map->type); if (curr_map == NULL) { dso__delete(dso); @@ -563,7 +574,10 @@ int dso__load_kallsyms(struct dso *self, return -1; symbols__fixup_end(&self->symbols[map->type]); - self->origin = DSO__ORIG_KERNEL; + if (self->kernel == DSO_TYPE_GUEST_KERNEL) + self->origin = DSO__ORIG_GUEST_KERNEL; + else + self->origin = DSO__ORIG_KERNEL; return dso__split_kallsyms(self, map, filter); } @@ -951,7 +965,7 @@ static int dso__load_sym(struct dso *sel nr_syms = shdr.sh_size / shdr.sh_entsize; memset(&sym, 0, sizeof(sym)); - if (!self->kernel) { + if (self->kernel == DSO_TYPE_USER) { self->adjust_symbols = (ehdr.e_type == ET_EXEC || elf_section_by_name(elf, &ehdr, &shdr, ".gnu.prelink_undo", @@ -983,7 +997,7 @@ static int dso__load_sym(struct dso *sel section_name = elf_sec__name(&shdr, secstrs); - if (self->kernel || kmodule) { + if (self->kernel != DSO_TYPE_USER || kmodule) { char dso_name[PATH_MAX]; if (strcmp(section_name, @@ -1009,6 +1023,7 @@ static int dso__load_sym(struct dso *sel curr_dso = dso__new(dso_name); if (curr_dso == NULL) goto out_elf_end; + curr_dso->kernel = self->kernel; curr_map = map__new2(start, curr_dso, map->type); if (curr_map == NULL) { @@ -1017,9 +1032,15 @@ static int dso__load_sym(struct dso *sel } curr_map->map_ip = identity__map_ip; curr_map->unmap_ip = identity__map_ip; - curr_dso->origin = DSO__ORIG_KERNEL; + if (curr_dso->kernel == DSO_TYPE_GUEST_KERNEL) { + curr_dso->origin = DSO__ORIG_GUEST_KERNEL; + dsos__add(&dsos__guest_kernel, curr_dso); + } else { + curr_dso->origin = DSO__ORIG_KERNEL; + 
dsos__add(&dsos__kernel, curr_dso); + } + map_groups__insert(kmap->kmaps, curr_map); - dsos__add(&dsos__kernel, curr_dso); dso__set_loaded(curr_dso, map->type); } else curr_dso = curr_map->dso; @@ -1240,6 +1261,8 @@ char dso__symtab_origin(const struct dso [DSO__ORIG_BUILDID] = 'b', [DSO__ORIG_DSO] = 'd', [DSO__ORIG_KMODULE] = 'K', + [DSO__ORIG_GUEST_KERNEL] = 'g', + [DSO__ORIG_GUEST_KMODULE] = 'G', }; if (self == NULL || self->origin == DSO__ORIG_NOT_FOUND) @@ -1258,8 +1281,10 @@ int dso__load(struct dso *self, struct m dso__set_loaded(self, map->type); - if (self->kernel) + if (self->kernel == DSO_TYPE_KERNEL) return dso__load_kernel_sym(self, map, filter); + else if (self->kernel == DSO_TYPE_GUEST_KERNEL) + return dso__load_guest_kernel_sym(self, map, filter); name = malloc(size); if (!name) @@ -1463,7 +1488,7 @@ static int map_groups__set_modules_path( static struct map *map__new2(u64 start, struct dso *dso, enum map_type type) { struct map *self = zalloc(sizeof(*self) + - (dso->kernel ? sizeof(struct kmap) : 0)); + (dso->kernel != DSO_TYPE_USER ? 
sizeof(struct kmap) : 0)); if (self != NULL) { /* * ->end will be filled after we load all the symbols @@ -1475,11 +1500,15 @@ static struct map *map__new2(u64 start, } struct map *map_groups__new_module(struct map_groups *self, u64 start, - const char *filename) + const char *filename, int guest) { struct map *map; struct dso *dso = __dsos__findnew(&dsos__kernel, filename); + if (!guest) + dso = __dsos__findnew(&dsos__kernel, filename); + else + dso = __dsos__findnew(&dsos__guest_kernel, filename); if (dso == NULL) return NULL; @@ -1487,16 +1516,20 @@ struct map *map_groups__new_module(struc if (map == NULL) return NULL; - dso->origin = DSO__ORIG_KMODULE; + if (guest) + dso->origin = DSO__ORIG_GUEST_KMODULE; + else + dso->origin = DSO__ORIG_KMODULE; map_groups__insert(self, map); return map; } -static int map_groups__create_modules(struct map_groups *self) +static int __map_groups__create_modules(struct map_groups *self, + const char * filename, int guest) { char *line = NULL; size_t n; - FILE *file = fopen("/proc/modules", "r"); + FILE *file = fopen(filename, "r"); struct map *map; if (file == NULL) @@ -1530,16 +1563,17 @@ static int map_groups__create_modules(st *sep = '\0'; snprintf(name, sizeof(name), "[%s]", line); - map = map_groups__new_module(self, start, name); + map = map_groups__new_module(self, start, name, guest); if (map == NULL) goto out_delete_line; - dso__kernel_module_get_build_id(map->dso); + if (!guest) + dso__kernel_module_get_build_id(map->dso); } free(line); fclose(file); - return map_groups__set_modules_path(self); + return 0; out_delete_line: free(line); @@ -1547,6 +1581,21 @@ out_failure: return -1; } +static int map_groups__create_modules(struct map_groups *self) +{ + int ret; + + ret = __map_groups__create_modules(self, "/proc/modules", 0); + if (ret >= 0) + ret = map_groups__set_modules_path(self); + return ret; +} + +static int map_groups__create_guest_modules(struct map_groups *self) +{ + return __map_groups__create_modules(self, 
symbol_conf.guest_modules, 1); +} + static int dso__load_vmlinux(struct dso *self, struct map *map, const char *vmlinux, symbol_filter_t filter) { @@ -1706,8 +1755,44 @@ out_fixup: return err; } +static int dso__load_guest_kernel_sym(struct dso *self, struct map *map, + symbol_filter_t filter) +{ + int err; + const char *kallsyms_filename; + /* + * if the user specified a vmlinux filename, use it and only + * it, reporting errors to the user if it cannot be used. + * Or use file guest_kallsyms inputted by user on commandline + */ + if (symbol_conf.guest_vmlinux_name != NULL) { + err = dso__load_vmlinux(self, map, + symbol_conf.guest_vmlinux_name, filter); + goto out_try_fixup; + } + + kallsyms_filename = symbol_conf.guest_kallsyms; + if (!kallsyms_filename) + return -1; + err = dso__load_kallsyms(self, kallsyms_filename, map, filter); + if (err > 0) + pr_debug("Using %s for symbols\n", kallsyms_filename); + +out_try_fixup: + if (err > 0) { + if (kallsyms_filename != NULL) + dso__set_long_name(self, strdup("[guest.kernel.kallsyms]")); + map__fixup_start(map); + map__fixup_end(map); + } + + return err; +} + LIST_HEAD(dsos__user); LIST_HEAD(dsos__kernel); +LIST_HEAD(dsos__guest_user); +LIST_HEAD(dsos__guest_kernel); static void dsos__add(struct list_head *head, struct dso *dso) { @@ -1754,6 +1839,8 @@ void dsos__fprintf(FILE *fp) { __dsos__fprintf(&dsos__kernel, fp); __dsos__fprintf(&dsos__user, fp); + __dsos__fprintf(&dsos__guest_kernel, fp); + __dsos__fprintf(&dsos__guest_user, fp); } static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, @@ -1783,7 +1870,19 @@ struct dso *dso__new_kernel(const char * if (self != NULL) { self->short_name = "[kernel]"; - self->kernel = 1; + self->kernel = DSO_TYPE_KERNEL; + } + + return self; +} + +struct dso *dso__new_guest_kernel(const char *name) +{ + struct dso *self = dso__new(name ?: "[guest.kernel.kallsyms]"); + + if (self != NULL) { + self->short_name = "[guest.kernel]"; + self->kernel = 
DSO_TYPE_GUEST_KERNEL; } return self; @@ -1808,6 +1907,16 @@ static struct dso *dsos__create_kernel(c return kernel; } +static struct dso *dsos__create_guest_kernel(const char *vmlinux) +{ + struct dso *kernel = dso__new_guest_kernel(vmlinux); + + kernel->kernel = DSO_TYPE_GUEST_KERNEL; + if (kernel != NULL) + dsos__add(&dsos__guest_kernel, kernel); + return kernel; +} + int __map_groups__create_kernel_maps(struct map_groups *self, struct map *vmlinux_maps[MAP__NR_TYPES], struct dso *kernel) @@ -1956,3 +2065,24 @@ int map_groups__create_kernel_maps(struc map_groups__fixup_end(self); return 0; } + +int map_groups__create_guest_kernel_maps(struct map_groups *self, + struct map *vmlinux_maps[MAP__NR_TYPES]) +{ + struct dso *kernel = dsos__create_guest_kernel(symbol_conf.guest_vmlinux_name); + + if (kernel == NULL) + return -1; + + if (__map_groups__create_kernel_maps(self, vmlinux_maps, kernel) < 0) + return -1; + + if (symbol_conf.use_modules && map_groups__create_guest_modules(self) < 0) + pr_debug("Problems creating module maps, continuing anyway...\n"); + /* + * Now that we have all the maps created, just set the ->end of them: + */ + map_groups__fixup_end(self); + return 0; +} + diff -Nraup linux-2.6.34-rc1/tools/perf/util/symbol.h linux-2.6.34-rc1_work/tools/perf/util/symbol.h --- linux-2.6.34-rc1/tools/perf/util/symbol.h 2010-03-09 13:04:31.385942104 +0800 +++ linux-2.6.34-rc1_work/tools/perf/util/symbol.h 2010-03-10 17:06:34.236904596 +0800 @@ -66,7 +66,10 @@ struct symbol_conf { full_paths; const char *vmlinux_name, *field_sep; - char *dso_list_str, + const char *guest_vmlinux_name, + *guest_kallsyms, + *guest_modules; + char *dso_list_str, *comm_list_str, *sym_list_str, *col_width_list_str; @@ -97,6 +100,12 @@ struct addr_location { bool filtered; }; +enum dso_kernel_type { + DSO_TYPE_USER = 0, + DSO_TYPE_KERNEL, + DSO_TYPE_GUEST_KERNEL +}; + struct dso { struct list_head node; struct rb_root symbols[MAP__NR_TYPES]; @@ -104,7 +113,7 @@ struct dso { u8 
adjust_symbols:1; u8 slen_calculated:1; u8 has_build_id:1; - u8 kernel:1; + enum dso_kernel_type kernel; u8 hit:1; unsigned char origin; u8 sorted_by_name; @@ -118,6 +127,7 @@ struct dso { struct dso *dso__new(const char *name); struct dso *dso__new_kernel(const char *name); +struct dso *dso__new_guest_kernel(const char *name); void dso__delete(struct dso *self); bool dso__loaded(const struct dso *self, enum map_type type); @@ -130,7 +140,7 @@ static inline void dso__set_loaded(struc void dso__sort_by_name(struct dso *self, enum map_type type); -extern struct list_head dsos__user, dsos__kernel; +extern struct list_head dsos__user, dsos__kernel, dsos__guest_user, dsos__guest_kernel; struct dso *__dsos__findnew(struct list_head *head, const char *name); diff -Nraup linux-2.6.34-rc1/tools/perf/util/thread.h linux-2.6.34-rc1_work/tools/perf/util/thread.h --- linux-2.6.34-rc1/tools/perf/util/thread.h 2010-03-09 13:04:31.385942104 +0800 +++ linux-2.6.34-rc1_work/tools/perf/util/thread.h 2010-03-10 17:06:34.236904596 +0800 @@ -79,6 +79,9 @@ int __map_groups__create_kernel_maps(str int map_groups__create_kernel_maps(struct map_groups *self, struct map *vmlinux_maps[MAP__NR_TYPES]); +int map_groups__create_guest_kernel_maps(struct map_groups *self, + struct map *vmlinux_maps[MAP__NR_TYPES]); + struct map *map_groups__new_module(struct map_groups *self, u64 start, - const char *filename); + const char *filename, int guest); #endif /* __PERF_THREAD_H */ -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html