On Wed, 4 Jul 2018 14:39:01 +0300
"Yordan Karadzhov (VMware)" <y.karadz@xxxxxxxxx> wrote:

> >  /**
> >   * @brief Load the content of the trace data file into an array of
> >   *	  kshark_entries. This function provides fast loading, however the
>
> Actually, after applying the patch for trace-input.c and this patch on
> top, kshark_load_data_entries() becomes slower than
> kshark_load_data_records(). Just make this clear in the description of
> the functions.
>
> > @@ -521,9 +587,11 @@ ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
> >  			       struct kshark_entry ***data_rows)

In my tests, it showed a 30% slowdown. I really didn't like that. So I
looked at why it was so slow, and it appeared to be twofold. One was that
I was doing a double allocation (allocating for both the rec_list and
the entry); the other was that holding on to all the records still
appeared to slow things down a bit. Even with the array of storage, it
made it slow.

I decided to make it closer to the original, but still with helper
functions. The key was modifying struct rec_list to be:

struct rec_list {
	union {
		struct kshark_entry		entry;
		struct {
			struct rec_list		*next; /* Matches entry->next */
			struct pevent_record	*rec;
		};
	};
};

And creating an enum:

enum rec_type {
	REC_RECORD,
	REC_ENTRY,
};

That lets the helper functions know what type of list they are dealing
with.

Will this change affect your numpy work much? You can add a REC_NUMPY
if needed (a couple of sketches follow the patch below). Try it out and
see if it improves things on your end.

Oh, and I pushed my patches.

-- Steve

Index: trace-cmd.git/kernel-shark-qt/src/libkshark.c
===================================================================
--- trace-cmd.git.orig/kernel-shark-qt/src/libkshark.c
+++ trace-cmd.git/kernel-shark-qt/src/libkshark.c
@@ -503,12 +503,23 @@ static void kshark_set_entry_values(stru
 /* Quiet warnings over documenting simple structures */
 //! @cond Doxygen_Suppress
 struct rec_list {
-	struct pevent_record	*rec;
-	struct rec_list		*next;
+	union {
+		struct kshark_entry		entry;
+		struct {
+			struct rec_list		*next; /* Matches entry->next */
+			struct pevent_record	*rec;
+		};
+	};
 };
 //! @endcond
 
-static void free_rec_list(struct rec_list **rec_list, int n_cpus)
+enum rec_type {
+	REC_RECORD,
+	REC_ENTRY,
+};
+
+static void free_rec_list(struct rec_list **rec_list, int n_cpus,
+			  enum rec_type type)
 {
 	struct rec_list *temp_rec;
 	int cpu;
@@ -517,7 +528,8 @@ static void free_rec_list(struct rec_lis
 		while (rec_list[cpu]) {
 			temp_rec = rec_list[cpu];
 			rec_list[cpu] = temp_rec->next;
-			free_record(temp_rec->rec);
+			if (type == REC_RECORD)
+				free_record(temp_rec->rec);
 			free(temp_rec);
 		}
 	}
@@ -525,8 +537,9 @@ static void free_rec_list(struct rec_lis
 }
 
 static size_t get_records(struct kshark_context *kshark_ctx,
-			  struct rec_list ***rec_list)
+			  struct rec_list ***rec_list, enum rec_type type)
 {
+	struct event_filter *adv_filter = NULL;
 	struct pevent_record *rec;
 	struct rec_list **temp_next;
 	struct rec_list **cpu_list;
@@ -547,12 +560,50 @@ static size_t get_records(struct kshark_
 		rec = tracecmd_read_cpu_first(kshark_ctx->handle, cpu);
 
 		while (rec) {
-			*temp_next = temp_rec = malloc(sizeof(*temp_rec));
+			*temp_next = temp_rec = calloc(1, sizeof(*temp_rec));
 			if (!temp_rec)
 				goto fail;
 
-			temp_rec->rec = rec;
 			temp_rec->next = NULL;
+
+			switch (type) {
+			case REC_RECORD:
+				temp_rec->rec = rec;
+				break;
+			case REC_ENTRY: {
+				struct kshark_task_list *task;
+				struct kshark_entry *entry;
+				int ret;
+
+				if (!adv_filter)
+					adv_filter = kshark_ctx->advanced_event_filter;
+				entry = &temp_rec->entry;
+				kshark_set_entry_values(kshark_ctx, rec, entry);
+				task = kshark_add_task(kshark_ctx, entry->pid);
+				if (!task) {
+					free_record(rec);
+					goto fail;
+				}
+
+				/* Apply event filtering. */
+				ret = FILTER_NONE;
+				if (adv_filter->filters)
+					ret = pevent_filter_match(adv_filter, rec);
+
+				if (!kshark_show_event(kshark_ctx, entry->event_id) ||
+				    ret != FILTER_MATCH) {
+					unset_event_filter_flag(kshark_ctx, entry);
+				}
+
+				/* Apply task filtering. */
+				if (!kshark_show_task(kshark_ctx, entry->pid)) {
+					entry->visible &= ~kshark_ctx->filter_mask;
+				}
+				free_record(rec);
+				break;
+			} /* REC_ENTRY */
+			}
+
 			temp_next = &temp_rec->next;
 
 			++count;
@@ -566,13 +617,15 @@ static size_t get_records(struct kshark_
 	return total;
 
  fail:
-	free_rec_list(cpu_list, n_cpus);
+	free_rec_list(cpu_list, n_cpus, type);
 	return -ENOMEM;
 }
 
-static int pick_next_cpu(struct rec_list **rec_list, int n_cpus)
+static int pick_next_cpu(struct rec_list **rec_list, int n_cpus,
+			 enum rec_type type)
 {
 	uint64_t ts = 0;
+	uint64_t rec_ts;
 	int next_cpu = -1;
 	int cpu;
 
@@ -580,8 +633,16 @@ static int pick_next_cpu(struct rec_list
 		if (!rec_list[cpu])
 			continue;
 
-		if (!ts || rec_list[cpu]->rec->ts < ts) {
-			ts = rec_list[cpu]->rec->ts;
+		switch (type) {
+		case REC_RECORD:
+			rec_ts = rec_list[cpu]->rec->ts;
+			break;
+		case REC_ENTRY:
+			rec_ts = rec_list[cpu]->entry.ts;
+			break;
+		}
+		if (!ts || rec_ts < ts) {
+			ts = rec_ts;
 			next_cpu = cpu;
 		}
 	}
@@ -610,16 +671,11 @@ static int pick_next_cpu(struct rec_list
 ssize_t kshark_load_data_entries(struct kshark_context *kshark_ctx,
 				 struct kshark_entry ***data_rows)
 {
-	struct event_filter *adv_filter = kshark_ctx->advanced_event_filter;
-	struct kshark_task_list *task;
 	struct kshark_entry **rows;
-	struct kshark_entry *entry;
 	struct rec_list **rec_list;
-	struct rec_list *temp_rec;
-	struct pevent_record *rec;
+	enum rec_type type = REC_ENTRY;
 	size_t count, total = 0;
 	int n_cpus;
-	int ret;
 
 	if (*data_rows)
 		free(*data_rows);
@@ -631,63 +687,33 @@ ssize_t kshark_load_data_entries(struct
 	 * code simplier. We should revisit to see if we can
 	 * bring back the performance.
 	 */
-	total = get_records(kshark_ctx, &rec_list);
+	total = get_records(kshark_ctx, &rec_list, type);
 	if (total < 0)
 		goto fail;
 
+	n_cpus = tracecmd_cpus(kshark_ctx->handle);
+
 	rows = calloc(total, sizeof(struct kshark_entry *));
 	if(!rows)
-		goto fail;
-
-	n_cpus = tracecmd_cpus(kshark_ctx->handle);
+		goto fail_free;
 
 	for (count = 0; count < total; count++) {
 		int next_cpu;
 
-		next_cpu = pick_next_cpu(rec_list, n_cpus);
+		next_cpu = pick_next_cpu(rec_list, n_cpus, type);
 
 		if (next_cpu >= 0) {
-			entry = malloc(sizeof(struct kshark_entry));
-			if (!entry)
-				goto fail_free;
-
-			rec = rec_list[next_cpu]->rec;
-			rows[count] = entry;
-
-			kshark_set_entry_values(kshark_ctx, rec, entry);
-			task = kshark_add_task(kshark_ctx, entry->pid);
-			if (!task)
-				goto fail_free;
-
-			/* Apply event filtering. */
-			ret = FILTER_NONE;
-			if (adv_filter->filters)
-				ret = pevent_filter_match(adv_filter, rec);
-
-			if (!kshark_show_event(kshark_ctx, entry->event_id) ||
-			    ret != FILTER_MATCH) {
-				unset_event_filter_flag(kshark_ctx, entry);
-			}
-
-			/* Apply task filtering. */
-			if (!kshark_show_task(kshark_ctx, entry->pid)) {
-				entry->visible &= ~kshark_ctx->filter_mask;
-			}
-
-			temp_rec = rec_list[next_cpu];
+			rows[count] = &rec_list[next_cpu]->entry;
 			rec_list[next_cpu] = rec_list[next_cpu]->next;
-			free(temp_rec);
-			/* The record is no longer be referenced */
-			free_record(rec);
 		}
 	}
 
-	free_rec_list(rec_list, n_cpus);
+	free_rec_list(rec_list, n_cpus, type);
 	*data_rows = rows;
 
 	return total;
 
  fail_free:
-	free_rec_list(rec_list, n_cpus);
+	free_rec_list(rec_list, n_cpus, type);
 	for (count = 0; count < total; count++) {
 		if (!rows[count])
 			break;
@@ -717,11 +743,12 @@ ssize_t kshark_load_data_records(struct
 	struct pevent_record *rec;
 	struct rec_list **rec_list;
 	struct rec_list *temp_rec;
+	enum rec_type type = REC_RECORD;
 	size_t count, total = 0;
 	int n_cpus;
 	int pid;
 
-	total = get_records(kshark_ctx, &rec_list);
+	total = get_records(kshark_ctx, &rec_list, REC_RECORD);
 	if (total < 0)
 		goto fail;
 
@@ -734,7 +761,7 @@ ssize_t kshark_load_data_records(struct
 	for (count = 0; count < total; count++) {
 		int next_cpu;
 
-		next_cpu = pick_next_cpu(rec_list, n_cpus);
+		next_cpu = pick_next_cpu(rec_list, n_cpus, type);
 
 		if (next_cpu >= 0) {
 			rec = rec_list[next_cpu]->rec;
@@ -753,7 +780,7 @@ ssize_t kshark_load_data_records(struct
 	}
 
 	/* There should be no records left in rec_list */
-	free_rec_list(rec_list, n_cpus);
+	free_rec_list(rec_list, n_cpus, type);
 	*data_rows = rows;
 
 	return total;

Index: trace-cmd.git/kernel-shark-qt/src/libkshark.h
===================================================================
--- trace-cmd.git.orig/kernel-shark-qt/src/libkshark.h
+++ trace-cmd.git/kernel-shark-qt/src/libkshark.h
@@ -33,6 +33,9 @@ extern "C" {
  * info etc.) is available on-demand via the offset into the trace file.
  */
 struct kshark_entry {
+	/** Pointer to the next (in time) kshark_entry on the same CPU core. */
+	struct kshark_entry *next; /* MUST BE FIRST ENTRY */
+
 	/**
 	 * A bit mask controlling the visibility of the entry. A value of OxFF
 	 * would mean that the entry is visible everywhere. Use
@@ -60,9 +63,6 @@ struct kshark_entry {
 	 * started.
 	 */
 	uint64_t ts;
-
-	/** Pointer to the next (in time) kshark_entry on the same CPU core. */
-	struct kshark_entry *next;
 };
 
 /** Size of the task's hash table. */
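
One note on the libkshark.h change: the union trick only works because
next sits at offset zero in every variant, which is what the
"MUST BE FIRST ENTRY" comment is guarding. A compile-time check along
these lines (just a sketch, not part of the patch; it assumes C11 and
that libkshark.h is already included) would turn an accidental
reordering into a build error instead of silent list corruption:

#include <stddef.h>	/* offsetof() */

/*
 * Sketch: the rec_list union aliases its anonymous struct's next
 * pointer with kshark_entry.next, so next must stay the first
 * member of struct kshark_entry.
 */
_Static_assert(offsetof(struct kshark_entry, next) == 0,
	       "kshark_entry.next must be the first member");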
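
And for the numpy case, a rough sketch of how a third variant could
slot in. The kshark_numpy_fields name and its members are made up for
illustration; the only hard requirement is that every union member
keeps the next pointer first so the list links stay aliased:

/* Assumes <stdint.h> and the libkshark types are in scope. */

struct rec_list;

enum rec_type {
	REC_RECORD,
	REC_ENTRY,
	REC_NUMPY,
};

/* Hypothetical fixed-width fields for a numpy export. */
struct kshark_numpy_fields {
	struct rec_list		*next;	/* Matches entry->next */
	uint64_t		ts;
	int32_t			pid;
	int32_t			event_id;
};

struct rec_list {
	union {
		struct kshark_entry		entry;
		struct {
			struct rec_list		*next; /* Matches entry->next */
			struct pevent_record	*rec;
		};
		struct kshark_numpy_fields	numpy;
	};
};

pick_next_cpu() would then read rec_list[cpu]->numpy.ts in a new
REC_NUMPY case, and get_records() and free_rec_list() would grow
matching cases the same way.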