From 1224550969e0bf18785786a1a9f801cd86d68586 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 5 Mar 2010 11:54:02 -0300 Subject: perf tools: Don't throw away old map slices not overlapped by new maps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Arnaldo Carvalho de Melo Cc: David S. Miller Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1267800842-22324-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/thread.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 21b92162282b..9024fa1ff5c2 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -183,8 +183,8 @@ struct thread *perf_session__findnew(struct perf_session *self, pid_t pid) return th; } -static void map_groups__remove_overlappings(struct map_groups *self, - struct map *map) +static int map_groups__fixup_overlappings(struct map_groups *self, + struct map *map) { struct rb_root *root = &self->maps[map->type]; struct rb_node *next = rb_first(root); @@ -209,7 +209,36 @@ static void map_groups__remove_overlappings(struct map_groups *self, * list. 
*/ list_add_tail(&pos->node, &self->removed_maps[map->type]); + /* + * Now check if we need to create new maps for areas not + * overlapped by the new map: + */ + if (map->start > pos->start) { + struct map *before = map__clone(pos); + + if (before == NULL) + return -ENOMEM; + + before->end = map->start - 1; + map_groups__insert(self, before); + if (verbose >= 2) + map__fprintf(before, stderr); + } + + if (map->end < pos->end) { + struct map *after = map__clone(pos); + + if (after == NULL) + return -ENOMEM; + + after->start = map->end + 1; + map_groups__insert(self, after); + if (verbose >= 2) + map__fprintf(after, stderr); + } } + + return 0; } void maps__insert(struct rb_root *maps, struct map *map) @@ -254,7 +283,7 @@ struct map *maps__find(struct rb_root *maps, u64 ip) void thread__insert_map(struct thread *self, struct map *map) { - map_groups__remove_overlappings(&self->mg, map); + map_groups__fixup_overlappings(&self->mg, map); map_groups__insert(&self->mg, map); } -- cgit v1.2.3 From accd3cc45a0e1d11090ea66888405987de77bdca Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 5 Mar 2010 12:51:04 -0300 Subject: perf probe: Add missing variable initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cc1: warnings being treated as errors util/probe-finder.c: In function 'find_line_range': util/probe-finder.c:172: warning: 'src' may be used uninitialized in this function make: *** [util/probe-finder.o] Error 1 Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Masami Hiramatsu Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1267804269-22660-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/probe-finder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index e77dc886760e..1e6c65ebbd80 100644 --- 
a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -169,7 +169,7 @@ static const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname) { Dwarf_Files *files; size_t nfiles, i; - const char *src; + const char *src = NULL; int ret; if (!fname) -- cgit v1.2.3 From 8907fd607b66e36636469a2de9833db643869db8 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Fri, 5 Mar 2010 12:51:05 -0300 Subject: perf record: Add ID to recorded event data when recording multiple events Currently perf record does not write the ID to disk for events. This doesn't allow report to tell if an event stream contains one or more types of events. This patch adds this entry to the list of data that record will write to disk if more than one event was requested. Signed-off-by: Eric B Munson Signed-off-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1267804269-22660-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 771533ced6a8..f573bbb83572 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -244,6 +244,9 @@ static void create_counter(int counter, int cpu, pid_t pid) attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID; + if (nr_counters > 1) + attr->sample_type |= PERF_SAMPLE_ID; + if (freq) { attr->sample_type |= PERF_SAMPLE_PERIOD; attr->freq = 1; -- cgit v1.2.3 From d403d0acc9c5afa679a3f61e71489530d7fa0606 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Fri, 5 Mar 2010 12:51:06 -0300 Subject: perf session: Change add_hist_entry to take the tree root instead of session In order to minimize the impact of storing multiple events in a report this function will now take the root of the histogram tree so that the logic for selecting the proper tree can be inserted before the call. 
Signed-off-by: Eric B Munson Signed-off-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1267804269-22660-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-diff.c | 3 ++- tools/perf/builtin-report.c | 3 ++- tools/perf/util/hist.c | 6 +++--- tools/perf/util/hist.h | 3 ++- 5 files changed, 10 insertions(+), 7 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 5ec5de995872..4b734c731e27 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -116,7 +116,7 @@ static int perf_session__add_hist_entry(struct perf_session *self, return 0; } - he = __perf_session__add_hist_entry(self, al, NULL, count, &hit); + he = __perf_session__add_hist_entry(&self->hists, al, NULL, count, &hit); if (he == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 18b3f505f9db..20df7352629b 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -26,7 +26,8 @@ static int perf_session__add_hist_entry(struct perf_session *self, struct addr_location *al, u64 count) { bool hit; - struct hist_entry *he = __perf_session__add_hist_entry(self, al, NULL, + struct hist_entry *he = __perf_session__add_hist_entry(&self->hists, + al, NULL, count, &hit); if (he == NULL) return -ENOMEM; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index cfc655d40bb7..cd16e6a7d6d0 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -56,7 +56,8 @@ static int perf_session__add_hist_entry(struct perf_session *self, if ((sort__has_parent || symbol_conf.use_callchain) && chain) syms = perf_session__resolve_callchain(self, al->thread, chain, &parent); - he = __perf_session__add_hist_entry(self, al, parent, count, &hit); + he = __perf_session__add_hist_entry(&self->hists, al, parent, + count, &hit); if (he == NULL) return 
-ENOMEM; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index e8daf5ca6fd2..55dd9115d1b4 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -12,12 +12,12 @@ struct callchain_param callchain_param = { * histogram, sorted on item, collects counts */ -struct hist_entry *__perf_session__add_hist_entry(struct perf_session *self, +struct hist_entry *__perf_session__add_hist_entry(struct rb_root *hists, struct addr_location *al, struct symbol *sym_parent, u64 count, bool *hit) { - struct rb_node **p = &self->hists.rb_node; + struct rb_node **p = &hists->rb_node; struct rb_node *parent = NULL; struct hist_entry *he; struct hist_entry entry = { @@ -53,7 +53,7 @@ struct hist_entry *__perf_session__add_hist_entry(struct perf_session *self, return NULL; *he = entry; rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &self->hists); + rb_insert_color(&he->rb_node, hists); *hit = false; return he; } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index e5f99b24048b..7b48590c3ee8 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -10,8 +10,9 @@ struct perf_session; struct hist_entry; struct addr_location; struct symbol; +struct rb_root; -struct hist_entry *__perf_session__add_hist_entry(struct perf_session *self, +struct hist_entry *__perf_session__add_hist_entry(struct rb_root *hists, struct addr_location *al, struct symbol *parent, u64 count, bool *hit); -- cgit v1.2.3 From cb8f09393646c5058056db771583c86e0ed1d92f Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Fri, 5 Mar 2010 12:51:07 -0300 Subject: perf session: Add storage for separating event types in report This patch adds the structures necessary to count each event type independently in perf report. 
Signed-off-by: Eric B Munson Signed-off-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1267804269-22660-4-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/event.h | 9 +++++++++ tools/perf/util/session.c | 1 + tools/perf/util/session.h | 1 + 3 files changed, 11 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 50a7132887f5..a33b94952e34 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -99,6 +99,15 @@ struct events_stats { u64 lost; }; +struct event_stat_id { + struct rb_node rb_node; + struct rb_root hists; + struct events_stats stats; + u64 config; + u64 event_stream; + u32 type; +}; + void event__print_totals(void); struct perf_session; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 0de7258e70a5..eed1cb889008 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -70,6 +70,7 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc memcpy(self->filename, filename, len); self->threads = RB_ROOT; + self->stats_by_id = RB_ROOT; self->last_match = NULL; self->mmap_window = 32; self->cwd = NULL; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 31950fcd8a4d..5c33417eebb3 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -20,6 +20,7 @@ struct perf_session { struct thread *last_match; struct map *vmlinux_maps[MAP__NR_TYPES]; struct events_stats events_stats; + struct rb_root stats_by_id; unsigned long event_total[PERF_RECORD_MAX]; unsigned long unknown_events; struct rb_root hists; -- cgit v1.2.3 From eefc465cdd49cb89a742083fac2807c718ddad31 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Fri, 5 Mar 2010 12:51:08 -0300 Subject: perf session: Change perf_session post processing functions to take histogram tree Now that report can store histograms for multiple events we need to be able to do the post 
processing work for each histogram. This patch changes the post processing functions so that they can be called individually for each event's histogram. Signed-off-by: Eric B Munson [ Guarantee bisectability by fixing up builtin-report.c ] Signed-off-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1267804269-22660-5-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 4 ++-- tools/perf/builtin-diff.c | 10 ++++++---- tools/perf/builtin-report.c | 8 +++++--- tools/perf/util/hist.c | 39 ++++++++++++++++++++------------------- tools/perf/util/hist.h | 9 +++++---- 5 files changed, 38 insertions(+), 32 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 4b734c731e27..6ad7148451c5 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -564,8 +564,8 @@ static int __cmd_annotate(void) if (verbose > 2) dsos__fprintf(stdout); - perf_session__collapse_resort(session); - perf_session__output_resort(session, session->event_total[0]); + perf_session__collapse_resort(&session->hists); + perf_session__output_resort(&session->hists, session->event_total[0]); perf_session__find_annotations(session); out_delete: perf_session__delete(session); diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 20df7352629b..1ea15d8aeed1 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -115,7 +115,7 @@ static void perf_session__resort_hist_entries(struct perf_session *self) static void perf_session__set_hist_entries_positions(struct perf_session *self) { - perf_session__output_resort(self, self->events_stats.total); + perf_session__output_resort(&self->hists, self->events_stats.total); perf_session__resort_hist_entries(self); } @@ -167,13 +167,15 @@ static int __cmd_diff(void) goto out_delete; } - perf_session__output_resort(session[1], session[1]->events_stats.total); + 
perf_session__output_resort(&session[1]->hists, + session[1]->events_stats.total); if (show_displacement) perf_session__set_hist_entries_positions(session[0]); perf_session__match_hists(session[0], session[1]); - perf_session__fprintf_hists(session[1], session[0], - show_displacement, stdout); + perf_session__fprintf_hists(&session[1]->hists, session[0], + show_displacement, stdout, + session[1]->events_stats.total); out_delete: for (i = 0; i < 2; ++i) perf_session__delete(session[i]); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index cd16e6a7d6d0..294b4cf105f2 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -225,10 +225,12 @@ static int __cmd_report(void) if (verbose > 2) dsos__fprintf(stdout); - perf_session__collapse_resort(session); - perf_session__output_resort(session, session->events_stats.total); + perf_session__collapse_resort(&session->hists); + perf_session__output_resort(&session->hists, + session->events_stats.total); fprintf(stdout, "# Samples: %Ld\n#\n", session->events_stats.total); - perf_session__fprintf_hists(session, NULL, false, stdout); + perf_session__fprintf_hists(&session->hists, NULL, false, stdout, + session->events_stats.total); if (sort_order == default_sort_order && parent_pattern == default_parent_pattern) fprintf(stdout, "#\n# (For a higher level overview, try: perf report --sort comm,dso)\n#\n"); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 55dd9115d1b4..73ebb6fb34ac 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -130,7 +130,7 @@ static void collapse__insert_entry(struct rb_root *root, struct hist_entry *he) rb_insert_color(&he->rb_node, root); } -void perf_session__collapse_resort(struct perf_session *self) +void perf_session__collapse_resort(struct rb_root *hists) { struct rb_root tmp; struct rb_node *next; @@ -140,17 +140,17 @@ void perf_session__collapse_resort(struct perf_session *self) return; tmp = RB_ROOT; - next = 
rb_first(&self->hists); + next = rb_first(hists); while (next) { n = rb_entry(next, struct hist_entry, rb_node); next = rb_next(&n->rb_node); - rb_erase(&n->rb_node, &self->hists); + rb_erase(&n->rb_node, hists); collapse__insert_entry(&tmp, n); } - self->hists = tmp; + *hists = tmp; } /* @@ -183,7 +183,7 @@ static void perf_session__insert_output_hist_entry(struct rb_root *root, rb_insert_color(&he->rb_node, root); } -void perf_session__output_resort(struct perf_session *self, u64 total_samples) +void perf_session__output_resort(struct rb_root *hists, u64 total_samples) { struct rb_root tmp; struct rb_node *next; @@ -194,18 +194,18 @@ void perf_session__output_resort(struct perf_session *self, u64 total_samples) total_samples * (callchain_param.min_percent / 100); tmp = RB_ROOT; - next = rb_first(&self->hists); + next = rb_first(hists); while (next) { n = rb_entry(next, struct hist_entry, rb_node); next = rb_next(&n->rb_node); - rb_erase(&n->rb_node, &self->hists); + rb_erase(&n->rb_node, hists); perf_session__insert_output_hist_entry(&tmp, n, min_callchain_hits); } - self->hists = tmp; + *hists = tmp; } static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) @@ -456,10 +456,10 @@ static size_t hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, } static size_t hist_entry__fprintf(struct hist_entry *self, - struct perf_session *session, struct perf_session *pair_session, bool show_displacement, - long displacement, FILE *fp) + long displacement, FILE *fp, + u64 session_total) { struct sort_entry *se; u64 count, total; @@ -474,7 +474,7 @@ static size_t hist_entry__fprintf(struct hist_entry *self, total = pair_session->events_stats.total; } else { count = self->count; - total = session->events_stats.total; + total = session_total; } if (total) @@ -496,8 +496,8 @@ static size_t hist_entry__fprintf(struct hist_entry *self, if (total > 0) old_percent = (count * 100.0) / total; - if (session->events_stats.total > 0) - new_percent = 
(self->count * 100.0) / session->events_stats.total; + if (session_total > 0) + new_percent = (self->count * 100.0) / session_total; diff = new_percent - old_percent; @@ -544,16 +544,17 @@ static size_t hist_entry__fprintf(struct hist_entry *self, left_margin -= thread__comm_len(self->thread); } - hist_entry_callchain__fprintf(fp, self, session->events_stats.total, + hist_entry_callchain__fprintf(fp, self, session_total, left_margin); } return ret; } -size_t perf_session__fprintf_hists(struct perf_session *self, +size_t perf_session__fprintf_hists(struct rb_root *hists, struct perf_session *pair, - bool show_displacement, FILE *fp) + bool show_displacement, FILE *fp, + u64 session_total) { struct sort_entry *se; struct rb_node *nd; @@ -641,7 +642,7 @@ size_t perf_session__fprintf_hists(struct perf_session *self, fprintf(fp, "\n#\n"); print_entries: - for (nd = rb_first(&self->hists); nd; nd = rb_next(nd)) { + for (nd = rb_first(hists); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); if (show_displacement) { @@ -652,8 +653,8 @@ print_entries: displacement = 0; ++position; } - ret += hist_entry__fprintf(h, self, pair, show_displacement, - displacement, fp); + ret += hist_entry__fprintf(h, pair, show_displacement, + displacement, fp, session_total); } free(rem_sq_bracket); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 7b48590c3ee8..16f360cce5bf 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -20,9 +20,10 @@ extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *); extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *); void hist_entry__free(struct hist_entry *); -void perf_session__output_resort(struct perf_session *self, u64 total_samples); -void perf_session__collapse_resort(struct perf_session *self); -size_t perf_session__fprintf_hists(struct perf_session *self, +void perf_session__output_resort(struct rb_root *hists, u64 total_samples); +void 
perf_session__collapse_resort(struct rb_root *hists); +size_t perf_session__fprintf_hists(struct rb_root *hists, struct perf_session *pair, - bool show_displacement, FILE *fp); + bool show_displacement, FILE *fp, + u64 session_total); #endif /* __PERF_HIST_H */ -- cgit v1.2.3 From cbbc79a53278b83bf7f834127751459f9299e402 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Fri, 5 Mar 2010 12:51:09 -0300 Subject: perf report: Add multiple event support Perf report does not handle multiple events being reported, even though perf record stores them properly on disk. This patch addresses that issue by adding the logic to perf report to use the event stream id that is saved by record and the new data structures to separate the event streams and report them individually. Signed-off-by: Eric B Munson Signed-off-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1267804269-22660-6-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 115 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 100 insertions(+), 15 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 294b4cf105f2..f815de25d0fc 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -45,29 +45,71 @@ static char *pretty_printing_style = default_pretty_printing_style; static char callchain_default_opt[] = "fractal,0.5"; +static struct event_stat_id *get_stats(struct perf_session *self, + u64 event_stream, u32 type, u64 config) +{ + struct rb_node **p = &self->stats_by_id.rb_node; + struct rb_node *parent = NULL; + struct event_stat_id *iter, *new; + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct event_stat_id, rb_node); + if (iter->config == config) + return iter; + + + if (config > iter->config) + p = &(*p)->rb_right; + else + p = &(*p)->rb_left; + } + + new = malloc(sizeof(struct event_stat_id)); + if (new == NULL) + return 
NULL; + memset(new, 0, sizeof(struct event_stat_id)); + new->event_stream = event_stream; + new->config = config; + new->type = type; + rb_link_node(&new->rb_node, parent, p); + rb_insert_color(&new->rb_node, &self->stats_by_id); + return new; +} + static int perf_session__add_hist_entry(struct perf_session *self, struct addr_location *al, - struct ip_callchain *chain, u64 count) + struct sample_data *data) { struct symbol **syms = NULL, *parent = NULL; bool hit; struct hist_entry *he; + struct event_stat_id *stats; + struct perf_event_attr *attr; - if ((sort__has_parent || symbol_conf.use_callchain) && chain) + if ((sort__has_parent || symbol_conf.use_callchain) && data->callchain) syms = perf_session__resolve_callchain(self, al->thread, - chain, &parent); - he = __perf_session__add_hist_entry(&self->hists, al, parent, - count, &hit); + data->callchain, &parent); + + attr = perf_header__find_attr(data->id, &self->header); + if (attr) + stats = get_stats(self, data->id, attr->type, attr->config); + else + stats = get_stats(self, data->id, 0, 0); + if (stats == NULL) + return -ENOMEM; + he = __perf_session__add_hist_entry(&stats->hists, al, parent, + data->period, &hit); if (he == NULL) return -ENOMEM; if (hit) - he->count += count; + he->count += data->period; if (symbol_conf.use_callchain) { if (!hit) callchain_init(&he->callchain); - append_chain(&he->callchain, chain, syms); + append_chain(&he->callchain, data->callchain, syms); free(syms); } @@ -87,10 +129,30 @@ static int validate_chain(struct ip_callchain *chain, event_t *event) return 0; } +static int add_event_total(struct perf_session *session, + struct sample_data *data, + struct perf_event_attr *attr) +{ + struct event_stat_id *stats; + + if (attr) + stats = get_stats(session, data->id, attr->type, attr->config); + else + stats = get_stats(session, data->id, 0, 0); + + if (!stats) + return -ENOMEM; + + stats->stats.total += data->period; + session->events_stats.total += data->period; + return 0; +} + 
static int process_sample_event(event_t *event, struct perf_session *session) { struct sample_data data = { .period = 1, }; struct addr_location al; + struct perf_event_attr *attr; event__parse_sample(event, session->sample_type, &data); @@ -124,12 +186,18 @@ static int process_sample_event(event_t *event, struct perf_session *session) if (al.filtered || (hide_unresolved && al.sym == NULL)) return 0; - if (perf_session__add_hist_entry(session, &al, data.callchain, data.period)) { + if (perf_session__add_hist_entry(session, &al, &data)) { pr_debug("problem incrementing symbol count, skipping event\n"); return -1; } - session->events_stats.total += data.period; + attr = perf_header__find_attr(data.id, &session->header); + + if (add_event_total(session, &data, attr)) { + pr_debug("problem adding event count\n"); + return -1; + } + return 0; } @@ -198,6 +266,7 @@ static int __cmd_report(void) { int ret = -EINVAL; struct perf_session *session; + struct rb_node *next; session = perf_session__new(input_name, O_RDONLY, force); if (session == NULL) @@ -225,12 +294,28 @@ static int __cmd_report(void) if (verbose > 2) dsos__fprintf(stdout); - perf_session__collapse_resort(&session->hists); - perf_session__output_resort(&session->hists, - session->events_stats.total); - fprintf(stdout, "# Samples: %Ld\n#\n", session->events_stats.total); - perf_session__fprintf_hists(&session->hists, NULL, false, stdout, - session->events_stats.total); + next = rb_first(&session->stats_by_id); + while (next) { + struct event_stat_id *stats; + + stats = rb_entry(next, struct event_stat_id, rb_node); + perf_session__collapse_resort(&stats->hists); + perf_session__output_resort(&stats->hists, stats->stats.total); + if (rb_first(&session->stats_by_id) == + rb_last(&session->stats_by_id)) + fprintf(stdout, "# Samples: %Ld\n#\n", + stats->stats.total); + else + fprintf(stdout, "# Samples: %Ld %s\n#\n", + stats->stats.total, + __event_name(stats->type, stats->config)); + + 
perf_session__fprintf_hists(&stats->hists, NULL, false, stdout, + stats->stats.total); + fprintf(stdout, "\n\n"); + next = rb_next(&stats->rb_node); + } + if (sort_order == default_sort_order && parent_pattern == default_parent_pattern) fprintf(stdout, "#\n# (For a higher level overview, try: perf report --sort comm,dso)\n#\n"); -- cgit v1.2.3 From 65f2ed2b2fa6034ef9890b60c8fd39fbe76b9d37 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Mar 2010 15:58:17 -0300 Subject: perf report: Print the map table just after samples for which no map was found MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If -vv is used just the map table will be printed, -vvv will print the symbol table too, with it we can see that we have a bug where some samples are not being resolved to a map when we get them in the perf.data stream, but after we have it all processed, we can find the right map, some reordering probably is happening. Upcoming patches will provide ways to ask for most PERF_SAMPLE_ conditional samples to be taken for !PERF_RECORD_SAMPLE events too, then we'll be able to ask for PERF_SAMPLE_TIME and PERF_SAMPLE_CPU to help diagnose this. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1268161097-17761-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/util/hist.c | 5 +++++ tools/perf/util/thread.c | 6 +++--- tools/perf/util/thread.h | 3 +++ 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 73ebb6fb34ac..bdcfd6190b21 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -655,6 +655,11 @@ print_entries: } ret += hist_entry__fprintf(h, pair, show_displacement, displacement, fp, session_total); + if (h->map == NULL && verbose > 1) { + __map_groups__fprintf_maps(&h->thread->mg, + MAP__FUNCTION, fp); + fprintf(fp, "%.10s end\n", graph_dotted_line); + } } free(rem_sq_bracket); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 9024fa1ff5c2..fa968312ee7d 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -79,8 +79,8 @@ int thread__comm_len(struct thread *self) return self->comm_len; } -static size_t __map_groups__fprintf_maps(struct map_groups *self, - enum map_type type, FILE *fp) +size_t __map_groups__fprintf_maps(struct map_groups *self, + enum map_type type, FILE *fp) { size_t printed = fprintf(fp, "%s:\n", map_type__name[type]); struct rb_node *nd; @@ -89,7 +89,7 @@ static size_t __map_groups__fprintf_maps(struct map_groups *self, struct map *pos = rb_entry(nd, struct map, rb_node); printed += fprintf(fp, "Map:"); printed += map__fprintf(pos, fp); - if (verbose > 1) { + if (verbose > 2) { printed += dso__fprintf(pos->dso, type, fp); printed += fprintf(fp, "--\n"); } diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 0a28f39de545..dcf70303e58e 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -10,6 +10,9 @@ struct map_groups { struct list_head removed_maps[MAP__NR_TYPES]; }; +size_t 
__map_groups__fprintf_maps(struct map_groups *self, + enum map_type type, FILE *fp); + struct thread { struct rb_node rb_node; struct map_groups mg; -- cgit v1.2.3 From a12b51c478899fe0b7e874a559b05ba35f1128ee Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 10 Mar 2010 20:36:09 +1100 Subject: perf tools: Fix sparse CPU numbering related bugs At present, the perf subcommands that do system-wide monitoring (perf stat, perf record and perf top) don't work properly unless the online cpus are numbered 0, 1, ..., N-1. These tools ask for the number of online cpus with sysconf(_SC_NPROCESSORS_ONLN) and then try to create events for cpus 0, 1, ..., N-1. This creates problems for systems where the online cpus are numbered sparsely. For example, a POWER6 system in single-threaded mode (i.e. only running 1 hardware thread per core) will have only even-numbered cpus online. This fixes the problem by reading the /sys/devices/system/cpu/online file to find out which cpus are online. The code that does that is in tools/perf/util/cpumap.[ch], and consists of a read_cpu_map() function that sets up a cpumap[] array and returns the number of online cpus. If /sys/devices/system/cpu/online can't be read or can't be parsed successfully, it falls back to using sysconf to ask how many cpus are online and sets up an identity map in cpumap[]. The perf record, perf stat and perf top code then calls read_cpu_map() in the system-wide monitoring case (instead of sysconf) and uses cpumap[] to get the cpu numbers to pass to perf_event_open. 
Signed-off-by: Paul Mackerras Cc: Anton Blanchard Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo LKML-Reference: <20100310093609.GA3959@brick.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 2 ++ tools/perf/builtin-record.c | 7 +++--- tools/perf/builtin-stat.c | 10 +++++--- tools/perf/builtin-top.c | 9 +++---- tools/perf/util/cpumap.c | 59 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/cpumap.h | 7 ++++++ 6 files changed, 81 insertions(+), 13 deletions(-) create mode 100644 tools/perf/util/cpumap.c create mode 100644 tools/perf/util/cpumap.h (limited to 'tools/perf') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 2d537382c686..5840499e2d22 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -387,6 +387,7 @@ LIB_H += util/thread.h LIB_H += util/trace-event.h LIB_H += util/probe-finder.h LIB_H += util/probe-event.h +LIB_H += util/cpumap.h LIB_OBJS += util/abspath.o LIB_OBJS += util/alias.o @@ -433,6 +434,7 @@ LIB_OBJS += util/sort.o LIB_OBJS += util/hist.o LIB_OBJS += util/probe-event.o LIB_OBJS += util/util.o +LIB_OBJS += util/cpumap.o BUILTIN_OBJS += builtin-annotate.o diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f573bbb83572..b09d3b27ca14 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -22,6 +22,7 @@ #include "util/debug.h" #include "util/session.h" #include "util/symbol.h" +#include "util/cpumap.h" #include #include @@ -421,9 +422,6 @@ static int __cmd_record(int argc, const char **argv) char buf; page_size = sysconf(_SC_PAGE_SIZE); - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - assert(nr_cpus <= MAX_NR_CPUS); - assert(nr_cpus >= 0); atexit(sig_atexit); signal(SIGCHLD, sig_handler); @@ -547,8 +545,9 @@ static int __cmd_record(int argc, const char **argv) if ((!system_wide && !inherit) || profile_cpu != -1) { open_counters(profile_cpu, target_pid); } else { + nr_cpus = read_cpu_map(); for (i = 0; i < nr_cpus; i++) - open_counters(i, target_pid); 
+ open_counters(cpumap[i], target_pid); } if (file_new) { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e8c85d5aec41..95db31cff6fd 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -45,6 +45,7 @@ #include "util/event.h" #include "util/debug.h" #include "util/header.h" +#include "util/cpumap.h" #include #include @@ -151,7 +152,7 @@ static void create_perf_stat_counter(int counter, int pid) unsigned int cpu; for (cpu = 0; cpu < nr_cpus; cpu++) { - fd[cpu][counter] = sys_perf_event_open(attr, -1, cpu, -1, 0); + fd[cpu][counter] = sys_perf_event_open(attr, -1, cpumap[cpu], -1, 0); if (fd[cpu][counter] < 0 && verbose) fprintf(stderr, ERR_PERF_OPEN, counter, fd[cpu][counter], strerror(errno)); @@ -519,9 +520,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) nr_counters = ARRAY_SIZE(default_attrs); } - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - assert(nr_cpus <= MAX_NR_CPUS); - assert((int)nr_cpus >= 0); + if (system_wide) + nr_cpus = read_cpu_map(); + else + nr_cpus = 1; /* * We dont want to block the signals - that would cause diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 31f2e597800c..0b719e3dde05 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -28,6 +28,7 @@ #include #include "util/parse-options.h" #include "util/parse-events.h" +#include "util/cpumap.h" #include "util/debug.h" @@ -1123,7 +1124,7 @@ static void start_counter(int i, int counter) cpu = profile_cpu; if (target_pid == -1 && profile_cpu == -1) - cpu = i; + cpu = cpumap[i]; attr = attrs + counter; @@ -1347,12 +1348,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) attrs[counter].sample_period = default_interval; } - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - assert(nr_cpus <= MAX_NR_CPUS); - assert(nr_cpus >= 0); - if (target_pid != -1 || profile_cpu != -1) nr_cpus = 1; + else + nr_cpus = read_cpu_map(); get_term_dimensions(&winsize); if (print_entries == 0) { 
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
new file mode 100644
index 000000000000..4e01490e51e5
--- /dev/null
+++ b/tools/perf/util/cpumap.c
@@ -0,0 +1,83 @@
+#include "util.h"
+#include "../perf.h"
+#include "cpumap.h"
+#include <assert.h>
+#include <stdio.h>
+
+/* Maps a counter index to the CPU number to monitor; see read_cpu_map(). */
+int cpumap[MAX_NR_CPUS];
+
+/*
+ * Fallback map: assume the CPUs are numbered 0..N-1, where N is what
+ * sysconf(_SC_NPROCESSORS_ONLN) reports.
+ *
+ * Returns N, the number of cpumap[] entries filled in.
+ */
+static int default_cpu_map(void)
+{
+	int nr_cpus, i;
+
+	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	assert(nr_cpus <= MAX_NR_CPUS);
+	assert(nr_cpus >= 0);
+
+	for (i = 0; i < nr_cpus; ++i)
+		cpumap[i] = i;
+
+	return nr_cpus;
+}
+
+/*
+ * Fill cpumap[] from /sys/devices/system/cpu/online.
+ *
+ * The file is read as decimal CPU numbers, each followed by one separator
+ * character: '-' makes the number the start of an inclusive range that the
+ * next number ends, '\n' (or no separator at all) ends the list, and any
+ * other character (',' in sysfs) just separates entries, e.g. "0-3,8".
+ *
+ * Falls back to default_cpu_map() when the file cannot be opened or no CPU
+ * number could be parsed. Returns the number of cpumap[] entries filled in.
+ *
+ * NOTE(review): the cpumap[] bounds are only enforced by assert(), so they
+ * are not checked when building with NDEBUG.
+ */
+int read_cpu_map(void)
+{
+	FILE *onlnf;
+	int nr_cpus = 0;
+	int n, cpu, prev;
+	char sep;
+
+	onlnf = fopen("/sys/devices/system/cpu/online", "r");
+	if (!onlnf)
+		return default_cpu_map();
+
+	sep = 0;
+	prev = -1;	/* start of a pending range, -1 when there is none */
+	for (;;) {
+		/* cpu is an int, so it must be scanned with %d, not %u */
+		n = fscanf(onlnf, "%d%c", &cpu, &sep);
+		if (n <= 0)
+			break;
+		if (prev >= 0) {
+			/* cpu ends a range: add the CPUs in between first */
+			assert(nr_cpus + cpu - prev - 1 < MAX_NR_CPUS);
+			while (++prev < cpu)
+				cpumap[nr_cpus++] = prev;
+		}
+		assert(nr_cpus < MAX_NR_CPUS);
+		cpumap[nr_cpus++] = cpu;
+		if (n == 2 && sep == '-')
+			prev = cpu;
+		else
+			prev = -1;
+		/* no separator was read, or the line ended: we are done */
+		if (n == 1 || sep == '\n')
+			break;
+	}
+	fclose(onlnf);
+	if (nr_cpus > 0)
+		return nr_cpus;
+
+	return default_cpu_map();
+}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
new file mode 100644
index 000000000000..86c78bb33098
--- /dev/null
+++ b/tools/perf/util/cpumap.h
@@ -0,0 +1,7 @@
+#ifndef __PERF_CPUMAP_H
+#define __PERF_CPUMAP_H
+
+extern int read_cpu_map(void);
+extern int cpumap[];
+
+#endif /* __PERF_CPUMAP_H */
-- 
cgit v1.2.3

From 7ae5f21361fea11f58c398701da635f778635d13 Mon Sep 17 00:00:00 2001
From: John Kacur
Date: Thu, 11 Mar 2010 13:57:00 +0100
Subject: perf: Make the install relative to DESTDIR if specified

Without this change, the install path is relative to prefix/DESTDIR where
prefix is automatically set to $HOME. This can produce unexpected results.
For example: make -C tools/perf DESTDIR=/home/jkacur/tmp install-man creates the directory: /home/jkacur/home/jkacur/tmp/share/... instead of the expected: /home/jkacur/tmp/share/... Signed-off-by: John Kacur Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Tom Zanussi Cc: Kyle McMartin Cc: LKML-Reference: <1268312220-12880-1-git-send-email-jkacur@redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/Makefile | 4 +++- tools/perf/Makefile | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index bdd3b7ecad0a..bd498d496952 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -24,7 +24,10 @@ DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT)) DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT)) DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT)) +# Make the path relative to DESTDIR, not prefix +ifndef DESTDIR prefix?=$(HOME) +endif bindir?=$(prefix)/bin htmldir?=$(prefix)/share/doc/perf-doc pdfdir?=$(prefix)/share/doc/perf-doc @@ -32,7 +35,6 @@ mandir?=$(prefix)/share/man man1dir=$(mandir)/man1 man5dir=$(mandir)/man5 man7dir=$(mandir)/man7 -# DESTDIR= ASCIIDOC=asciidoc ASCIIDOC_EXTRA = --unsafe diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 5840499e2d22..8a8f52db7e38 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -216,7 +216,10 @@ STRIP ?= strip # runtime figures out where they are based on the path to the executable. # This can help installing the suite in a relocatable way. 
+# Make the path relative to DESTDIR, not to prefix +ifndef DESTDIR prefix = $(HOME) +endif bindir_relative = bin bindir = $(prefix)/$(bindir_relative) mandir = share/man @@ -233,7 +236,6 @@ sysconfdir = $(prefix)/etc ETC_PERFCONFIG = etc/perfconfig endif lib = lib -# DESTDIR= export prefix bindir sharedir sysconfdir -- cgit v1.2.3 From 9f591fd76afdc0e5192e9ed00a36f8efc0b4dfe6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 11 Mar 2010 15:53:11 -0300 Subject: perf record: Don't try to find buildids in a zero sized file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixing this symptom: [acme@mica linux-2.6-tip]$ perf record -a -f Fatal: Permission error - are you root? Bus error [acme@mica linux-2.6-tip]$ I.e. if for some reason no data is collected, in this case a non root user trying to do systemwide profiling, no data will be collected, and then we end up trying to mmap a zero sized file and access the file header, b00m. Reported-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Cc: LKML-Reference: <1268333592-30872-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index b09d3b27ca14..3b8b6387c47c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -395,6 +395,9 @@ static int process_buildids(void) { u64 size = lseek(output, 0, SEEK_CUR); + if (size == 0) + return 0; + session->fd = output; return __perf_session__process_events(session, post_processing_offset, size - post_processing_offset, -- cgit v1.2.3