From 7d073b335edc8d97af730c2e3b83ed6642bd3c27 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 21 Oct 2014 17:09:58 -0700 Subject: perf tools powerpc: Cache the DWARF debug info Cache the DWARF debug info for DSO so we don't have to rebuild it for each address in the DSO. Note that dso__new() uses calloc() so don't need to set dso->dwfl to NULL. $ /tmp/perf.orig --version perf version 3.18.rc1.gc2661b8 $ /tmp/perf.new --version perf version 3.18.rc1.g402d62 $ perf stat -e cycles,instructions /tmp/perf.orig report -g > orig Performance counter stats for '/tmp/perf.orig report -g': 6,428,177,183 cycles # 0.000 GHz 4,176,288,391 instructions # 0.65 insns per cycle 1.840666132 seconds time elapsed $ perf stat -e cycles,instructions /tmp/perf.new report -g > new Performance counter stats for '/tmp/perf.new report -g': 305,773,142 cycles # 0.000 GHz 276,048,272 instructions # 0.90 insns per cycle 0.087693543 seconds time elapsed $ diff orig new $ Changelog[v2]: [Arnaldo Carvalho] Cache in existing global objects rather than create new static/globals in functions. Reported-by: Anton Blanchard Signed-off-by: Sukadev Bhattiprolu Cc: Anton Blanchard Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20141022000958.GB2228@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index acb651acc7fd..3c9b391493f9 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -127,6 +127,7 @@ struct dso { const char *long_name; u16 long_name_len; u16 short_name_len; + void *dwfl; /* DWARF debug info */ /* dso data file */ struct { -- cgit v1.2.3 From 11246c708acdfa9512d7b69c18938810c20fd6ab Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 21 Oct 2014 17:29:02 -0300 Subject: perf tools: Set thread->mg.machine in all places We were setting this only in machine__init(), i.e. 
for the map_groups that holds the kernel module maps, not for the one used for a thread's executable mmaps. Now we are sure that we can obtain the machine where a thread is by going via thread->mg->machine, thus we can, in the following patch, make all codepaths that receive machine _and_ thread, drop the machine one. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jean Pihet Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-y6zgaqsvhrf04v57u15e4ybm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 5 ++--- tools/perf/util/map.c | 8 ++++---- tools/perf/util/map.h | 4 ++-- tools/perf/util/thread.c | 2 +- 4 files changed, 9 insertions(+), 10 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 34fc7c8672e4..c70b3ff7b289 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -21,7 +21,7 @@ static void dsos__init(struct dsos *dsos) int machine__init(struct machine *machine, const char *root_dir, pid_t pid) { - map_groups__init(&machine->kmaps); + map_groups__init(&machine->kmaps, machine); RB_CLEAR_NODE(&machine->rb_node); dsos__init(&machine->user_dsos); dsos__init(&machine->kernel_dsos); @@ -32,7 +32,6 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) machine->vdso_info = NULL; - machine->kmaps.machine = machine; machine->pid = pid; machine->symbol_filter = NULL; @@ -319,7 +318,7 @@ static void machine__update_thread_pid(struct machine *machine, goto out_err; if (!leader->mg) - leader->mg = map_groups__new(); + leader->mg = map_groups__new(machine); if (!leader->mg) goto out_err; diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 2137c4596ec7..040a785c857b 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -413,14 +413,14 @@ u64 map__objdump_2mem(struct 
map *map, u64 ip) return ip + map->reloc; } -void map_groups__init(struct map_groups *mg) +void map_groups__init(struct map_groups *mg, struct machine *machine) { int i; for (i = 0; i < MAP__NR_TYPES; ++i) { mg->maps[i] = RB_ROOT; INIT_LIST_HEAD(&mg->removed_maps[i]); } - mg->machine = NULL; + mg->machine = machine; mg->refcnt = 1; } @@ -471,12 +471,12 @@ bool map_groups__empty(struct map_groups *mg) return true; } -struct map_groups *map_groups__new(void) +struct map_groups *map_groups__new(struct machine *machine) { struct map_groups *mg = malloc(sizeof(*mg)); if (mg != NULL) - map_groups__init(mg); + map_groups__init(mg, machine); return mg; } diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 2f83954af050..6951a9d42339 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -64,7 +64,7 @@ struct map_groups { int refcnt; }; -struct map_groups *map_groups__new(void); +struct map_groups *map_groups__new(struct machine *machine); void map_groups__delete(struct map_groups *mg); bool map_groups__empty(struct map_groups *mg); @@ -150,7 +150,7 @@ void maps__remove(struct rb_root *maps, struct map *map); struct map *maps__find(struct rb_root *maps, u64 addr); struct map *maps__first(struct rb_root *maps); struct map *maps__next(struct map *map); -void map_groups__init(struct map_groups *mg); +void map_groups__init(struct map_groups *mg, struct machine *machine); void map_groups__exit(struct map_groups *mg); int map_groups__clone(struct map_groups *mg, struct map_groups *parent, enum map_type type); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index c41411726c7a..8db9626f6835 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -15,7 +15,7 @@ int thread__init_map_groups(struct thread *thread, struct machine *machine) pid_t pid = thread->pid_; if (pid == thread->tid || pid == -1) { - thread->mg = map_groups__new(); + thread->mg = map_groups__new(machine); } else { leader = machine__findnew_thread(machine, pid, 
pid); if (leader) -- cgit v1.2.3 From bb871a9c8d68692ed2513b3f0e1c010c2ac12f44 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 23 Oct 2014 12:50:25 -0300 Subject: perf tools: A thread's machine can be found via thread->mg->machine So stop passing both machine and thread to several thread methods, reducing function signature length. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jean Pihet Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-ckcy19dcp1jfkmdihdjcqdn1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/skip-callchain-idx.c | 5 ++-- tools/perf/builtin-inject.c | 3 +- tools/perf/builtin-script.c | 7 ++--- tools/perf/builtin-timechart.c | 2 +- tools/perf/builtin-trace.c | 6 ++-- tools/perf/tests/code-reading.c | 3 +- tools/perf/tests/mmap-thread-lookup.c | 2 +- tools/perf/util/build-id.c | 3 +- tools/perf/util/callchain.h | 6 ++-- tools/perf/util/event.c | 19 ++++++------ tools/perf/util/event.h | 1 - tools/perf/util/machine.c | 35 +++++++++-------------- tools/perf/util/thread.c | 4 +-- tools/perf/util/thread.h | 5 ++-- tools/perf/util/unwind-libdw.c | 4 +-- tools/perf/util/unwind-libunwind.c | 13 ++++----- 16 files changed, 48 insertions(+), 70 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index 9892b0f0bec4..3bb50eac5542 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -232,8 +232,7 @@ out: * index: of callchain entry that needs to be ignored (if any) * -1 if no entry needs to be ignored or in case of errors */ -int arch_skip_callchain_idx(struct machine *machine, struct thread *thread, - struct ip_callchain *chain) +int arch_skip_callchain_idx(struct thread *thread, struct 
ip_callchain *chain) { struct addr_location al; struct dso *dso = NULL; @@ -246,7 +245,7 @@ int arch_skip_callchain_idx(struct machine *machine, struct thread *thread, ip = chain->ips[2]; - thread__find_addr_location(thread, machine, PERF_RECORD_MISC_USER, + thread__find_addr_location(thread, PERF_RECORD_MISC_USER, MAP__FUNCTION, ip, &al); if (al.map) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index de99ca1bb942..06f1758951f1 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -217,8 +217,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool, goto repipe; } - thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, - sample->ip, &al); + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, &al); if (al.map != NULL) { if (!al.map->dso->hit) { diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 9708a1290571..b35517f2ceb5 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -379,7 +379,6 @@ static void print_sample_start(struct perf_sample *sample, static void print_sample_addr(union perf_event *event, struct perf_sample *sample, - struct machine *machine, struct thread *thread, struct perf_event_attr *attr) { @@ -390,7 +389,7 @@ static void print_sample_addr(union perf_event *event, if (!sample_addr_correlates_sym(attr)) return; - perf_event__preprocess_sample_addr(event, sample, machine, thread, &al); + perf_event__preprocess_sample_addr(event, sample, thread, &al); if (PRINT_FIELD(SYM)) { printf(" "); @@ -438,7 +437,7 @@ static void print_sample_bts(union perf_event *event, ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) && !output[attr->type].user_set)) { printf(" => "); - print_sample_addr(event, sample, al->machine, thread, attr); + print_sample_addr(event, sample, thread, attr); } if (print_srcline_last) @@ -475,7 +474,7 @@ static void process_event(union perf_event *event, struct perf_sample *sample, 
event_format__print(evsel->tp_format, sample->cpu, sample->raw_data, sample->raw_size); if (PRINT_FIELD(ADDR)) - print_sample_addr(event, sample, al->machine, thread, attr); + print_sample_addr(event, sample, thread, attr); if (PRINT_FIELD(IP)) { if (!symbol_conf.use_callchain) diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 35b425b6293f..f5fb256d90d5 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -528,7 +528,7 @@ static const char *cat_backtrace(union perf_event *event, } tal.filtered = 0; - thread__find_addr_location(al.thread, machine, cpumode, + thread__find_addr_location(al.thread, cpumode, MAP__FUNCTION, ip, &tal); if (tal.sym) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index fb126459b134..83a4835c8118 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1846,7 +1846,7 @@ static int trace__pgfault(struct trace *trace, if (trace->summary_only) return 0; - thread__find_addr_location(thread, trace->host, cpumode, MAP__FUNCTION, + thread__find_addr_location(thread, cpumode, MAP__FUNCTION, sample->ip, &al); trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output); @@ -1859,11 +1859,11 @@ static int trace__pgfault(struct trace *trace, fprintf(trace->output, "] => "); - thread__find_addr_location(thread, trace->host, cpumode, MAP__VARIABLE, + thread__find_addr_location(thread, cpumode, MAP__VARIABLE, sample->addr, &al); if (!al.map) { - thread__find_addr_location(thread, trace->host, cpumode, + thread__find_addr_location(thread, cpumode, MAP__FUNCTION, sample->addr, &al); if (al.map) diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 67f2d6323558..144a41236456 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -145,8 +145,7 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode, pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr); - 
thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, addr, - &al); + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al); if (!al.map || !al.map->dso) { pr_debug("thread__find_addr_map failed\n"); return -1; diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index 4a456fef66ca..2113f1c8611f 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -187,7 +187,7 @@ static int mmap_events(synth_cb synth) pr_debug("looking for map %p\n", td->map); - thread__find_addr_map(thread, machine, + thread__find_addr_map(thread, PERF_RECORD_MISC_USER, MAP__FUNCTION, (unsigned long) (td->map + 1), &al); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index a904a4cfe7d3..2e7c68e39330 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -33,8 +33,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused, return -1; } - thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, - sample->ip, &al); + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, &al); if (al.map != NULL) al.map->dso->hit = 1; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 94cfefddf4db..3caccc2c173c 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -184,11 +184,9 @@ static inline void callchain_cursor_snapshot(struct callchain_cursor *dest, } #ifdef HAVE_SKIP_CALLCHAIN_IDX -extern int arch_skip_callchain_idx(struct machine *machine, - struct thread *thread, struct ip_callchain *chain); +extern int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain); #else -static inline int arch_skip_callchain_idx(struct machine *machine __maybe_unused, - struct thread *thread __maybe_unused, +static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused, struct ip_callchain *chain __maybe_unused) { return -1; diff --git a/tools/perf/util/event.c 
b/tools/perf/util/event.c index 4af6b279e34a..e00a29fb099f 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -730,12 +730,12 @@ int perf_event__process(struct perf_tool *tool __maybe_unused, return machine__process_event(machine, event, sample); } -void thread__find_addr_map(struct thread *thread, - struct machine *machine, u8 cpumode, +void thread__find_addr_map(struct thread *thread, u8 cpumode, enum map_type type, u64 addr, struct addr_location *al) { struct map_groups *mg = thread->mg; + struct machine *machine = mg->machine; bool load_map = false; al->machine = machine; @@ -806,14 +806,14 @@ try_again: } } -void thread__find_addr_location(struct thread *thread, struct machine *machine, +void thread__find_addr_location(struct thread *thread, u8 cpumode, enum map_type type, u64 addr, struct addr_location *al) { - thread__find_addr_map(thread, machine, cpumode, type, addr, al); + thread__find_addr_map(thread, cpumode, type, addr, al); if (al->map != NULL) al->sym = map__find_symbol(al->map, al->addr, - machine->symbol_filter); + thread->mg->machine->symbol_filter); else al->sym = NULL; } @@ -842,8 +842,7 @@ int perf_event__preprocess_sample(const union perf_event *event, machine->vmlinux_maps[MAP__FUNCTION] == NULL) machine__create_kernel_maps(machine); - thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, - sample->ip, al); + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, al); dump_printf(" ...... dso: %s\n", al->map ? al->map->dso->long_name : al->level == 'H' ? 
"[hypervisor]" : ""); @@ -902,16 +901,14 @@ bool sample_addr_correlates_sym(struct perf_event_attr *attr) void perf_event__preprocess_sample_addr(union perf_event *event, struct perf_sample *sample, - struct machine *machine, struct thread *thread, struct addr_location *al) { u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION, - sample->addr, al); + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->addr, al); if (!al->map) - thread__find_addr_map(thread, machine, cpumode, MAP__VARIABLE, + thread__find_addr_map(thread, cpumode, MAP__VARIABLE, sample->addr, al); al->cpu = sample->cpu; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 5699e7e2a790..5f0e0b89e130 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -322,7 +322,6 @@ bool is_bts_event(struct perf_event_attr *attr); bool sample_addr_correlates_sym(struct perf_event_attr *attr); void perf_event__preprocess_sample_addr(union perf_event *event, struct perf_sample *sample, - struct machine *machine, struct thread *thread, struct addr_location *al); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index c70b3ff7b289..08e63fdbd14f 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1289,7 +1289,7 @@ static bool symbol__match_regex(struct symbol *sym, regex_t *regex) return 0; } -static void ip__resolve_ams(struct machine *machine, struct thread *thread, +static void ip__resolve_ams(struct thread *thread, struct addr_map_symbol *ams, u64 ip) { @@ -1303,7 +1303,7 @@ static void ip__resolve_ams(struct machine *machine, struct thread *thread, * Thus, we have to try consecutively until we find a match * or else, the symbol is unknown */ - thread__find_cpumode_addr_location(thread, machine, MAP__FUNCTION, ip, &al); + thread__find_cpumode_addr_location(thread, MAP__FUNCTION, ip, &al); ams->addr = ip; ams->al_addr = al.addr; @@ -1311,23 +1311,21 @@ static 
void ip__resolve_ams(struct machine *machine, struct thread *thread, ams->map = al.map; } -static void ip__resolve_data(struct machine *machine, struct thread *thread, +static void ip__resolve_data(struct thread *thread, u8 m, struct addr_map_symbol *ams, u64 addr) { struct addr_location al; memset(&al, 0, sizeof(al)); - thread__find_addr_location(thread, machine, m, MAP__VARIABLE, addr, - &al); + thread__find_addr_location(thread, m, MAP__VARIABLE, addr, &al); if (al.map == NULL) { /* * some shared data regions have execute bit set which puts * their mapping in the MAP__FUNCTION type array. * Check there as a fallback option before dropping the sample. */ - thread__find_addr_location(thread, machine, m, MAP__FUNCTION, addr, - &al); + thread__find_addr_location(thread, m, MAP__FUNCTION, addr, &al); } ams->addr = addr; @@ -1344,9 +1342,8 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, if (!mi) return NULL; - ip__resolve_ams(al->machine, al->thread, &mi->iaddr, sample->ip); - ip__resolve_data(al->machine, al->thread, al->cpumode, - &mi->daddr, sample->addr); + ip__resolve_ams(al->thread, &mi->iaddr, sample->ip); + ip__resolve_data(al->thread, al->cpumode, &mi->daddr, sample->addr); mi->data_src.val = sample->data_src; return mi; @@ -1363,15 +1360,14 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, return NULL; for (i = 0; i < bs->nr; i++) { - ip__resolve_ams(al->machine, al->thread, &bi[i].to, bs->entries[i].to); - ip__resolve_ams(al->machine, al->thread, &bi[i].from, bs->entries[i].from); + ip__resolve_ams(al->thread, &bi[i].to, bs->entries[i].to); + ip__resolve_ams(al->thread, &bi[i].from, bs->entries[i].from); bi[i].flags = bs->entries[i].flags; } return bi; } -static int machine__resolve_callchain_sample(struct machine *machine, - struct thread *thread, +static int thread__resolve_callchain_sample(struct thread *thread, struct ip_callchain *chain, struct symbol **parent, struct addr_location *root_al, @@ -1395,7 
+1391,7 @@ static int machine__resolve_callchain_sample(struct machine *machine, * Based on DWARF debug information, some architectures skip * a callchain entry saved by the kernel. */ - skip_idx = arch_skip_callchain_idx(machine, thread, chain); + skip_idx = arch_skip_callchain_idx(thread, chain); for (i = 0; i < chain_nr; i++) { u64 ip; @@ -1437,7 +1433,7 @@ static int machine__resolve_callchain_sample(struct machine *machine, } al.filtered = 0; - thread__find_addr_location(thread, machine, cpumode, + thread__find_addr_location(thread, cpumode, MAP__FUNCTION, ip, &al); if (al.sym != NULL) { if (sort__has_parent && !*parent && @@ -1476,11 +1472,8 @@ int machine__resolve_callchain(struct machine *machine, struct addr_location *root_al, int max_stack) { - int ret; - - ret = machine__resolve_callchain_sample(machine, thread, - sample->callchain, parent, - root_al, max_stack); + int ret = thread__resolve_callchain_sample(thread, sample->callchain, + parent, root_al, max_stack); if (ret) return ret; diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 8db9626f6835..bf5bf858b7f6 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -198,7 +198,6 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) } void thread__find_cpumode_addr_location(struct thread *thread, - struct machine *machine, enum map_type type, u64 addr, struct addr_location *al) { @@ -211,8 +210,7 @@ void thread__find_cpumode_addr_location(struct thread *thread, }; for (i = 0; i < ARRAY_SIZE(cpumodes); i++) { - thread__find_addr_location(thread, machine, cpumodes[i], type, - addr, al); + thread__find_addr_location(thread, cpumodes[i], type, addr, al); if (al->map) break; } diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 8c75fa774706..6ef9fe6ff8da 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -54,16 +54,15 @@ void thread__insert_map(struct thread *thread, struct map *map); int thread__fork(struct 
thread *thread, struct thread *parent, u64 timestamp); size_t thread__fprintf(struct thread *thread, FILE *fp); -void thread__find_addr_map(struct thread *thread, struct machine *machine, +void thread__find_addr_map(struct thread *thread, u8 cpumode, enum map_type type, u64 addr, struct addr_location *al); -void thread__find_addr_location(struct thread *thread, struct machine *machine, +void thread__find_addr_location(struct thread *thread, u8 cpumode, enum map_type type, u64 addr, struct addr_location *al); void thread__find_cpumode_addr_location(struct thread *thread, - struct machine *machine, enum map_type type, u64 addr, struct addr_location *al); diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 7419768c38b1..f24b350ab192 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -26,7 +26,7 @@ static int __report_module(struct addr_location *al, u64 ip, Dwfl_Module *mod; struct dso *dso = NULL; - thread__find_addr_location(ui->thread, ui->machine, + thread__find_addr_location(ui->thread, PERF_RECORD_MISC_USER, MAP__FUNCTION, ip, al); @@ -89,7 +89,7 @@ static int access_dso_mem(struct unwind_info *ui, Dwarf_Addr addr, struct addr_location al; ssize_t size; - thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER, + thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, MAP__FUNCTION, addr, &al); if (!al.map) { pr_debug("unwind: no map for %lx\n", (unsigned long)addr); diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 4d45c0dfe343..29acc8cccb56 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -284,7 +284,7 @@ static struct map *find_map(unw_word_t ip, struct unwind_info *ui) { struct addr_location al; - thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER, + thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, MAP__FUNCTION, ip, &al); return al.map; } @@ -374,7 +374,7 @@ static int 
access_dso_mem(struct unwind_info *ui, unw_word_t addr, struct addr_location al; ssize_t size; - thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER, + thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, MAP__FUNCTION, addr, &al); if (!al.map) { pr_debug("unwind: no map for %lx\n", (unsigned long)addr); @@ -476,14 +476,13 @@ static void put_unwind_info(unw_addr_space_t __maybe_unused as, pr_debug("unwind: put_unwind_info called\n"); } -static int entry(u64 ip, struct thread *thread, struct machine *machine, +static int entry(u64 ip, struct thread *thread, unwind_entry_cb_t cb, void *arg) { struct unwind_entry e; struct addr_location al; - thread__find_addr_location(thread, machine, - PERF_RECORD_MISC_USER, + thread__find_addr_location(thread, PERF_RECORD_MISC_USER, MAP__FUNCTION, ip, &al); e.ip = ip; @@ -586,7 +585,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, unw_word_t ip; unw_get_reg(&c, UNW_REG_IP, &ip); - ret = ip ? entry(ip, ui->thread, ui->machine, cb, arg) : 0; + ret = ip ? entry(ip, ui->thread, cb, arg) : 0; } return ret; @@ -611,7 +610,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, if (ret) return ret; - ret = entry(ip, thread, machine, cb, arg); + ret = entry(ip, thread, cb, arg); if (ret) return -ENOMEM; -- cgit v1.2.3 From cc8b7c2bf553151a579a8009020875faa1d43e29 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 23 Oct 2014 15:26:17 -0300 Subject: perf thread: Adopt resolve_callchain method from machine Shortening function signature length too, since a thread's machine can be obtained from thread->mg->machine, no need to pass thread, machine.
Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jean Pihet Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-5wb6css280ty0cel5p0zo2b1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 4 ++-- tools/perf/util/machine.c | 15 +++++++-------- tools/perf/util/machine.h | 13 ++++++------- tools/perf/util/scripting-engines/trace-event-python.c | 6 +++--- tools/perf/util/session.c | 6 +++--- 5 files changed, 21 insertions(+), 23 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index c84d3f8dcb75..00229809a904 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -754,8 +754,8 @@ int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain || sort__has_parent) { - return machine__resolve_callchain(al->machine, evsel, al->thread, - sample, parent, al, max_stack); + return thread__resolve_callchain(al->thread, evsel, sample, + parent, al, max_stack); } return 0; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 08e63fdbd14f..fd192e4885cc 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1464,13 +1464,12 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) entry->map, entry->sym); } -int machine__resolve_callchain(struct machine *machine, - struct perf_evsel *evsel, - struct thread *thread, - struct perf_sample *sample, - struct symbol **parent, - struct addr_location *root_al, - int max_stack) +int thread__resolve_callchain(struct thread *thread, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + int max_stack) { int ret = thread__resolve_callchain_sample(thread, sample->callchain, parent, root_al, 
max_stack); @@ -1487,7 +1486,7 @@ int machine__resolve_callchain(struct machine *machine, (!sample->user_stack.size)) return 0; - return unwind__get_entries(unwind_entry, &callchain_cursor, machine, + return unwind__get_entries(unwind_entry, &callchain_cursor, thread->mg->machine, thread, sample, max_stack); } diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 2b651a7f5d0d..88ec74e18cbf 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -122,13 +122,12 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, struct addr_location *al); struct mem_info *sample__resolve_mem(struct perf_sample *sample, struct addr_location *al); -int machine__resolve_callchain(struct machine *machine, - struct perf_evsel *evsel, - struct thread *thread, - struct perf_sample *sample, - struct symbol **parent, - struct addr_location *root_al, - int max_stack); +int thread__resolve_callchain(struct thread *thread, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + int max_stack); /* * Default guest kernel is defined by parameter --guestkallsyms diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 496f21cadd97..25e5a238f1cb 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -312,9 +312,9 @@ static PyObject *python_process_callchain(struct perf_sample *sample, if (!symbol_conf.use_callchain || !sample->callchain) goto exit; - if (machine__resolve_callchain(al->machine, evsel, al->thread, - sample, NULL, NULL, - PERF_MAX_STACK_DEPTH) != 0) { + if (thread__resolve_callchain(al->thread, evsel, + sample, NULL, NULL, + PERF_MAX_STACK_DEPTH) != 0) { pr_err("Failed to resolve callchain. 
Skipping\n"); goto exit; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 6702ac28754b..776010844cdc 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1417,9 +1417,9 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample, if (symbol_conf.use_callchain && sample->callchain) { struct addr_location node_al; - if (machine__resolve_callchain(al->machine, evsel, al->thread, - sample, NULL, NULL, - PERF_MAX_STACK_DEPTH) != 0) { + if (thread__resolve_callchain(al->thread, evsel, + sample, NULL, NULL, + PERF_MAX_STACK_DEPTH) != 0) { if (verbose) error("Failed to resolve callchain. Skipping\n"); return; -- cgit v1.2.3 From dd8c17a5fe80148aab8844e8774cf341212a4eb1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 23 Oct 2014 16:42:19 -0300 Subject: perf callchains: Use thread->mg->machine The unwind__get_entries() already receives the thread parameter, from where it can obtain the matching machine structure, shorten the signature. 
Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jean Pihet Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-isjc6bm8mv4612mhi6af64go@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/dwarf-unwind.c | 18 +++++++++--------- tools/perf/util/machine.c | 2 +- tools/perf/util/unwind-libdw.c | 4 ++-- tools/perf/util/unwind-libunwind.c | 4 ++-- tools/perf/util/unwind.h | 2 -- 5 files changed, 14 insertions(+), 16 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index fc25e57f4a5d..ab28cca2cb97 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -59,7 +59,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) } __attribute__ ((noinline)) -static int unwind_thread(struct thread *thread, struct machine *machine) +static int unwind_thread(struct thread *thread) { struct perf_sample sample; unsigned long cnt = 0; @@ -72,7 +72,7 @@ static int unwind_thread(struct thread *thread, struct machine *machine) goto out; } - err = unwind__get_entries(unwind_entry, &cnt, machine, thread, + err = unwind__get_entries(unwind_entry, &cnt, thread, &sample, MAX_STACK); if (err) pr_debug("unwind failed\n"); @@ -89,21 +89,21 @@ static int unwind_thread(struct thread *thread, struct machine *machine) } __attribute__ ((noinline)) -static int krava_3(struct thread *thread, struct machine *machine) +static int krava_3(struct thread *thread) { - return unwind_thread(thread, machine); + return unwind_thread(thread); } __attribute__ ((noinline)) -static int krava_2(struct thread *thread, struct machine *machine) +static int krava_2(struct thread *thread) { - return krava_3(thread, machine); + return krava_3(thread); } __attribute__ ((noinline)) -static int krava_1(struct thread *thread, struct machine *machine) 
+static int krava_1(struct thread *thread) { - return krava_2(thread, machine); + return krava_2(thread); } int test__dwarf_unwind(void) @@ -137,7 +137,7 @@ int test__dwarf_unwind(void) goto out; } - err = krava_1(thread, machine); + err = krava_1(thread); out: machine__delete_threads(machine); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index fd192e4885cc..51a630301afa 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1486,7 +1486,7 @@ int thread__resolve_callchain(struct thread *thread, (!sample->user_stack.size)) return 0; - return unwind__get_entries(unwind_entry, &callchain_cursor, thread->mg->machine, + return unwind__get_entries(unwind_entry, &callchain_cursor, thread, sample, max_stack); } diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index f24b350ab192..2dcfe9a7c8d0 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -164,14 +164,14 @@ frame_callback(Dwfl_Frame *state, void *arg) } int unwind__get_entries(unwind_entry_cb_t cb, void *arg, - struct machine *machine, struct thread *thread, + struct thread *thread, struct perf_sample *data, int max_stack) { struct unwind_info ui = { .sample = data, .thread = thread, - .machine = machine, + .machine = thread->mg->machine, .cb = cb, .arg = arg, .max_stack = max_stack, diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 29acc8cccb56..371219a6daf1 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -592,14 +592,14 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, } int unwind__get_entries(unwind_entry_cb_t cb, void *arg, - struct machine *machine, struct thread *thread, + struct thread *thread, struct perf_sample *data, int max_stack) { u64 ip; struct unwind_info ui = { .sample = data, .thread = thread, - .machine = machine, + .machine = thread->mg->machine, }; int ret; diff --git 
a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h index f50b737235eb..12790cf94618 100644 --- a/tools/perf/util/unwind.h +++ b/tools/perf/util/unwind.h @@ -16,7 +16,6 @@ typedef int (*unwind_entry_cb_t)(struct unwind_entry *entry, void *arg); #ifdef HAVE_DWARF_UNWIND_SUPPORT int unwind__get_entries(unwind_entry_cb_t cb, void *arg, - struct machine *machine, struct thread *thread, struct perf_sample *data, int max_stack); /* libunwind specific */ @@ -38,7 +37,6 @@ static inline void unwind__finish_access(struct thread *thread __maybe_unused) { static inline int unwind__get_entries(unwind_entry_cb_t cb __maybe_unused, void *arg __maybe_unused, - struct machine *machine __maybe_unused, struct thread *thread __maybe_unused, struct perf_sample *data __maybe_unused, int max_stack __maybe_unused) -- cgit v1.2.3 From d152d1be5962ace0706066db71b4f05dff8764eb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 23 Oct 2014 00:15:45 +0900 Subject: perf tools: Add PARSE_OPT_DISABLED flag In some cases, we need to reuse existing options with some of them disabled. To do that, add PARSE_OPT_DISABLED flag and set_option_flag() function.
Signed-off-by: Namhyung Kim Acked-by: Hemant Kumar Cc: Alexander Yarygin Cc: David Ahern Cc: Hemant Kumar Cc: Ingo Molnar Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1413990949-13953-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-options.c | 17 +++++++++++++++++ tools/perf/util/parse-options.h | 2 ++ 2 files changed, 19 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index bf48092983c6..b6016101b40b 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -42,6 +42,8 @@ static int get_value(struct parse_opt_ctx_t *p, return opterror(opt, "takes no value", flags); if (unset && (opt->flags & PARSE_OPT_NONEG)) return opterror(opt, "isn't available", flags); + if (opt->flags & PARSE_OPT_DISABLED) + return opterror(opt, "is not usable", flags); if (!(flags & OPT_SHORT) && p->opt) { switch (opt->type) { @@ -509,6 +511,8 @@ static void print_option_help(const struct option *opts, int full) } if (!full && (opts->flags & PARSE_OPT_HIDDEN)) return; + if (opts->flags & PARSE_OPT_DISABLED) + return; pos = fprintf(stderr, " "); if (opts->short_name) @@ -679,3 +683,16 @@ int parse_opt_verbosity_cb(const struct option *opt, } return 0; } + +void set_option_flag(struct option *opts, int shortopt, const char *longopt, + int flag) +{ + for (; opts->type != OPTION_END; opts++) { + if ((shortopt && opts->short_name == shortopt) || + (opts->long_name && longopt && + !strcmp(opts->long_name, longopt))) { + opts->flags |= flag; + break; + } + } +} diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index b59ba858e73d..b7c80dbc7627 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h @@ -38,6 +38,7 @@ enum parse_opt_option_flags { PARSE_OPT_NONEG = 4, PARSE_OPT_HIDDEN = 8, PARSE_OPT_LASTARG_DEFAULT = 16, + PARSE_OPT_DISABLED = 32, }; 
struct option; @@ -211,4 +212,5 @@ extern int parse_opt_verbosity_cb(const struct option *, const char *, int); extern const char *parse_options_fix_filename(const char *prefix, const char *file); +void set_option_flag(struct option *opts, int sopt, const char *lopt, int flag); #endif /* __PERF_PARSE_OPTIONS_H */ -- cgit v1.2.3 From 42bd71d0812ecd955cf65a14375ebe6a3195d979 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 23 Oct 2014 00:15:48 +0900 Subject: perf tools: Add support for exclusive option Some options cannot be used at the same time. To handle such options add a new PARSE_OPT_EXCLUSIVE flag and show error message if more than one of them is used. Signed-off-by: Namhyung Kim Reviewed-by: Masami Hiramatsu Acked-by: Hemant Kumar Cc: David Ahern Cc: Hemant Kumar Cc: Ingo Molnar Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1413990949-13953-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-options.c | 59 +++++++++++++++++++++++++++++++++-------- tools/perf/util/parse-options.h | 2 ++ 2 files changed, 50 insertions(+), 11 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index b6016101b40b..f62dee7bd924 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -45,6 +45,23 @@ static int get_value(struct parse_opt_ctx_t *p, if (opt->flags & PARSE_OPT_DISABLED) return opterror(opt, "is not usable", flags); + if (opt->flags & PARSE_OPT_EXCLUSIVE) { + if (p->excl_opt) { + char msg[128]; + + if (((flags & OPT_SHORT) && p->excl_opt->short_name) || + p->excl_opt->long_name == NULL) { + scnprintf(msg, sizeof(msg), "cannot be used with switch `%c'", + p->excl_opt->short_name); + } else { + scnprintf(msg, sizeof(msg), "cannot be used with %s", + p->excl_opt->long_name); + } + opterror(opt, msg, flags); + return -3; + } + p->excl_opt = opt; + } if (!(flags & OPT_SHORT) && 
p->opt) { switch (opt->type) { case OPTION_CALLBACK: @@ -345,13 +362,14 @@ int parse_options_step(struct parse_opt_ctx_t *ctx, const char * const usagestr[]) { int internal_help = !(ctx->flags & PARSE_OPT_NO_INTERNAL_HELP); + int excl_short_opt = 1; + const char *arg; /* we must reset ->opt, unknown short option leave it dangling */ ctx->opt = NULL; for (; ctx->argc; ctx->argc--, ctx->argv++) { - const char *arg = ctx->argv[0]; - + arg = ctx->argv[0]; if (*arg != '-' || !arg[1]) { if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION) break; @@ -360,19 +378,21 @@ int parse_options_step(struct parse_opt_ctx_t *ctx, } if (arg[1] != '-') { - ctx->opt = arg + 1; + ctx->opt = ++arg; if (internal_help && *ctx->opt == 'h') return usage_with_options_internal(usagestr, options, 0); switch (parse_short_opt(ctx, options)) { case -1: - return parse_options_usage(usagestr, options, arg + 1, 1); + return parse_options_usage(usagestr, options, arg, 1); case -2: goto unknown; + case -3: + goto exclusive; default: break; } if (ctx->opt) - check_typos(arg + 1, options); + check_typos(arg, options); while (ctx->opt) { if (internal_help && *ctx->opt == 'h') return usage_with_options_internal(usagestr, options, 0); @@ -389,6 +409,8 @@ int parse_options_step(struct parse_opt_ctx_t *ctx, ctx->argv[0] = strdup(ctx->opt - 1); *(char *)ctx->argv[0] = '-'; goto unknown; + case -3: + goto exclusive; default: break; } @@ -404,19 +426,23 @@ int parse_options_step(struct parse_opt_ctx_t *ctx, break; } - if (internal_help && !strcmp(arg + 2, "help-all")) + arg += 2; + if (internal_help && !strcmp(arg, "help-all")) return usage_with_options_internal(usagestr, options, 1); - if (internal_help && !strcmp(arg + 2, "help")) + if (internal_help && !strcmp(arg, "help")) return usage_with_options_internal(usagestr, options, 0); - if (!strcmp(arg + 2, "list-opts")) + if (!strcmp(arg, "list-opts")) return PARSE_OPT_LIST_OPTS; - if (!strcmp(arg + 2, "list-cmds")) + if (!strcmp(arg, "list-cmds")) return 
PARSE_OPT_LIST_SUBCMDS; - switch (parse_long_opt(ctx, arg + 2, options)) { + switch (parse_long_opt(ctx, arg, options)) { case -1: - return parse_options_usage(usagestr, options, arg + 2, 0); + return parse_options_usage(usagestr, options, arg, 0); case -2: goto unknown; + case -3: + excl_short_opt = 0; + goto exclusive; default: break; } @@ -428,6 +454,17 @@ unknown: ctx->opt = NULL; } return PARSE_OPT_DONE; + +exclusive: + parse_options_usage(usagestr, options, arg, excl_short_opt); + if ((excl_short_opt && ctx->excl_opt->short_name) || + ctx->excl_opt->long_name == NULL) { + char opt = ctx->excl_opt->short_name; + parse_options_usage(NULL, options, &opt, 1); + } else { + parse_options_usage(NULL, options, ctx->excl_opt->long_name, 0); + } + return PARSE_OPT_HELP; } int parse_options_end(struct parse_opt_ctx_t *ctx) diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index b7c80dbc7627..97b153fb4999 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h @@ -39,6 +39,7 @@ enum parse_opt_option_flags { PARSE_OPT_HIDDEN = 8, PARSE_OPT_LASTARG_DEFAULT = 16, PARSE_OPT_DISABLED = 32, + PARSE_OPT_EXCLUSIVE = 64, }; struct option; @@ -174,6 +175,7 @@ struct parse_opt_ctx_t { const char **out; int argc, cpidx; const char *opt; + const struct option *excl_opt; int flags; }; -- cgit v1.2.3 From ed3077585f2f041e0db0fc41060b69673e98963b Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Thu, 16 Oct 2014 11:08:29 +0800 Subject: perf tools: Ensure return negative value when write header error When 'perf record' writes headers, it calls write_xxx in tools/perf/util/header.c and checks the return value. It rolls back all work only when the return value is negative. This patch ensures write_cpudesc() and write_total_mem() return a negative number on error. Without this patch, the headers reported by 'perf report' are wrong on some platforms. The following output was captured on ARM, which doesn't contain a "Processor" field in /proc/cpuinfo.
See "cpudesc", "total memory" and "cmdline" field. bash-4.2# perf record ls ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (~36 samples) ] bash-4.2# perf report --stdio --header Error: The perf.data file has no samples! # ======== # captured on: Fri Sep 12 10:09:10 2014 # hostname : arma15el # os release : 3.17.0+ # perf version : 3.10.53 # arch : armv7l # nrcpus online : 4 # nrcpus avail : 1 # cpudesc : (null) # total memory : 0 kB # cmdline : # event : name = cycles, type = 0, config = 0x0, config1 = 0x0, config2 = 0x0, excl_usr = 0, excl_kern = 0, excl_host = 0, excl_guest = 1, precise_ip = 0 # pmu mappings: not available # ======== # Cc: Adrian Hunter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Li Zefan Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Waiman Long Link: http://lkml.kernel.org/r/1413428909-80017-1-git-send-email-wangnan0@huawei.com Signed-off-by: Wang Nan Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 26f5b2fe5dc8..0ecf4a304cbc 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -601,8 +601,10 @@ static int __write_cpudesc(int fd, const char *cpuinfo_proc) break; } - if (ret) + if (ret) { + ret = -1; goto done; + } s = buf; @@ -965,7 +967,8 @@ static int write_total_mem(int fd, struct perf_header *h __maybe_unused, n = sscanf(buf, "%*s %"PRIu64, &mem); if (n == 1) ret = do_write(fd, &mem, sizeof(mem)); - } + } else + ret = -1; free(buf); fclose(fp); return ret; -- cgit v1.2.3 From 42634bc7a02ead59cf2d50e60d8b8f825de8a3b0 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 23 Oct 2014 13:45:10 +0300 Subject: perf pmu: Let pmu's with no events show up on perf list perf list only lists PMUs with events. Add a flag to cause a PMU to be also listed separately. 
Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1414061124-26830-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 13 +++++++++++-- tools/perf/util/pmu.h | 1 + 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index e243ad962a4d..91dca604c422 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -747,15 +747,18 @@ void print_pmu_events(const char *event_glob, bool name_only) pmu = NULL; len = 0; - while ((pmu = perf_pmu__scan(pmu)) != NULL) + while ((pmu = perf_pmu__scan(pmu)) != NULL) { list_for_each_entry(alias, &pmu->aliases, list) len++; + if (pmu->selectable) + len++; + } aliases = malloc(sizeof(char *) * len); if (!aliases) return; pmu = NULL; j = 0; - while ((pmu = perf_pmu__scan(pmu)) != NULL) + while ((pmu = perf_pmu__scan(pmu)) != NULL) { list_for_each_entry(alias, &pmu->aliases, list) { char *name = format_alias(buf, sizeof(buf), pmu, alias); bool is_cpu = !strcmp(pmu->name, "cpu"); @@ -772,6 +775,12 @@ void print_pmu_events(const char *event_glob, bool name_only) aliases[j] = strdup(aliases[j]); j++; } + if (pmu->selectable) { + scnprintf(buf, sizeof(buf), "%s//", pmu->name); + aliases[j] = strdup(buf); + j++; + } + } len = j; qsort(aliases, len, sizeof(char *), cmp_string); for (j = 0; j < len; j++) { diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index fe9dfbee8eed..8092de78e818 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -18,6 +18,7 @@ struct perf_event_attr; struct perf_pmu { char *name; __u32 type; + bool selectable; struct perf_event_attr *default_config; struct cpu_map *cpus; struct list_head format; /* HEAD struct perf_pmu_format -> list */ -- cgit v1.2.3 From e477f3f01a89a8fd44031e7f2ba6ffcab037336c Mon Sep 17 
00:00:00 2001 From: Adrian Hunter Date: Thu, 23 Oct 2014 18:16:03 -0300 Subject: perf tools: Build programs to copy 32-bit compatibility perf tools copy VDSO out of memory. However, on 64-bit machines there may be 32-bit compatibility VDSOs also. To copy those requires separate 32-bit executables. This patch adds to the build additional programs perf-read-vdso32 and perf-read-vdsox32 for 32-bit and x32 respectively. Signed-off-by: Adrian Hunter Cc: Peter Zijlstra Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1414061124-26830-15-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 38 ++++++++++++++++++++++--- tools/perf/config/Makefile | 25 +++++++++++++++- tools/perf/config/Makefile.arch | 8 ++++++ tools/perf/config/feature-checks/Makefile | 10 ++++++- tools/perf/config/feature-checks/test-compile.c | 4 +++ tools/perf/perf-read-vdso.c | 34 ++++++++++++++++++++++ tools/perf/util/find-vdso-map.c | 30 +++++++++++++++++++ tools/perf/util/vdso.c | 37 ++++-------------------- 8 files changed, 149 insertions(+), 37 deletions(-) create mode 100644 tools/perf/config/feature-checks/test-compile.c create mode 100644 tools/perf/perf-read-vdso.c create mode 100644 tools/perf/util/find-vdso-map.c (limited to 'tools/perf/util') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 262916f4a377..9c4ced0fc845 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -60,6 +60,12 @@ include config/utilities.mak # # Define NO_LIBDW_DWARF_UNWIND if you do not want libdw support # for dwarf backtrace post unwind.
+# +# Define NO_PERF_READ_VDSO32 if you do not want to build perf-read-vdso32 +# for reading the 32-bit compatibility VDSO in 64-bit mode +# +# Define NO_PERF_READ_VDSOX32 if you do not want to build perf-read-vdsox32 +# for reading the x32 mode 32-bit compatibility VDSO in 64-bit mode ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(shell pwd))) @@ -171,11 +177,16 @@ $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) -# -# Single 'perf' binary right now: -# PROGRAMS += $(OUTPUT)perf +ifndef NO_PERF_READ_VDSO32 +PROGRAMS += $(OUTPUT)perf-read-vdso32 +endif + +ifndef NO_PERF_READ_VDSOX32 +PROGRAMS += $(OUTPUT)perf-read-vdsox32 +endif + # what 'all' will build and 'install' will install, in perfexecdir ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) @@ -253,6 +264,7 @@ LIB_H += util/event.h LIB_H += util/evsel.h LIB_H += util/evlist.h LIB_H += util/exec_cmd.h +LIB_H += util/find-vdso-map.c LIB_H += util/levenshtein.h LIB_H += util/machine.h LIB_H += util/map.h @@ -732,6 +744,16 @@ $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Uti $(OUTPUT)perf-%: %.o $(PERFLIBS) $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS) +ifndef NO_PERF_READ_VDSO32 +$(OUTPUT)perf-read-vdso32: perf-read-vdso.c util/find-vdso-map.c + $(QUIET_CC)$(CC) -m32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c +endif + +ifndef NO_PERF_READ_VDSOX32 +$(OUTPUT)perf-read-vdsox32: perf-read-vdso.c util/find-vdso-map.c + $(QUIET_CC)$(CC) -mx32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c +endif + $(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) $(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) @@ -876,6 +898,14 @@ install-bin: all install-gtk $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'; \ $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'; \ $(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(bindir_SQ)/trace' +ifndef NO_PERF_READ_VDSO32 + $(call 
QUIET_INSTALL, perf-read-vdso32) \ + $(INSTALL) $(OUTPUT)perf-read-vdso32 '$(DESTDIR_SQ)$(bindir_SQ)'; +endif +ifndef NO_PERF_READ_VDSOX32 + $(call QUIET_INSTALL, perf-read-vdsox32) \ + $(INSTALL) $(OUTPUT)perf-read-vdsox32 '$(DESTDIR_SQ)$(bindir_SQ)'; +endif $(call QUIET_INSTALL, libexec) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' $(call QUIET_INSTALL, perf-archive) \ @@ -928,7 +958,7 @@ config-clean: clean: $(LIBTRACEEVENT)-clean $(LIBAPIKFS)-clean config-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(OUTPUT)perf.o $(LANG_BINDINGS) $(GTK_OBJS) - $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf + $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-FEATURES $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 58f609198c6d..3ba2382a5236 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -230,7 +230,9 @@ VF_FEATURE_TESTS = \ bionic \ liberty \ liberty-z \ - cplus-demangle + cplus-demangle \ + compile-32 \ + compile-x32 # Set FEATURE_CHECK_(C|LD)FLAGS-all for all CORE_FEATURE_TESTS features. 
# If in the future we need per-feature checks/flags for features not @@ -622,6 +624,27 @@ ifdef HAVE_KVM_STAT_SUPPORT CFLAGS += -DHAVE_KVM_STAT_SUPPORT endif +ifeq (${IS_64_BIT}, 1) + ifndef NO_PERF_READ_VDSO32 + $(call feature_check,compile-32) + ifneq ($(feature-compile-32), 1) + NO_PERF_READ_VDSO32 := 1 + endif + endif + ifneq (${IS_X86_64}, 1) + NO_PERF_READ_VDSOX32 := 1 + endif + ifndef NO_PERF_READ_VDSOX32 + $(call feature_check,compile-x32) + ifneq ($(feature-compile-x32), 1) + NO_PERF_READ_VDSOX32 := 1 + endif + endif +else + NO_PERF_READ_VDSO32 := 1 + NO_PERF_READ_VDSOX32 := 1 +endif + # Among the variables below, these: # perfexecdir # template_dir diff --git a/tools/perf/config/Makefile.arch b/tools/perf/config/Makefile.arch index 4b06719ee984..851cd0172a76 100644 --- a/tools/perf/config/Makefile.arch +++ b/tools/perf/config/Makefile.arch @@ -21,3 +21,11 @@ ifeq ($(ARCH),x86_64) RAW_ARCH := x86_64 endif endif + +ifeq (${IS_X86_64}, 1) + IS_64_BIT := 1 +else ifeq ($(ARCH),x86) + IS_64_BIT := 0 +else + IS_64_BIT := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) +endif diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 72ab2984718e..7c68ec74a808 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -27,7 +27,9 @@ FILES= \ test-libunwind-debug-frame.bin \ test-stackprotector-all.bin \ test-timerfd.bin \ - test-libdw-dwarf-unwind.bin + test-libdw-dwarf-unwind.bin \ + test-compile-32.bin \ + test-compile-x32.bin CC := $(CROSS_COMPILE)gcc -MD PKG_CONFIG := $(CROSS_COMPILE)pkg-config @@ -131,6 +133,12 @@ test-libdw-dwarf-unwind.bin: test-sync-compare-and-swap.bin: $(BUILD) -Werror +test-compile-32.bin: + $(CC) -m32 -o $(OUTPUT)$@ test-compile.c + +test-compile-x32.bin: + $(CC) -mx32 -o $(OUTPUT)$@ test-compile.c + -include *.d ############################### diff --git a/tools/perf/config/feature-checks/test-compile.c 
b/tools/perf/config/feature-checks/test-compile.c new file mode 100644 index 000000000000..31dbf45bf99c --- /dev/null +++ b/tools/perf/config/feature-checks/test-compile.c @@ -0,0 +1,4 @@ +int main(void) +{ + return 0; +} diff --git a/tools/perf/perf-read-vdso.c b/tools/perf/perf-read-vdso.c new file mode 100644 index 000000000000..764e2547c25a --- /dev/null +++ b/tools/perf/perf-read-vdso.c @@ -0,0 +1,34 @@ +#include +#include + +#define VDSO__MAP_NAME "[vdso]" + +/* + * Include definition of find_vdso_map() also used in util/vdso.c for + * building perf. + */ +#include "util/find-vdso-map.c" + +int main(void) +{ + void *start, *end; + size_t size, written; + + if (find_vdso_map(&start, &end)) + return 1; + + size = end - start; + + while (size) { + written = fwrite(start, 1, size, stdout); + if (!written) + return 1; + start += written; + size -= written; + } + + if (fflush(stdout)) + return 1; + + return 0; +} diff --git a/tools/perf/util/find-vdso-map.c b/tools/perf/util/find-vdso-map.c new file mode 100644 index 000000000000..95ef1cffc056 --- /dev/null +++ b/tools/perf/util/find-vdso-map.c @@ -0,0 +1,30 @@ +static int find_vdso_map(void **start, void **end) +{ + FILE *maps; + char line[128]; + int found = 0; + + maps = fopen("/proc/self/maps", "r"); + if (!maps) { + fprintf(stderr, "vdso: cannot open maps\n"); + return -1; + } + + while (!found && fgets(line, sizeof(line), maps)) { + int m = -1; + + /* We care only about private r-x mappings. 
*/ + if (2 != sscanf(line, "%p-%p r-xp %*x %*x:%*x %*u %n", + start, end, &m)) + continue; + if (m < 0) + continue; + + if (!strncmp(&line[m], VDSO__MAP_NAME, + sizeof(VDSO__MAP_NAME) - 1)) + found = 1; + } + + fclose(maps); + return !found; +} diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index adca69384fcc..f51390a1ed51 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -15,6 +15,12 @@ #include "linux/string.h" #include "debug.h" +/* + * Include definition of find_vdso_map() also used in perf-read-vdso.c for + * building perf-read-vdso32 and perf-read-vdsox32. + */ +#include "find-vdso-map.c" + #define VDSO__TEMP_FILE_NAME "/tmp/perf-vdso.so-XXXXXX" struct vdso_file { @@ -40,37 +46,6 @@ static struct vdso_info *vdso_info__new(void) return memdup(&vdso_info_init, sizeof(vdso_info_init)); } -static int find_vdso_map(void **start, void **end) -{ - FILE *maps; - char line[128]; - int found = 0; - - maps = fopen("/proc/self/maps", "r"); - if (!maps) { - pr_err("vdso: cannot open maps\n"); - return -1; - } - - while (!found && fgets(line, sizeof(line), maps)) { - int m = -1; - - /* We care only about private r-x mappings. */ - if (2 != sscanf(line, "%p-%p r-xp %*x %*x:%*x %*u %n", - start, end, &m)) - continue; - if (m < 0) - continue; - - if (!strncmp(&line[m], VDSO__MAP_NAME, - sizeof(VDSO__MAP_NAME) - 1)) - found = 1; - } - - fclose(maps); - return !found; -} - static char *get_file(struct vdso_file *vdso_file) { char *vdso = NULL; -- cgit v1.2.3 From f6832e1720f5cc283703cfe9ccbfb46a3fb6f548 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 23 Oct 2014 13:45:23 +0300 Subject: perf tools: Add support for 32-bit compatibility VDSOs 'perf record' post-processes the event stream to create a list of build-ids for object files for which sample events have been recorded. That results in those object files being recorded in the build-id cache. 
In the case of VDSO, perf tools reads it from memory and copies it into a temporary file, which as described above, gets added to the build-id cache. Then when the perf.data file is processed by other tools, the build-id of VDSO is listed in the perf.data file and the VDSO can be read from the build-id cache. In that case the name of the map, the short name of the DSO, and the entry in the build-id cache are all "[vdso]". However, in the 64-bit case, there also can be 32-bit compatibility VDSOs. A previous patch added programs "perf-read-vdso32" and "perf-read-vdsox32". This patch uses those programs to read the correct VDSO for a thread and create a temporary file just as for the 64-bit VDSO. The map name and the entry in the build-id cache are still "[vdso]" but the DSO short name becomes "[vdso32]" and "[vdsox32]" respectively. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1414061124-26830-16-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/vdso.c | 170 ++++++++++++++++++++++++++++++++++++++++++++++++- tools/perf/util/vdso.h | 4 +- 2 files changed, 172 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index f51390a1ed51..69daef6a17d5 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -12,6 +12,7 @@ #include "util.h" #include "symbol.h" #include "machine.h" +#include "thread.h" #include "linux/string.h" #include "debug.h" @@ -28,10 +29,15 @@ struct vdso_file { bool error; char temp_file_name[sizeof(VDSO__TEMP_FILE_NAME)]; const char *dso_name; + const char *read_prog; }; struct vdso_info { struct vdso_file vdso; +#if BITS_PER_LONG == 64 + struct vdso_file vdso32; + struct vdso_file vdsox32; +#endif }; static struct vdso_info *vdso_info__new(void) @@ -41,6 +47,18 @@ static struct
vdso_info *vdso_info__new(void) .temp_file_name = VDSO__TEMP_FILE_NAME, .dso_name = DSO__NAME_VDSO, }, +#if BITS_PER_LONG == 64 + .vdso32 = { + .temp_file_name = VDSO__TEMP_FILE_NAME, + .dso_name = DSO__NAME_VDSO32, + .read_prog = "perf-read-vdso32", + }, + .vdsox32 = { + .temp_file_name = VDSO__TEMP_FILE_NAME, + .dso_name = DSO__NAME_VDSOX32, + .read_prog = "perf-read-vdsox32", + }, +#endif }; return memdup(&vdso_info_init, sizeof(vdso_info_init)); @@ -92,6 +110,12 @@ void vdso__exit(struct machine *machine) if (vdso_info->vdso.found) unlink(vdso_info->vdso.temp_file_name); +#if BITS_PER_LONG == 64 + if (vdso_info->vdso32.found) + unlink(vdso_info->vdso32.temp_file_name); + if (vdso_info->vdsox32.found) + unlink(vdso_info->vdsox32.temp_file_name); +#endif zfree(&machine->vdso_info); } @@ -110,6 +134,143 @@ static struct dso *vdso__new(struct machine *machine, const char *short_name, return dso; } +#if BITS_PER_LONG == 64 + +static enum dso_type machine__thread_dso_type(struct machine *machine, + struct thread *thread) +{ + enum dso_type dso_type = DSO__TYPE_UNKNOWN; + struct map *map; + struct dso *dso; + + map = map_groups__first(thread->mg, MAP__FUNCTION); + for (; map ; map = map_groups__next(map)) { + dso = map->dso; + if (!dso || dso->long_name[0] != '/') + continue; + dso_type = dso__type(dso, machine); + if (dso_type != DSO__TYPE_UNKNOWN) + break; + } + + return dso_type; +} + +static int vdso__do_copy_compat(FILE *f, int fd) +{ + char buf[4096]; + size_t count; + + while (1) { + count = fread(buf, 1, sizeof(buf), f); + if (ferror(f)) + return -errno; + if (feof(f)) + break; + if (count && writen(fd, buf, count) != (ssize_t)count) + return -errno; + } + + return 0; +} + +static int vdso__copy_compat(const char *prog, int fd) +{ + FILE *f; + int err; + + f = popen(prog, "r"); + if (!f) + return -errno; + + err = vdso__do_copy_compat(f, fd); + + if (pclose(f) == -1) + return -errno; + + return err; +} + +static int vdso__create_compat_file(const char *prog, 
char *temp_name) +{ + int fd, err; + + fd = mkstemp(temp_name); + if (fd < 0) + return -errno; + + err = vdso__copy_compat(prog, fd); + + if (close(fd) == -1) + return -errno; + + return err; +} + +static const char *vdso__get_compat_file(struct vdso_file *vdso_file) +{ + int err; + + if (vdso_file->found) + return vdso_file->temp_file_name; + + if (vdso_file->error) + return NULL; + + err = vdso__create_compat_file(vdso_file->read_prog, + vdso_file->temp_file_name); + if (err) { + pr_err("%s failed, error %d\n", vdso_file->read_prog, err); + vdso_file->error = true; + return NULL; + } + + vdso_file->found = true; + + return vdso_file->temp_file_name; +} + +static struct dso *vdso__findnew_compat(struct machine *machine, + struct vdso_file *vdso_file) +{ + const char *file_name; + struct dso *dso; + + dso = dsos__find(&machine->user_dsos, vdso_file->dso_name, true); + if (dso) + return dso; + + file_name = vdso__get_compat_file(vdso_file); + if (!file_name) + return NULL; + + return vdso__new(machine, vdso_file->dso_name, file_name); +} + +static int vdso__dso_findnew_compat(struct machine *machine, + struct thread *thread, + struct vdso_info *vdso_info, + struct dso **dso) +{ + enum dso_type dso_type; + + dso_type = machine__thread_dso_type(machine, thread); + switch (dso_type) { + case DSO__TYPE_32BIT: + *dso = vdso__findnew_compat(machine, &vdso_info->vdso32); + return 1; + case DSO__TYPE_X32BIT: + *dso = vdso__findnew_compat(machine, &vdso_info->vdsox32); + return 1; + case DSO__TYPE_UNKNOWN: + case DSO__TYPE_64BIT: + default: + return 0; + } +} + +#endif + struct dso *vdso__dso_findnew(struct machine *machine, struct thread *thread __maybe_unused) { @@ -123,6 +284,11 @@ struct dso *vdso__dso_findnew(struct machine *machine, if (!vdso_info) return NULL; +#if BITS_PER_LONG == 64 + if (vdso__dso_findnew_compat(machine, thread, vdso_info, &dso)) + return dso; +#endif + dso = dsos__find(&machine->user_dsos, DSO__NAME_VDSO, true); if (!dso) { char *file; @@ -139,5 
+305,7 @@ struct dso *vdso__dso_findnew(struct machine *machine, bool dso__is_vdso(struct dso *dso) { - return !strcmp(dso->short_name, DSO__NAME_VDSO); + return !strcmp(dso->short_name, DSO__NAME_VDSO) || + !strcmp(dso->short_name, DSO__NAME_VDSO32) || + !strcmp(dso->short_name, DSO__NAME_VDSOX32); } diff --git a/tools/perf/util/vdso.h b/tools/perf/util/vdso.h index af9d6929a215..d97da1616f0c 100644 --- a/tools/perf/util/vdso.h +++ b/tools/perf/util/vdso.h @@ -7,7 +7,9 @@ #define VDSO__MAP_NAME "[vdso]" -#define DSO__NAME_VDSO "[vdso]" +#define DSO__NAME_VDSO "[vdso]" +#define DSO__NAME_VDSO32 "[vdso32]" +#define DSO__NAME_VDSOX32 "[vdsox32]" static inline bool is_vdso_map(const char *filename) { -- cgit v1.2.3 From 46b1fa85ff5a2e03423770b3931b97266e8ac6cf Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 23 Oct 2014 13:45:24 +0300 Subject: perf tools: Do not attempt to run perf-read-vdso32 if it wasn't built popen() causes an error message to print if perf-read-vdso32 does not run. Avoid that by not trying to run it if it was not built. Ditto perf-read-vdsox32. 
Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1414061124-26830-17-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/config/Makefile | 8 ++++++-- tools/perf/util/vdso.c | 10 ++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 3ba2382a5236..71264e41fa85 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -627,7 +627,9 @@ endif ifeq (${IS_64_BIT}, 1) ifndef NO_PERF_READ_VDSO32 $(call feature_check,compile-32) - ifneq ($(feature-compile-32), 1) + ifeq ($(feature-compile-32), 1) + CFLAGS += -DHAVE_PERF_READ_VDSO32 + else NO_PERF_READ_VDSO32 := 1 endif endif @@ -636,7 +638,9 @@ ifeq (${IS_64_BIT}, 1) endif ifndef NO_PERF_READ_VDSOX32 $(call feature_check,compile-x32) - ifneq ($(feature-compile-x32), 1) + ifeq ($(feature-compile-x32), 1) + CFLAGS += -DHAVE_PERF_READ_VDSOX32 + else NO_PERF_READ_VDSOX32 := 1 endif endif diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index 69daef6a17d5..5c7dd796979d 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -255,6 +255,16 @@ static int vdso__dso_findnew_compat(struct machine *machine, enum dso_type dso_type; dso_type = machine__thread_dso_type(machine, thread); + +#ifndef HAVE_PERF_READ_VDSO32 + if (dso_type == DSO__TYPE_32BIT) + return 0; +#endif +#ifndef HAVE_PERF_READ_VDSOX32 + if (dso_type == DSO__TYPE_X32BIT) + return 0; +#endif + switch (dso_type) { case DSO__TYPE_32BIT: *dso = vdso__findnew_compat(machine, &vdso_info->vdso32); -- cgit v1.2.3 From 7e4772dc99a3ebfc53708eff262f7a8155485e85 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 24 Oct 2014 10:25:09 -0300 Subject: perf pmu: Add proper error handling to print_pmu_events() It was silently returning or 
printing "(null)" when no memory was available at various points. Fix it by checking and warning the user when that happens. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jean Pihet Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-835udmf66x9nza504cu6irz9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 91dca604c422..881b75490533 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -753,9 +753,9 @@ void print_pmu_events(const char *event_glob, bool name_only) if (pmu->selectable) len++; } - aliases = malloc(sizeof(char *) * len); + aliases = zalloc(sizeof(char *) * len); if (!aliases) - return; + goto out_enomem; pmu = NULL; j = 0; while ((pmu = perf_pmu__scan(pmu)) != NULL) { @@ -768,16 +768,20 @@ void print_pmu_events(const char *event_glob, bool name_only) (!is_cpu && strglobmatch(alias->name, event_glob)))) continue; - aliases[j] = name; + if (is_cpu && !name_only) - aliases[j] = format_alias_or(buf, sizeof(buf), - pmu, alias); - aliases[j] = strdup(aliases[j]); + name = format_alias_or(buf, sizeof(buf), pmu, alias); + + aliases[j] = strdup(name); + if (aliases[j] == NULL) + goto out_enomem; j++; } if (pmu->selectable) { - scnprintf(buf, sizeof(buf), "%s//", pmu->name); - aliases[j] = strdup(buf); + char *s; + if (asprintf(&s, "%s//", pmu->name) < 0) + goto out_enomem; + aliases[j] = s; j++; } } @@ -789,12 +793,20 @@ void print_pmu_events(const char *event_glob, bool name_only) continue; } printf(" %-50s [Kernel PMU event]\n", aliases[j]); - zfree(&aliases[j]); printed++; } if (printed) printf("\n"); - free(aliases); +out_free: + for (j = 0; j < len; j++) + zfree(&aliases[j]); + 
zfree(&aliases); + return; + +out_enomem: + printf("FATAL: not enough memory to print PMU events\n"); + if (aliases) + goto out_free; } bool pmu_have_event(const char *pname, const char *name) -- cgit v1.2.3 From 0db15b1e84a59e6e1da5fe6e74c35fe52fa29d92 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 23 Oct 2014 13:45:13 +0300 Subject: perf tools: Add facility to export data in database-friendly way This patch introduces an abstraction for exporting sample data in a database-friendly way. The abstraction does not implement the actual output. A subsequent patch takes this facility into use for extending the script interface. The abstraction is needed because static data like symbols, dsos, comms etc need to be exported only once. That means allocating them a unique identifier and recording it on each structure. The member 'db_id' is used for that. 'db_id' is just a 64-bit sequence number. Exporting centres around the db_export__sample() function which exports the associated data structures if they have not yet been allocated a db_id. 
Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1414061124-26830-6-git-send-email-adrian.hunter@intel.com [ committer note: Stash db_id using symbol_conf.priv_size + symbol__priv() and foo->priv areas ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 + tools/perf/util/comm.h | 4 + tools/perf/util/db-export.c | 270 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/db-export.h | 86 ++++++++++++++ tools/perf/util/dso.h | 5 + tools/perf/util/evsel.h | 2 + tools/perf/util/machine.h | 4 + tools/perf/util/thread.h | 1 + 8 files changed, 374 insertions(+) create mode 100644 tools/perf/util/db-export.c create mode 100644 tools/perf/util/db-export.h (limited to 'tools/perf/util') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 9c4ced0fc845..3caf7dab50e8 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -258,6 +258,7 @@ LIB_H += util/annotate.h LIB_H += util/cache.h LIB_H += util/callchain.h LIB_H += util/build-id.h +LIB_H += util/db-export.h LIB_H += util/debug.h LIB_H += util/pmu.h LIB_H += util/event.h @@ -323,6 +324,7 @@ LIB_OBJS += $(OUTPUT)util/annotate.o LIB_OBJS += $(OUTPUT)util/build-id.o LIB_OBJS += $(OUTPUT)util/config.o LIB_OBJS += $(OUTPUT)util/ctype.o +LIB_OBJS += $(OUTPUT)util/db-export.o LIB_OBJS += $(OUTPUT)util/pmu.o LIB_OBJS += $(OUTPUT)util/environment.o LIB_OBJS += $(OUTPUT)util/event.o diff --git a/tools/perf/util/comm.h b/tools/perf/util/comm.h index 51c10ab257f8..71c9c39340d4 100644 --- a/tools/perf/util/comm.h +++ b/tools/perf/util/comm.h @@ -12,6 +12,10 @@ struct comm { u64 start; struct list_head list; bool exec; + union { /* Tool specific area */ + void *priv; + u64 db_id; + }; }; void comm__free(struct comm *comm); diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c new file mode 100644 index 
000000000000..be128b075a32 --- /dev/null +++ b/tools/perf/util/db-export.c @@ -0,0 +1,270 @@ +/* + * db-export.c: Support for exporting data suitable for import to a database + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include + +#include "evsel.h" +#include "machine.h" +#include "thread.h" +#include "comm.h" +#include "symbol.h" +#include "event.h" +#include "db-export.h" + +int db_export__init(struct db_export *dbe) +{ + memset(dbe, 0, sizeof(struct db_export)); + return 0; +} + +void db_export__exit(struct db_export *dbe __maybe_unused) +{ +} + +int db_export__evsel(struct db_export *dbe, struct perf_evsel *evsel) +{ + if (evsel->db_id) + return 0; + + evsel->db_id = ++dbe->evsel_last_db_id; + + if (dbe->export_evsel) + return dbe->export_evsel(dbe, evsel); + + return 0; +} + +int db_export__machine(struct db_export *dbe, struct machine *machine) +{ + if (machine->db_id) + return 0; + + machine->db_id = ++dbe->machine_last_db_id; + + if (dbe->export_machine) + return dbe->export_machine(dbe, machine); + + return 0; +} + +int db_export__thread(struct db_export *dbe, struct thread *thread, + struct machine *machine, struct comm *comm) +{ + u64 main_thread_db_id = 0; + int err; + + if (thread->db_id) + return 0; + + thread->db_id = ++dbe->thread_last_db_id; + + if (thread->pid_ != -1) { + struct thread *main_thread; + + if (thread->pid_ == thread->tid) { + main_thread = thread; + } else { + main_thread = machine__findnew_thread(machine, + thread->pid_, + thread->pid_); + if (!main_thread) + return 
-ENOMEM; + err = db_export__thread(dbe, main_thread, machine, + comm); + if (err) + return err; + if (comm) { + err = db_export__comm_thread(dbe, comm, thread); + if (err) + return err; + } + } + main_thread_db_id = main_thread->db_id; + } + + if (dbe->export_thread) + return dbe->export_thread(dbe, thread, main_thread_db_id, + machine); + + return 0; +} + +int db_export__comm(struct db_export *dbe, struct comm *comm, + struct thread *main_thread) +{ + int err; + + if (comm->db_id) + return 0; + + comm->db_id = ++dbe->comm_last_db_id; + + if (dbe->export_comm) { + err = dbe->export_comm(dbe, comm); + if (err) + return err; + } + + return db_export__comm_thread(dbe, comm, main_thread); +} + +int db_export__comm_thread(struct db_export *dbe, struct comm *comm, + struct thread *thread) +{ + u64 db_id; + + db_id = ++dbe->comm_thread_last_db_id; + + if (dbe->export_comm_thread) + return dbe->export_comm_thread(dbe, db_id, comm, thread); + + return 0; +} + +int db_export__dso(struct db_export *dbe, struct dso *dso, + struct machine *machine) +{ + if (dso->db_id) + return 0; + + dso->db_id = ++dbe->dso_last_db_id; + + if (dbe->export_dso) + return dbe->export_dso(dbe, dso, machine); + + return 0; +} + +int db_export__symbol(struct db_export *dbe, struct symbol *sym, + struct dso *dso) +{ + u64 *sym_db_id = symbol__priv(sym); + + if (*sym_db_id) + return 0; + + *sym_db_id = ++dbe->symbol_last_db_id; + + if (dbe->export_symbol) + return dbe->export_symbol(dbe, sym, dso); + + return 0; +} + +static struct thread *get_main_thread(struct machine *machine, struct thread *thread) +{ + if (thread->pid_ == thread->tid) + return thread; + + if (thread->pid_ == -1) + return NULL; + + return machine__find_thread(machine, thread->pid_, thread->pid_); +} + +static int db_ids_from_al(struct db_export *dbe, struct addr_location *al, + u64 *dso_db_id, u64 *sym_db_id, u64 *offset) +{ + int err; + + if (al->map) { + struct dso *dso = al->map->dso; + + err = db_export__dso(dbe, dso, 
al->machine); + if (err) + return err; + *dso_db_id = dso->db_id; + + if (!al->sym) { + al->sym = symbol__new(al->addr, 0, 0, "unknown"); + if (al->sym) + symbols__insert(&dso->symbols[al->map->type], + al->sym); + } + + if (al->sym) { + u64 *db_id = symbol__priv(al->sym); + + err = db_export__symbol(dbe, al->sym, dso); + if (err) + return err; + *sym_db_id = *db_id; + *offset = al->addr - al->sym->start; + } + } + + return 0; +} + +int db_export__sample(struct db_export *dbe, union perf_event *event, + struct perf_sample *sample, struct perf_evsel *evsel, + struct thread *thread, struct addr_location *al) +{ + struct export_sample es = { + .event = event, + .sample = sample, + .evsel = evsel, + .thread = thread, + .al = al, + }; + struct thread *main_thread; + struct comm *comm = NULL; + int err; + + err = db_export__evsel(dbe, evsel); + if (err) + return err; + + err = db_export__machine(dbe, al->machine); + if (err) + return err; + + main_thread = get_main_thread(al->machine, thread); + if (main_thread) + comm = machine__thread_exec_comm(al->machine, main_thread); + + err = db_export__thread(dbe, thread, al->machine, comm); + if (err) + return err; + + if (comm) { + err = db_export__comm(dbe, comm, main_thread); + if (err) + return err; + es.comm_db_id = comm->db_id; + } + + es.db_id = ++dbe->sample_last_db_id; + + err = db_ids_from_al(dbe, al, &es.dso_db_id, &es.sym_db_id, &es.offset); + if (err) + return err; + + if ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) && + sample_addr_correlates_sym(&evsel->attr)) { + struct addr_location addr_al; + + perf_event__preprocess_sample_addr(event, sample, thread, &addr_al); + err = db_ids_from_al(dbe, &addr_al, &es.addr_dso_db_id, + &es.addr_sym_db_id, &es.addr_offset); + if (err) + return err; + } + + if (dbe->export_sample) + return dbe->export_sample(dbe, &es); + + return 0; +} diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h new file mode 100644 index 000000000000..b3643e8e5750 --- /dev/null +++ 
b/tools/perf/util/db-export.h @@ -0,0 +1,86 @@ +/* + * db-export.h: Support for exporting data suitable for import to a database + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef __PERF_DB_EXPORT_H +#define __PERF_DB_EXPORT_H + +#include + +struct perf_evsel; +struct machine; +struct thread; +struct comm; +struct dso; +struct perf_sample; +struct addr_location; + +struct export_sample { + union perf_event *event; + struct perf_sample *sample; + struct perf_evsel *evsel; + struct thread *thread; + struct addr_location *al; + u64 db_id; + u64 comm_db_id; + u64 dso_db_id; + u64 sym_db_id; + u64 offset; /* ip offset from symbol start */ + u64 addr_dso_db_id; + u64 addr_sym_db_id; + u64 addr_offset; /* addr offset from symbol start */ +}; + +struct db_export { + int (*export_evsel)(struct db_export *dbe, struct perf_evsel *evsel); + int (*export_machine)(struct db_export *dbe, struct machine *machine); + int (*export_thread)(struct db_export *dbe, struct thread *thread, + u64 main_thread_db_id, struct machine *machine); + int (*export_comm)(struct db_export *dbe, struct comm *comm); + int (*export_comm_thread)(struct db_export *dbe, u64 db_id, + struct comm *comm, struct thread *thread); + int (*export_dso)(struct db_export *dbe, struct dso *dso, + struct machine *machine); + int (*export_symbol)(struct db_export *dbe, struct symbol *sym, + struct dso *dso); + int (*export_sample)(struct db_export *dbe, struct export_sample *es); + u64 evsel_last_db_id; + u64 machine_last_db_id; + u64 thread_last_db_id; + 
u64 comm_last_db_id; + u64 comm_thread_last_db_id; + u64 dso_last_db_id; + u64 symbol_last_db_id; + u64 sample_last_db_id; +}; + +int db_export__init(struct db_export *dbe); +void db_export__exit(struct db_export *dbe); +int db_export__evsel(struct db_export *dbe, struct perf_evsel *evsel); +int db_export__machine(struct db_export *dbe, struct machine *machine); +int db_export__thread(struct db_export *dbe, struct thread *thread, + struct machine *machine, struct comm *comm); +int db_export__comm(struct db_export *dbe, struct comm *comm, + struct thread *main_thread); +int db_export__comm_thread(struct db_export *dbe, struct comm *comm, + struct thread *thread); +int db_export__dso(struct db_export *dbe, struct dso *dso, + struct machine *machine); +int db_export__symbol(struct db_export *dbe, struct symbol *sym, + struct dso *dso); +int db_export__sample(struct db_export *dbe, union perf_event *event, + struct perf_sample *sample, struct perf_evsel *evsel, + struct thread *thread, struct addr_location *al); + +#endif diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 3c9b391493f9..a316e4af321f 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -139,6 +139,11 @@ struct dso { struct list_head open_entry; } data; + union { /* Tool specific area */ + void *priv; + u64 db_id; + }; + char name[0]; }; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 163c5604e5d1..d3854c4f52e1 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -54,6 +54,7 @@ struct cgroup_sel; * @is_pos: the position (counting backwards) of the event id (PERF_SAMPLE_ID or * PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. 
if sample_id_all * is used there is an id sample appended to non-sample events + * @priv: And what is in its containing unnamed union are tool specific */ struct perf_evsel { struct list_head node; @@ -73,6 +74,7 @@ struct perf_evsel { union { void *priv; off_t id_offset; + u64 db_id; }; struct cgroup_sel *cgrp; void *handler; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 88ec74e18cbf..e8b7779a0a3f 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -40,6 +40,10 @@ struct machine { u64 kernel_start; symbol_filter_t symbol_filter; pid_t *current_tid; + union { /* Tool specific area */ + void *priv; + u64 db_id; + }; }; static inline diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 6ef9fe6ff8da..d34cf5c0d0d9 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -23,6 +23,7 @@ struct thread { bool dead; /* if set thread has exited */ struct list_head comm_list; int comm_len; + u64 db_id; void *priv; }; -- cgit v1.2.3 From df919b400ad3f9e6aac392ce421d710207abf9be Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 23 Oct 2014 13:45:14 +0300 Subject: perf scripting python: Extend interface to export data in a database-friendly way Use the new db_export facility to export data in a database-friendly way. A Python script selects the db_export mode by setting a global variable 'perf_db_export_mode' to True. The script then optionally implements functions to receive table rows. The functions are: evsel_table machine_table thread_table comm_table dso_table symbol_table sample_table An example script is provided in a subsequent patch. 
Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1414061124-26830-7-git-send-email-adrian.hunter@intel.com [ Reserve space for per symbol db_id space when perf_db_export_mode is on ] Signed-off-by: Arnaldo Carvalho de Melo --- .../util/scripting-engines/trace-event-python.c | 286 ++++++++++++++++++++- 1 file changed, 284 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 25e5a238f1cb..2fd7ee8f18c7 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "../../perf.h" @@ -33,6 +34,9 @@ #include "../util.h" #include "../event.h" #include "../thread.h" +#include "../comm.h" +#include "../machine.h" +#include "../db-export.h" #include "../trace-event.h" #include "../machine.h" @@ -53,6 +57,21 @@ static int zero_flag_atom; static PyObject *main_module, *main_dict; +struct tables { + struct db_export dbe; + PyObject *evsel_handler; + PyObject *machine_handler; + PyObject *thread_handler; + PyObject *comm_handler; + PyObject *comm_thread_handler; + PyObject *dso_handler; + PyObject *symbol_handler; + PyObject *sample_handler; + bool db_export_mode; +}; + +static struct tables tables_global; + static void handler_call_die(const char *handler_name) NORETURN; static void handler_call_die(const char *handler_name) { @@ -475,6 +494,211 @@ static void python_process_tracepoint(struct perf_sample *sample, Py_DECREF(t); } +static PyObject *tuple_new(unsigned int sz) +{ + PyObject *t; + + t = PyTuple_New(sz); + if (!t) + Py_FatalError("couldn't create Python tuple"); + return t; +} + +static int tuple_set_u64(PyObject *t, unsigned int pos, u64 val) +{ +#if 
BITS_PER_LONG == 64 + return PyTuple_SetItem(t, pos, PyInt_FromLong(val)); +#endif +#if BITS_PER_LONG == 32 + return PyTuple_SetItem(t, pos, PyLong_FromLongLong(val)); +#endif +} + +static int tuple_set_s32(PyObject *t, unsigned int pos, s32 val) +{ + return PyTuple_SetItem(t, pos, PyInt_FromLong(val)); +} + +static int tuple_set_string(PyObject *t, unsigned int pos, const char *s) +{ + return PyTuple_SetItem(t, pos, PyString_FromString(s)); +} + +static int python_export_evsel(struct db_export *dbe, struct perf_evsel *evsel) +{ + struct tables *tables = container_of(dbe, struct tables, dbe); + PyObject *t; + + t = tuple_new(2); + + tuple_set_u64(t, 0, evsel->db_id); + tuple_set_string(t, 1, perf_evsel__name(evsel)); + + call_object(tables->evsel_handler, t, "evsel_table"); + + Py_DECREF(t); + + return 0; +} + +static int python_export_machine(struct db_export *dbe, + struct machine *machine) +{ + struct tables *tables = container_of(dbe, struct tables, dbe); + PyObject *t; + + t = tuple_new(3); + + tuple_set_u64(t, 0, machine->db_id); + tuple_set_s32(t, 1, machine->pid); + tuple_set_string(t, 2, machine->root_dir ? 
machine->root_dir : ""); + + call_object(tables->machine_handler, t, "machine_table"); + + Py_DECREF(t); + + return 0; +} + +static int python_export_thread(struct db_export *dbe, struct thread *thread, + u64 main_thread_db_id, struct machine *machine) +{ + struct tables *tables = container_of(dbe, struct tables, dbe); + PyObject *t; + + t = tuple_new(5); + + tuple_set_u64(t, 0, thread->db_id); + tuple_set_u64(t, 1, machine->db_id); + tuple_set_u64(t, 2, main_thread_db_id); + tuple_set_s32(t, 3, thread->pid_); + tuple_set_s32(t, 4, thread->tid); + + call_object(tables->thread_handler, t, "thread_table"); + + Py_DECREF(t); + + return 0; +} + +static int python_export_comm(struct db_export *dbe, struct comm *comm) +{ + struct tables *tables = container_of(dbe, struct tables, dbe); + PyObject *t; + + t = tuple_new(2); + + tuple_set_u64(t, 0, comm->db_id); + tuple_set_string(t, 1, comm__str(comm)); + + call_object(tables->comm_handler, t, "comm_table"); + + Py_DECREF(t); + + return 0; +} + +static int python_export_comm_thread(struct db_export *dbe, u64 db_id, + struct comm *comm, struct thread *thread) +{ + struct tables *tables = container_of(dbe, struct tables, dbe); + PyObject *t; + + t = tuple_new(3); + + tuple_set_u64(t, 0, db_id); + tuple_set_u64(t, 1, comm->db_id); + tuple_set_u64(t, 2, thread->db_id); + + call_object(tables->comm_thread_handler, t, "comm_thread_table"); + + Py_DECREF(t); + + return 0; +} + +static int python_export_dso(struct db_export *dbe, struct dso *dso, + struct machine *machine) +{ + struct tables *tables = container_of(dbe, struct tables, dbe); + char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + PyObject *t; + + build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + + t = tuple_new(5); + + tuple_set_u64(t, 0, dso->db_id); + tuple_set_u64(t, 1, machine->db_id); + tuple_set_string(t, 2, dso->short_name); + tuple_set_string(t, 3, dso->long_name); + tuple_set_string(t, 4, sbuild_id); + + call_object(tables->dso_handler, t, 
"dso_table"); + + Py_DECREF(t); + + return 0; +} + +static int python_export_symbol(struct db_export *dbe, struct symbol *sym, + struct dso *dso) +{ + struct tables *tables = container_of(dbe, struct tables, dbe); + u64 *sym_db_id = symbol__priv(sym); + PyObject *t; + + t = tuple_new(6); + + tuple_set_u64(t, 0, *sym_db_id); + tuple_set_u64(t, 1, dso->db_id); + tuple_set_u64(t, 2, sym->start); + tuple_set_u64(t, 3, sym->end); + tuple_set_s32(t, 4, sym->binding); + tuple_set_string(t, 5, sym->name); + + call_object(tables->symbol_handler, t, "symbol_table"); + + Py_DECREF(t); + + return 0; +} + +static int python_export_sample(struct db_export *dbe, + struct export_sample *es) +{ + struct tables *tables = container_of(dbe, struct tables, dbe); + PyObject *t; + + t = tuple_new(19); + + tuple_set_u64(t, 0, es->db_id); + tuple_set_u64(t, 1, es->evsel->db_id); + tuple_set_u64(t, 2, es->al->machine->db_id); + tuple_set_u64(t, 3, es->thread->db_id); + tuple_set_u64(t, 4, es->comm_db_id); + tuple_set_u64(t, 5, es->dso_db_id); + tuple_set_u64(t, 6, es->sym_db_id); + tuple_set_u64(t, 7, es->offset); + tuple_set_u64(t, 8, es->sample->ip); + tuple_set_u64(t, 9, es->sample->time); + tuple_set_s32(t, 10, es->sample->cpu); + tuple_set_u64(t, 11, es->addr_dso_db_id); + tuple_set_u64(t, 12, es->addr_sym_db_id); + tuple_set_u64(t, 13, es->addr_offset); + tuple_set_u64(t, 14, es->sample->addr); + tuple_set_u64(t, 15, es->sample->period); + tuple_set_u64(t, 16, es->sample->weight); + tuple_set_u64(t, 17, es->sample->transaction); + tuple_set_u64(t, 18, es->sample->data_src); + + call_object(tables->sample_handler, t, "sample_table"); + + Py_DECREF(t); + + return 0; +} + static void python_process_general_event(struct perf_sample *sample, struct perf_evsel *evsel, struct thread *thread, @@ -551,19 +775,25 @@ exit: Py_DECREF(t); } -static void python_process_event(union perf_event *event __maybe_unused, +static void python_process_event(union perf_event *event, struct perf_sample 
*sample, struct perf_evsel *evsel, struct thread *thread, struct addr_location *al) { + struct tables *tables = &tables_global; + switch (evsel->attr.type) { case PERF_TYPE_TRACEPOINT: python_process_tracepoint(sample, evsel, thread, al); break; /* Reserve for future process_hw/sw/raw APIs */ default: - python_process_general_event(sample, evsel, thread, al); + if (tables->db_export_mode) + db_export__sample(&tables->dbe, event, sample, evsel, + thread, al); + else + python_process_general_event(sample, evsel, thread, al); } } @@ -589,11 +819,57 @@ error: return -1; } +#define SET_TABLE_HANDLER_(name, handler_name, table_name) do { \ + tables->handler_name = get_handler(#table_name); \ + if (tables->handler_name) \ + tables->dbe.export_ ## name = python_export_ ## name; \ +} while (0) + +#define SET_TABLE_HANDLER(name) \ + SET_TABLE_HANDLER_(name, name ## _handler, name ## _table) + +static void set_table_handlers(struct tables *tables) +{ + const char *perf_db_export_mode = "perf_db_export_mode"; + PyObject *db_export_mode; + int ret; + + memset(tables, 0, sizeof(struct tables)); + if (db_export__init(&tables->dbe)) + Py_FatalError("failed to initialize export"); + + db_export_mode = PyDict_GetItemString(main_dict, perf_db_export_mode); + if (!db_export_mode) + return; + + ret = PyObject_IsTrue(db_export_mode); + if (ret == -1) + handler_call_die(perf_db_export_mode); + if (!ret) + return; + + tables->db_export_mode = true; + /* + * Reserve per symbol space for symbol->db_id via symbol__priv() + */ + symbol_conf.priv_size = sizeof(u64); + + SET_TABLE_HANDLER(evsel); + SET_TABLE_HANDLER(machine); + SET_TABLE_HANDLER(thread); + SET_TABLE_HANDLER(comm); + SET_TABLE_HANDLER(comm_thread); + SET_TABLE_HANDLER(dso); + SET_TABLE_HANDLER(symbol); + SET_TABLE_HANDLER(sample); +} + /* * Start trace script */ static int python_start_script(const char *script, int argc, const char **argv) { + struct tables *tables = &tables_global; const char **command_line; char 
buf[PATH_MAX]; int i, err = 0; @@ -632,6 +908,8 @@ static int python_start_script(const char *script, int argc, const char **argv) free(command_line); + set_table_handlers(tables); + return err; error: Py_Finalize(); @@ -650,8 +928,12 @@ static int python_flush_script(void) */ static int python_stop_script(void) { + struct tables *tables = &tables_global; + try_call_object("trace_end", NULL); + db_export__exit(&tables->dbe); + Py_XDECREF(main_dict); Py_XDECREF(main_module); Py_Finalize(); -- cgit v1.2.3 From 5e17b28f1e246b98e08cb463f7d72cff6415fc53 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 27 Oct 2014 16:31:31 -0400 Subject: perf probe: Add --quiet option to suppress output result message Add --quiet(-q) option to suppress output result message for --add, and --del options (Note that --lines/funcs/vars are not affected). This option is useful if you run the perf probe inside your scripts. Signed-off-by: Masami Hiramatsu Cc: Hemant Kumar Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Srikar Dronamraju Link: http://lkml.kernel.org/r/20141027203131.21219.35170.stgit@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-probe.c | 11 +++++++++++ tools/perf/util/probe-event.c | 18 +++++++++--------- 2 files changed, 20 insertions(+), 9 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 2d3577d1009d..921bb6942503 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -55,6 +55,7 @@ static struct { bool show_funcs; bool mod_events; bool uprobes; + bool quiet; int nevents; struct perf_probe_event events[MAX_PROBES]; struct strlist *dellist; @@ -315,6 +316,8 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) struct option options[] = { OPT_INCR('v', "verbose", &verbose, "be more verbose (show parsed arguments, etc)"), + OPT_BOOLEAN('q', "quiet", &params.quiet, + "be quiet (do not show any mesages)"),
OPT_BOOLEAN('l', "list", &params.list_events, "list up current probe events"), OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.", @@ -404,6 +407,14 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) } } + if (params.quiet) { + if (verbose != 0) { + pr_err(" Error: -v and -q are exclusive.\n"); + return -EINVAL; + } + verbose = -1; + } + if (params.max_probe_points == 0) params.max_probe_points = MAX_PROBES; diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index c150ca4343eb..28eb1417cb2a 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1910,21 +1910,21 @@ static int show_perf_probe_event(struct perf_probe_event *pev, if (ret < 0) return ret; - printf(" %-20s (on %s", buf, place); + pr_info(" %-20s (on %s", buf, place); if (module) - printf(" in %s", module); + pr_info(" in %s", module); if (pev->nargs > 0) { - printf(" with"); + pr_info(" with"); for (i = 0; i < pev->nargs; i++) { ret = synthesize_perf_probe_arg(&pev->args[i], buf, 128); if (ret < 0) break; - printf(" %s", buf); + pr_info(" %s", buf); } } - printf(")\n"); + pr_info(")\n"); free(place); return ret; } @@ -2124,7 +2124,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, } ret = 0; - printf("Added new event%s\n", (ntevs > 1) ? "s:" : ":"); + pr_info("Added new event%s\n", (ntevs > 1) ? "s:" : ":"); for (i = 0; i < ntevs; i++) { tev = &tevs[i]; if (pev->event) @@ -2179,8 +2179,8 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, if (ret >= 0) { /* Show how to use the event.
*/ - printf("\nYou can now use it in all perf tools, such as:\n\n"); - printf("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group, + pr_info("\nYou can now use it in all perf tools, such as:\n\n"); + pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group, tev->event); } @@ -2444,7 +2444,7 @@ static int __del_trace_probe_event(int fd, struct str_node *ent) goto error; } - printf("Removed event: %s\n", ent->s); + pr_info("Removed event: %s\n", ent->s); return 0; error: pr_warning("Failed to delete event: %s\n", -- cgit v1.2.3 From 3c659eedada2fbf909c5818848753a6647a56426 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 27 Oct 2014 15:49:22 +0200 Subject: perf tools: Add id index Add an index of the event identifiers, in preparation for Intel PT. The event id (also called the sample id) is a unique number allocated by the kernel to the event created by perf_event_open(). Events can include the event id by having a sample type including PERF_SAMPLE_ID or PERF_SAMPLE_IDENTIFIER. Currently the main use of the event id is to match an event back to the evsel to which it belongs i.e. perf_evlist__id2evsel() The purpose of this patch is to make it possible to match an event back to the mmap from which it was read. The reason that is useful is because the mmap represents a time-ordered context (either for a cpu or for a thread). Intel PT decodes trace information on that basis. In full-trace mode, that information can be recorded when the Intel PT trace is read, but in sample-mode the Intel PT trace data is embedded in a sample and it is in that case that the "id index" is needed. So the mmaps are numbered (idx) and the cpu and tid recorded against the id by perf_evlist__set_sid_idx() which is called by perf_evlist__mmap_per_evsel(). That information is recorded on the perf.data file in the new "id index". idx, cpu and tid are added to struct perf_sample_id (which is the node of evlist's hash table to match ids to evsels). 
The information can be retrieved using perf_evlist__id2sid(). Note however this all depends on having a sample type including PERF_SAMPLE_ID or PERF_SAMPLE_IDENTIFIER, otherwise ids are not recorded. The "id index" is a synthesized event record which will be created when Intel PT sampling is used by calling perf_event__synthesize_id_index(). Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1414417770-18602-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 1 + tools/perf/util/event.c | 1 + tools/perf/util/event.h | 15 ++++++ tools/perf/util/evlist.c | 26 ++++++++-- tools/perf/util/evsel.h | 3 ++ tools/perf/util/session.c | 122 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/session.h | 10 ++++ tools/perf/util/tool.h | 3 +- 8 files changed, 177 insertions(+), 4 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 06f1758951f1..84df2deed988 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -409,6 +409,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) .tracing_data = perf_event__repipe_op2_synth, .finished_round = perf_event__repipe_op2_synth, .build_id = perf_event__repipe_op2_synth, + .id_index = perf_event__repipe_op2_synth, }, .input_name = "-", .samples = LIST_HEAD_INIT(inject.samples), diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index e00a29fb099f..6c6d044e959a 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -28,6 +28,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", [PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID", [PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND", + [PERF_RECORD_ID_INDEX] = "ID_INDEX", }; const char 
*perf_event__name(unsigned int id) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 5f0e0b89e130..8c7fe9d64e79 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -187,6 +187,7 @@ enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_HEADER_TRACING_DATA = 66, PERF_RECORD_HEADER_BUILD_ID = 67, PERF_RECORD_FINISHED_ROUND = 68, + PERF_RECORD_ID_INDEX = 69, PERF_RECORD_HEADER_MAX }; @@ -239,6 +240,19 @@ struct tracing_data_event { u32 size; }; +struct id_index_entry { + u64 id; + u64 idx; + u64 cpu; + u64 tid; +}; + +struct id_index_event { + struct perf_event_header header; + u64 nr; + struct id_index_entry entries[0]; +}; + union perf_event { struct perf_event_header header; struct mmap_event mmap; @@ -253,6 +267,7 @@ union perf_event { struct event_type_event event_type; struct tracing_data_event tracing_data; struct build_id_event build_id; + struct id_index_event id_index; }; void perf_event__print_totals(void); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3c9e77d6b4c2..0babd390963c 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -527,6 +527,22 @@ static int perf_evlist__id_add_fd(struct perf_evlist *evlist, return 0; } +static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, + struct perf_evsel *evsel, int idx, int cpu, + int thread) +{ + struct perf_sample_id *sid = SID(evsel, cpu, thread); + sid->idx = idx; + if (evlist->cpus && cpu >= 0) + sid->cpu = evlist->cpus->map[cpu]; + else + sid->cpu = -1; + if (!evsel->system_wide && evlist->threads && thread >= 0) + sid->tid = evlist->threads->map[thread]; + else + sid->tid = -1; +} + struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id) { struct hlist_head *head; @@ -805,9 +821,13 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, return -1; } - if ((evsel->attr.read_format & PERF_FORMAT_ID) && - perf_evlist__id_add_fd(evlist, evsel, cpu, thread, 
fd) < 0) - return -1; + if (evsel->attr.read_format & PERF_FORMAT_ID) { + if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread, + fd) < 0) + return -1; + perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, + thread); + } } return 0; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index d3854c4f52e1..979790951bfb 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -36,6 +36,9 @@ struct perf_sample_id { struct hlist_node node; u64 id; struct perf_evsel *evsel; + int idx; + int cpu; + pid_t tid; /* Holds total ID period value for PERF_SAMPLE_READ processing. */ u64 period; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 776010844cdc..27a0049118b5 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -228,6 +228,15 @@ static int process_finished_round(struct perf_tool *tool, union perf_event *event, struct perf_session *session); +static int process_id_index_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_session *perf_session + __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + void perf_tool__fill_defaults(struct perf_tool *tool) { if (tool->sample == NULL) @@ -262,6 +271,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) else tool->finished_round = process_finished_round_stub; } + if (tool->id_index == NULL) + tool->id_index = process_id_index_stub; } static void swap_sample_id_all(union perf_event *event, void *data) @@ -460,6 +471,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, [PERF_RECORD_HEADER_BUILD_ID] = NULL, + [PERF_RECORD_ID_INDEX] = perf_event__all64_swap, [PERF_RECORD_HEADER_MAX] = NULL, }; @@ -888,6 +900,8 @@ static s64 perf_session__process_user_event(struct perf_session *session, return tool->build_id(tool, event, session); case PERF_RECORD_FINISHED_ROUND: 
return tool->finished_round(tool, event, session); + case PERF_RECORD_ID_INDEX: + return tool->id_index(tool, event, session); default: return -EINVAL; } @@ -1594,3 +1608,111 @@ int __perf_session__set_tracepoints_handlers(struct perf_session *session, out: return err; } + +int perf_event__process_id_index(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct id_index_event *ie = &event->id_index; + size_t i, nr, max_nr; + + max_nr = (ie->header.size - sizeof(struct id_index_event)) / + sizeof(struct id_index_entry); + nr = ie->nr; + if (nr > max_nr) + return -EINVAL; + + if (dump_trace) + fprintf(stdout, " nr: %zu\n", nr); + + for (i = 0; i < nr; i++) { + struct id_index_entry *e = &ie->entries[i]; + struct perf_sample_id *sid; + + if (dump_trace) { + fprintf(stdout, " ... id: %"PRIu64, e->id); + fprintf(stdout, " idx: %"PRIu64, e->idx); + fprintf(stdout, " cpu: %"PRId64, e->cpu); + fprintf(stdout, " tid: %"PRId64"\n", e->tid); + } + + sid = perf_evlist__id2sid(evlist, e->id); + if (!sid) + return -ENOENT; + sid->idx = e->idx; + sid->cpu = e->cpu; + sid->tid = e->tid; + } + return 0; +} + +int perf_event__synthesize_id_index(struct perf_tool *tool, + perf_event__handler_t process, + struct perf_evlist *evlist, + struct machine *machine) +{ + union perf_event *ev; + struct perf_evsel *evsel; + size_t nr = 0, i = 0, sz, max_nr, n; + int err; + + pr_debug2("Synthesizing id index\n"); + + max_nr = (UINT16_MAX - sizeof(struct id_index_event)) / + sizeof(struct id_index_entry); + + list_for_each_entry(evsel, &evlist->entries, node) + nr += evsel->ids; + + n = nr > max_nr ? 
max_nr : nr; + sz = sizeof(struct id_index_event) + n * sizeof(struct id_index_entry); + ev = zalloc(sz); + if (!ev) + return -ENOMEM; + + ev->id_index.header.type = PERF_RECORD_ID_INDEX; + ev->id_index.header.size = sz; + ev->id_index.nr = n; + + list_for_each_entry(evsel, &evlist->entries, node) { + u32 j; + + for (j = 0; j < evsel->ids; j++) { + struct id_index_entry *e; + struct perf_sample_id *sid; + + if (i >= n) { + err = process(tool, ev, NULL, machine); + if (err) + goto out_err; + nr -= n; + i = 0; + } + + e = &ev->id_index.entries[i++]; + + e->id = evsel->id[j]; + + sid = perf_evlist__id2sid(evlist, e->id); + if (!sid) { + free(ev); + return -ENOENT; + } + + e->idx = sid->idx; + e->cpu = sid->cpu; + e->tid = sid->tid; + } + } + + sz = sizeof(struct id_index_event) + nr * sizeof(struct id_index_entry); + ev->id_index.header.size = sz; + ev->id_index.nr = nr; + + err = process(tool, ev, NULL, machine); +out_err: + free(ev); + + return err; +} diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index a4be851f1a90..d8521ac73a10 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -126,4 +126,14 @@ int __perf_session__set_tracepoints_handlers(struct perf_session *session, extern volatile int session_done; #define session_done() ACCESS_ONCE(session_done) + +int perf_event__process_id_index(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session); + +int perf_event__synthesize_id_index(struct perf_tool *tool, + perf_event__handler_t process, + struct perf_evlist *evlist, + struct machine *machine); + #endif /* __PERF_SESSION_H */ diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index f11636966a0f..bb2708bbfaca 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -39,7 +39,8 @@ struct perf_tool { event_attr_op attr; event_op2 tracing_data; event_op2 finished_round, - build_id; + build_id, + id_index; bool ordered_events; bool ordering_requires_timestamps; }; -- cgit v1.2.3 
From cba9b847f649af350809d8ff4119e84b0466c1d9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 29 Oct 2014 11:31:54 -0200 Subject: perf tools: Use evlist__for_each in a few remaining places Where direct use of the longer form using list_for_entry() was being used. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-v4fw80flg25nkl8jgeod3ot9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- tools/perf/util/session.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 0babd390963c..7e23dae54f1d 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -413,7 +413,7 @@ int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) int nfds = 0; struct perf_evsel *evsel; - list_for_each_entry(evsel, &evlist->entries, node) { + evlist__for_each(evlist, evsel) { if (evsel->system_wide) nfds += nr_cpus; else diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 27a0049118b5..58dd5ceb8bef 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1662,7 +1662,7 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, max_nr = (UINT16_MAX - sizeof(struct id_index_event)) / sizeof(struct id_index_entry); - list_for_each_entry(evsel, &evlist->entries, node) + evlist__for_each(evlist, evsel) nr += evsel->ids; n = nr > max_nr ? 
max_nr : nr; @@ -1675,7 +1675,7 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, ev->id_index.header.size = sz; ev->id_index.nr = n; - list_for_each_entry(evsel, &evlist->entries, node) { + evlist__for_each(evlist, evsel) { u32 j; for (j = 0; j < evsel->ids; j++) { -- cgit v1.2.3 From a293829df788ae96a174b315010d4b56a10e5114 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 27 Oct 2014 15:49:23 +0200 Subject: perf session: Add perf_session__deliver_synth_event() Add a function to deliver synthesized events from within a session. Intel PT decoding works by synthesizing events (primarily branch events) that can then be consumed by existing tools. This function will be used to deliver those events. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1414417770-18602-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 13 +++++++++++++ tools/perf/util/session.h | 5 +++++ 2 files changed, 18 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 58dd5ceb8bef..f4478ce72fdb 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -907,6 +907,19 @@ static s64 perf_session__process_user_event(struct perf_session *session, } } +int perf_session__deliver_synth_event(struct perf_session *session, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool) +{ + events_stats__inc(&session->stats, event->header.type); + + if (event->header.type >= PERF_RECORD_USER_TYPE_START) + return perf_session__process_user_event(session, event, tool, 0); + + return perf_session__deliver_event(session, event, sample, tool, 0); +} + static void event_swap(union perf_event *event, bool sample_id_all) { perf_event__swap_op swap; diff --git a/tools/perf/util/session.h 
b/tools/perf/util/session.h index d8521ac73a10..dc26ebf60fe4 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -127,6 +127,11 @@ extern volatile int session_done; #define session_done() ACCESS_ONCE(session_done) +int perf_session__deliver_synth_event(struct perf_session *session, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool); + int perf_event__process_id_index(struct perf_tool *tool, union perf_event *event, struct perf_session *session); -- cgit v1.2.3 From 00447ccdf3335ea467841fc3c7d65ffd30748895 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 30 Oct 2014 16:09:42 +0200 Subject: perf tools: Add a thread stack for synthesizing call chains Add a thread stack for synthesizing call chains from call and return events. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1414678188-14946-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 + tools/perf/util/event.h | 26 +++++++ tools/perf/util/thread-stack.c | 172 +++++++++++++++++++++++++++++++++++++++++ tools/perf/util/thread-stack.h | 32 ++++++++ tools/perf/util/thread.c | 3 + tools/perf/util/thread.h | 3 + 6 files changed, 238 insertions(+) create mode 100644 tools/perf/util/thread-stack.c create mode 100644 tools/perf/util/thread-stack.h (limited to 'tools/perf/util') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 3caf7dab50e8..0ebcc4ad0244 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -317,6 +317,7 @@ LIB_H += ui/util.h LIB_H += ui/ui.h LIB_H += util/data.h LIB_H += util/kvm-stat.h +LIB_H += util/thread-stack.h LIB_OBJS += $(OUTPUT)util/abspath.o LIB_OBJS += $(OUTPUT)util/alias.o @@ -394,6 +395,7 @@ LIB_OBJS += $(OUTPUT)util/srcline.o LIB_OBJS += $(OUTPUT)util/data.o LIB_OBJS += 
$(OUTPUT)util/tsc.o LIB_OBJS += $(OUTPUT)util/cloexec.o +LIB_OBJS += $(OUTPUT)util/thread-stack.o LIB_OBJS += $(OUTPUT)ui/setup.o LIB_OBJS += $(OUTPUT)ui/helpline.o diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 8c7fe9d64e79..7be389735402 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -143,6 +143,32 @@ struct branch_stack { struct branch_entry entries[0]; }; +enum { + PERF_IP_FLAG_BRANCH = 1ULL << 0, + PERF_IP_FLAG_CALL = 1ULL << 1, + PERF_IP_FLAG_RETURN = 1ULL << 2, + PERF_IP_FLAG_CONDITIONAL = 1ULL << 3, + PERF_IP_FLAG_SYSCALLRET = 1ULL << 4, + PERF_IP_FLAG_ASYNC = 1ULL << 5, + PERF_IP_FLAG_INTERRUPT = 1ULL << 6, + PERF_IP_FLAG_TX_ABORT = 1ULL << 7, + PERF_IP_FLAG_TRACE_BEGIN = 1ULL << 8, + PERF_IP_FLAG_TRACE_END = 1ULL << 9, + PERF_IP_FLAG_IN_TX = 1ULL << 10, +}; + +#define PERF_BRANCH_MASK (\ + PERF_IP_FLAG_BRANCH |\ + PERF_IP_FLAG_CALL |\ + PERF_IP_FLAG_RETURN |\ + PERF_IP_FLAG_CONDITIONAL |\ + PERF_IP_FLAG_SYSCALLRET |\ + PERF_IP_FLAG_ASYNC |\ + PERF_IP_FLAG_INTERRUPT |\ + PERF_IP_FLAG_TX_ABORT |\ + PERF_IP_FLAG_TRACE_BEGIN |\ + PERF_IP_FLAG_TRACE_END) + struct perf_sample { u64 ip; u32 pid, tid; diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c new file mode 100644 index 000000000000..85b60d2e738f --- /dev/null +++ b/tools/perf/util/thread-stack.c @@ -0,0 +1,172 @@ +/* + * thread-stack.c: Synthesize a thread's stack using call / return events + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ + +#include "thread.h" +#include "event.h" +#include "util.h" +#include "debug.h" +#include "thread-stack.h" + +#define STACK_GROWTH 4096 + +struct thread_stack_entry { + u64 ret_addr; +}; + +struct thread_stack { + struct thread_stack_entry *stack; + size_t cnt; + size_t sz; + u64 trace_nr; +}; + +static int thread_stack__grow(struct thread_stack *ts) +{ + struct thread_stack_entry *new_stack; + size_t sz, new_sz; + + new_sz = ts->sz + STACK_GROWTH; + sz = new_sz * sizeof(struct thread_stack_entry); + + new_stack = realloc(ts->stack, sz); + if (!new_stack) + return -ENOMEM; + + ts->stack = new_stack; + ts->sz = new_sz; + + return 0; +} + +static struct thread_stack *thread_stack__new(void) +{ + struct thread_stack *ts; + + ts = zalloc(sizeof(struct thread_stack)); + if (!ts) + return NULL; + + if (thread_stack__grow(ts)) { + free(ts); + return NULL; + } + + return ts; +} + +static int thread_stack__push(struct thread_stack *ts, u64 ret_addr) +{ + int err = 0; + + if (ts->cnt == ts->sz) { + err = thread_stack__grow(ts); + if (err) { + pr_warning("Out of memory: discarding thread stack\n"); + ts->cnt = 0; + } + } + + ts->stack[ts->cnt++].ret_addr = ret_addr; + + return err; +} + +static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr) +{ + size_t i; + + /* + * In some cases there may be functions which are not seen to return. + * For example when setjmp / longjmp has been used. Or the perf context + * switch in the kernel which doesn't stop and start tracing in exactly + * the same code path. When that happens the return address will be + * further down the stack. If the return address is not found at all, + * we assume the opposite (i.e. this is a return for a call that wasn't + * seen for some reason) and leave the stack alone. 
+ */ + for (i = ts->cnt; i; ) { + if (ts->stack[--i].ret_addr == ret_addr) { + ts->cnt = i; + return; + } + } +} + +int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, + u64 to_ip, u16 insn_len, u64 trace_nr) +{ + if (!thread) + return -EINVAL; + + if (!thread->ts) { + thread->ts = thread_stack__new(); + if (!thread->ts) { + pr_warning("Out of memory: no thread stack\n"); + return -ENOMEM; + } + thread->ts->trace_nr = trace_nr; + } + + /* + * When the trace is discontinuous, the trace_nr changes. In that case + * the stack might be completely invalid. Better to report nothing than + * to report something misleading, so reset the stack count to zero. + */ + if (trace_nr != thread->ts->trace_nr) { + thread->ts->trace_nr = trace_nr; + thread->ts->cnt = 0; + } + + if (flags & PERF_IP_FLAG_CALL) { + u64 ret_addr; + + if (!to_ip) + return 0; + ret_addr = from_ip + insn_len; + if (ret_addr == to_ip) + return 0; /* Zero-length calls are excluded */ + return thread_stack__push(thread->ts, ret_addr); + } else if (flags & PERF_IP_FLAG_RETURN) { + if (!from_ip) + return 0; + thread_stack__pop(thread->ts, to_ip); + } + + return 0; +} + +void thread_stack__free(struct thread *thread) +{ + if (thread->ts) { + zfree(&thread->ts->stack); + zfree(&thread->ts); + } +} + +void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, + size_t sz, u64 ip) +{ + size_t i; + + if (!thread || !thread->ts) + chain->nr = 1; + else + chain->nr = min(sz, thread->ts->cnt + 1); + + chain->ips[0] = ip; + + for (i = 1; i < chain->nr; i++) + chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr; +} diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h new file mode 100644 index 000000000000..7c41579aec74 --- /dev/null +++ b/tools/perf/util/thread-stack.h @@ -0,0 +1,32 @@ +/* + * thread-stack.h: Synthesize a thread's stack using call / return events + * Copyright (c) 2014, Intel Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef __PERF_THREAD_STACK_H +#define __PERF_THREAD_STACK_H + +#include + +#include + +struct thread; +struct ip_callchain; + +int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, + u64 to_ip, u16 insn_len, u64 trace_nr); +void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, + size_t sz, u64 ip); +void thread_stack__free(struct thread *thread); + +#endif diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index bf5bf858b7f6..a2157f0ef1df 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -4,6 +4,7 @@ #include #include "session.h" #include "thread.h" +#include "thread-stack.h" #include "util.h" #include "debug.h" #include "comm.h" @@ -66,6 +67,8 @@ void thread__delete(struct thread *thread) { struct comm *comm, *tmp; + thread_stack__free(thread); + if (thread->mg) { map_groups__put(thread->mg); thread->mg = NULL; diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index d34cf5c0d0d9..160fd066a7d1 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -8,6 +8,8 @@ #include "symbol.h" #include +struct thread_stack; + struct thread { union { struct rb_node rb_node; @@ -26,6 +28,7 @@ struct thread { u64 db_id; void *priv; + struct thread_stack *ts; }; struct machine; -- cgit v1.2.3 From 92a9e4f7db89a013e1bdef2e548928fc71e9867c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 30 Oct 2014 16:09:45 +0200 Subject: perf tools: Enhance the thread stack to output call/return data Enhance the thread 
stack to output detailed information about paired calls and returns. The enhanced processing consumes sample information via thread_stack__process() and outputs information about paired calls / returns via a call-back. While the call-back makes it possible for the facility to be used by arbitrary tools, a subsequent patch will provide the information to Python scripting via the db-export interface. An important part of the call/return information is the call path which provides a structure that defines a context sensitive call graph. Note that there are now two ways to use the thread stack. For simply providing a call stack (like you would get from the perf record -g option) the interface consists of thread_stack__event() and thread_stack__sample(). Whereas the enhanced interface consists of call_return_processor__new() and thread_stack__process(). Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1414678188-14946-5-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread-stack.c | 585 ++++++++++++++++++++++++++++++++++++++++- tools/perf/util/thread-stack.h | 79 ++++++ 2 files changed, 659 insertions(+), 5 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index 85b60d2e738f..9ed59a452d1f 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -13,23 +13,96 @@ * */ +#include +#include #include "thread.h" #include "event.h" +#include "machine.h" #include "util.h" #include "debug.h" +#include "symbol.h" +#include "comm.h" #include "thread-stack.h" -#define STACK_GROWTH 4096 +#define CALL_PATH_BLOCK_SHIFT 8 +#define CALL_PATH_BLOCK_SIZE (1 << CALL_PATH_BLOCK_SHIFT) +#define CALL_PATH_BLOCK_MASK (CALL_PATH_BLOCK_SIZE - 1) +struct call_path_block { + struct call_path 
cp[CALL_PATH_BLOCK_SIZE]; + struct list_head node; +}; + +/** + * struct call_path_root - root of all call paths. + * @call_path: root call path + * @blocks: list of blocks to store call paths + * @next: next free space + * @sz: number of spaces + */ +struct call_path_root { + struct call_path call_path; + struct list_head blocks; + size_t next; + size_t sz; +}; + +/** + * struct call_return_processor - provides a call-back to consume call-return + * information. + * @cpr: call path root + * @process: call-back that accepts call/return information + * @data: anonymous data for call-back + */ +struct call_return_processor { + struct call_path_root *cpr; + int (*process)(struct call_return *cr, void *data); + void *data; +}; + +#define STACK_GROWTH 2048 + +/** + * struct thread_stack_entry - thread stack entry. + * @ret_addr: return address + * @timestamp: timestamp (if known) + * @ref: external reference (e.g. db_id of sample) + * @branch_count: the branch count when the entry was created + * @cp: call path + * @no_call: a 'call' was not seen + */ struct thread_stack_entry { u64 ret_addr; + u64 timestamp; + u64 ref; + u64 branch_count; + struct call_path *cp; + bool no_call; }; +/** + * struct thread_stack - thread stack constructed from 'call' and 'return' + * branch samples. 
+ * @stack: array that holds the stack + * @cnt: number of entries in the stack + * @sz: current maximum stack size + * @trace_nr: current trace number + * @branch_count: running branch count + * @kernel_start: kernel start address + * @last_time: last timestamp + * @crp: call/return processor + * @comm: current comm + */ struct thread_stack { struct thread_stack_entry *stack; size_t cnt; size_t sz; u64 trace_nr; + u64 branch_count; + u64 kernel_start; + u64 last_time; + struct call_return_processor *crp; + struct comm *comm; }; static int thread_stack__grow(struct thread_stack *ts) @@ -50,7 +123,8 @@ static int thread_stack__grow(struct thread_stack *ts) return 0; } -static struct thread_stack *thread_stack__new(void) +static struct thread_stack *thread_stack__new(struct thread *thread, + struct call_return_processor *crp) { struct thread_stack *ts; @@ -63,6 +137,12 @@ static struct thread_stack *thread_stack__new(void) return NULL; } + if (thread->mg && thread->mg->machine) + ts->kernel_start = machine__kernel_start(thread->mg->machine); + else + ts->kernel_start = 1ULL << 63; + ts->crp = crp; + return ts; } @@ -104,6 +184,64 @@ static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr) } } +static bool thread_stack__in_kernel(struct thread_stack *ts) +{ + if (!ts->cnt) + return false; + + return ts->stack[ts->cnt - 1].cp->in_kernel; +} + +static int thread_stack__call_return(struct thread *thread, + struct thread_stack *ts, size_t idx, + u64 timestamp, u64 ref, bool no_return) +{ + struct call_return_processor *crp = ts->crp; + struct thread_stack_entry *tse; + struct call_return cr = { + .thread = thread, + .comm = ts->comm, + .db_id = 0, + }; + + tse = &ts->stack[idx]; + cr.cp = tse->cp; + cr.call_time = tse->timestamp; + cr.return_time = timestamp; + cr.branch_count = ts->branch_count - tse->branch_count; + cr.call_ref = tse->ref; + cr.return_ref = ref; + if (tse->no_call) + cr.flags |= CALL_RETURN_NO_CALL; + if (no_return) + cr.flags |= 
CALL_RETURN_NO_RETURN; + + return crp->process(&cr, crp->data); +} + +static int thread_stack__flush(struct thread *thread, struct thread_stack *ts) +{ + struct call_return_processor *crp = ts->crp; + int err; + + if (!crp) { + ts->cnt = 0; + return 0; + } + + while (ts->cnt) { + err = thread_stack__call_return(thread, ts, --ts->cnt, + ts->last_time, 0, true); + if (err) { + pr_err("Error flushing thread stack!\n"); + ts->cnt = 0; + return err; + } + } + + return 0; +} + int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, u64 to_ip, u16 insn_len, u64 trace_nr) { @@ -111,7 +249,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, return -EINVAL; if (!thread->ts) { - thread->ts = thread_stack__new(); + thread->ts = thread_stack__new(thread, NULL); if (!thread->ts) { pr_warning("Out of memory: no thread stack\n"); return -ENOMEM; @@ -122,13 +260,18 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, /* * When the trace is discontinuous, the trace_nr changes. In that case * the stack might be completely invalid. Better to report nothing than - * to report something misleading, so reset the stack count to zero. + * to report something misleading, so flush the stack. 
*/ if (trace_nr != thread->ts->trace_nr) { + if (thread->ts->trace_nr) + thread_stack__flush(thread, thread->ts); thread->ts->trace_nr = trace_nr; - thread->ts->cnt = 0; } + /* Stop here if thread_stack__process() is in use */ + if (thread->ts->crp) + return 0; + if (flags & PERF_IP_FLAG_CALL) { u64 ret_addr; @@ -147,9 +290,22 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, return 0; } +void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr) +{ + if (!thread || !thread->ts) + return; + + if (trace_nr != thread->ts->trace_nr) { + if (thread->ts->trace_nr) + thread_stack__flush(thread, thread->ts); + thread->ts->trace_nr = trace_nr; + } +} + void thread_stack__free(struct thread *thread) { if (thread->ts) { + thread_stack__flush(thread, thread->ts); zfree(&thread->ts->stack); zfree(&thread->ts); } @@ -170,3 +326,422 @@ void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, for (i = 1; i < chain->nr; i++) chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr; } + +static void call_path__init(struct call_path *cp, struct call_path *parent, + struct symbol *sym, u64 ip, bool in_kernel) +{ + cp->parent = parent; + cp->sym = sym; + cp->ip = sym ? 
0 : ip; + cp->db_id = 0; + cp->in_kernel = in_kernel; + RB_CLEAR_NODE(&cp->rb_node); + cp->children = RB_ROOT; +} + +static struct call_path_root *call_path_root__new(void) +{ + struct call_path_root *cpr; + + cpr = zalloc(sizeof(struct call_path_root)); + if (!cpr) + return NULL; + call_path__init(&cpr->call_path, NULL, NULL, 0, false); + INIT_LIST_HEAD(&cpr->blocks); + return cpr; +} + +static void call_path_root__free(struct call_path_root *cpr) +{ + struct call_path_block *pos, *n; + + list_for_each_entry_safe(pos, n, &cpr->blocks, node) { + list_del(&pos->node); + free(pos); + } + free(cpr); +} + +static struct call_path *call_path__new(struct call_path_root *cpr, + struct call_path *parent, + struct symbol *sym, u64 ip, + bool in_kernel) +{ + struct call_path_block *cpb; + struct call_path *cp; + size_t n; + + if (cpr->next < cpr->sz) { + cpb = list_last_entry(&cpr->blocks, struct call_path_block, + node); + } else { + cpb = zalloc(sizeof(struct call_path_block)); + if (!cpb) + return NULL; + list_add_tail(&cpb->node, &cpr->blocks); + cpr->sz += CALL_PATH_BLOCK_SIZE; + } + + n = cpr->next++ & CALL_PATH_BLOCK_MASK; + cp = &cpb->cp[n]; + + call_path__init(cp, parent, sym, ip, in_kernel); + + return cp; +} + +static struct call_path *call_path__findnew(struct call_path_root *cpr, + struct call_path *parent, + struct symbol *sym, u64 ip, u64 ks) +{ + struct rb_node **p; + struct rb_node *node_parent = NULL; + struct call_path *cp; + bool in_kernel = ip >= ks; + + if (sym) + ip = 0; + + if (!parent) + return call_path__new(cpr, parent, sym, ip, in_kernel); + + p = &parent->children.rb_node; + while (*p != NULL) { + node_parent = *p; + cp = rb_entry(node_parent, struct call_path, rb_node); + + if (cp->sym == sym && cp->ip == ip) + return cp; + + if (sym < cp->sym || (sym == cp->sym && ip < cp->ip)) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + cp = call_path__new(cpr, parent, sym, ip, in_kernel); + if (!cp) + return NULL; + + 
rb_link_node(&cp->rb_node, node_parent, p); + rb_insert_color(&cp->rb_node, &parent->children); + + return cp; +} + +struct call_return_processor * +call_return_processor__new(int (*process)(struct call_return *cr, void *data), + void *data) +{ + struct call_return_processor *crp; + + crp = zalloc(sizeof(struct call_return_processor)); + if (!crp) + return NULL; + crp->cpr = call_path_root__new(); + if (!crp->cpr) + goto out_free; + crp->process = process; + crp->data = data; + return crp; + +out_free: + free(crp); + return NULL; +} + +void call_return_processor__free(struct call_return_processor *crp) +{ + if (crp) { + call_path_root__free(crp->cpr); + free(crp); + } +} + +static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, + u64 timestamp, u64 ref, struct call_path *cp, + bool no_call) +{ + struct thread_stack_entry *tse; + int err; + + if (ts->cnt == ts->sz) { + err = thread_stack__grow(ts); + if (err) + return err; + } + + tse = &ts->stack[ts->cnt++]; + tse->ret_addr = ret_addr; + tse->timestamp = timestamp; + tse->ref = ref; + tse->branch_count = ts->branch_count; + tse->cp = cp; + tse->no_call = no_call; + + return 0; +} + +static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts, + u64 ret_addr, u64 timestamp, u64 ref, + struct symbol *sym) +{ + int err; + + if (!ts->cnt) + return 1; + + if (ts->cnt == 1) { + struct thread_stack_entry *tse = &ts->stack[0]; + + if (tse->cp->sym == sym) + return thread_stack__call_return(thread, ts, --ts->cnt, + timestamp, ref, false); + } + + if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) { + return thread_stack__call_return(thread, ts, --ts->cnt, + timestamp, ref, false); + } else { + size_t i = ts->cnt - 1; + + while (i--) { + if (ts->stack[i].ret_addr != ret_addr) + continue; + i += 1; + while (ts->cnt > i) { + err = thread_stack__call_return(thread, ts, + --ts->cnt, + timestamp, ref, + true); + if (err) + return err; + } + return thread_stack__call_return(thread, ts, --ts->cnt, 
+ timestamp, ref, false); + } + } + + return 1; +} + +static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts, + struct perf_sample *sample, + struct addr_location *from_al, + struct addr_location *to_al, u64 ref) +{ + struct call_path_root *cpr = ts->crp->cpr; + struct call_path *cp; + struct symbol *sym; + u64 ip; + + if (sample->ip) { + ip = sample->ip; + sym = from_al->sym; + } else if (sample->addr) { + ip = sample->addr; + sym = to_al->sym; + } else { + return 0; + } + + cp = call_path__findnew(cpr, &cpr->call_path, sym, ip, + ts->kernel_start); + if (!cp) + return -ENOMEM; + + return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp, + true); +} + +static int thread_stack__no_call_return(struct thread *thread, + struct thread_stack *ts, + struct perf_sample *sample, + struct addr_location *from_al, + struct addr_location *to_al, u64 ref) +{ + struct call_path_root *cpr = ts->crp->cpr; + struct call_path *cp, *parent; + u64 ks = ts->kernel_start; + int err; + + if (sample->ip >= ks && sample->addr < ks) { + /* Return to userspace, so pop all kernel addresses */ + while (thread_stack__in_kernel(ts)) { + err = thread_stack__call_return(thread, ts, --ts->cnt, + sample->time, ref, + true); + if (err) + return err; + } + + /* If the stack is empty, push the userspace address */ + if (!ts->cnt) { + cp = call_path__findnew(cpr, &cpr->call_path, + to_al->sym, sample->addr, + ts->kernel_start); + if (!cp) + return -ENOMEM; + return thread_stack__push_cp(ts, 0, sample->time, ref, + cp, true); + } + } else if (thread_stack__in_kernel(ts) && sample->ip < ks) { + /* Return to userspace, so pop all kernel addresses */ + while (thread_stack__in_kernel(ts)) { + err = thread_stack__call_return(thread, ts, --ts->cnt, + sample->time, ref, + true); + if (err) + return err; + } + } + + if (ts->cnt) + parent = ts->stack[ts->cnt - 1].cp; + else + parent = &cpr->call_path; + + /* This 'return' had no 'call', so push and pop top of stack */ + cp = 
call_path__findnew(cpr, parent, from_al->sym, sample->ip, + ts->kernel_start); + if (!cp) + return -ENOMEM; + + err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp, + true); + if (err) + return err; + + return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref, + to_al->sym); +} + +static int thread_stack__trace_begin(struct thread *thread, + struct thread_stack *ts, u64 timestamp, + u64 ref) +{ + struct thread_stack_entry *tse; + int err; + + if (!ts->cnt) + return 0; + + /* Pop trace end */ + tse = &ts->stack[ts->cnt - 1]; + if (tse->cp->sym == NULL && tse->cp->ip == 0) { + err = thread_stack__call_return(thread, ts, --ts->cnt, + timestamp, ref, false); + if (err) + return err; + } + + return 0; +} + +static int thread_stack__trace_end(struct thread_stack *ts, + struct perf_sample *sample, u64 ref) +{ + struct call_path_root *cpr = ts->crp->cpr; + struct call_path *cp; + u64 ret_addr; + + /* No point having 'trace end' on the bottom of the stack */ + if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref)) + return 0; + + cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0, + ts->kernel_start); + if (!cp) + return -ENOMEM; + + ret_addr = sample->ip + sample->insn_len; + + return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp, + false); +} + +int thread_stack__process(struct thread *thread, struct comm *comm, + struct perf_sample *sample, + struct addr_location *from_al, + struct addr_location *to_al, u64 ref, + struct call_return_processor *crp) +{ + struct thread_stack *ts = thread->ts; + int err = 0; + + if (ts) { + if (!ts->crp) { + /* Supersede thread_stack__event() */ + thread_stack__free(thread); + thread->ts = thread_stack__new(thread, crp); + if (!thread->ts) + return -ENOMEM; + ts = thread->ts; + ts->comm = comm; + } + } else { + thread->ts = thread_stack__new(thread, crp); + if (!thread->ts) + return -ENOMEM; + ts = thread->ts; + ts->comm = comm; + } + + /* Flush stack on exec */ + if (ts->comm != 
comm && thread->pid_ == thread->tid) { + err = thread_stack__flush(thread, ts); + if (err) + return err; + ts->comm = comm; + } + + /* If the stack is empty, put the current symbol on the stack */ + if (!ts->cnt) { + err = thread_stack__bottom(thread, ts, sample, from_al, to_al, + ref); + if (err) + return err; + } + + ts->branch_count += 1; + ts->last_time = sample->time; + + if (sample->flags & PERF_IP_FLAG_CALL) { + struct call_path_root *cpr = ts->crp->cpr; + struct call_path *cp; + u64 ret_addr; + + if (!sample->ip || !sample->addr) + return 0; + + ret_addr = sample->ip + sample->insn_len; + if (ret_addr == sample->addr) + return 0; /* Zero-length calls are excluded */ + + cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, + to_al->sym, sample->addr, + ts->kernel_start); + if (!cp) + return -ENOMEM; + err = thread_stack__push_cp(ts, ret_addr, sample->time, ref, + cp, false); + } else if (sample->flags & PERF_IP_FLAG_RETURN) { + if (!sample->ip || !sample->addr) + return 0; + + err = thread_stack__pop_cp(thread, ts, sample->addr, + sample->time, ref, from_al->sym); + if (err) { + if (err < 0) + return err; + err = thread_stack__no_call_return(thread, ts, sample, + from_al, to_al, ref); + } + } else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) { + err = thread_stack__trace_begin(thread, ts, sample->time, ref); + } else if (sample->flags & PERF_IP_FLAG_TRACE_END) { + err = thread_stack__trace_end(ts, sample, ref); + } + + return err; +} diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index 7c41579aec74..b843bbef8ba2 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -19,14 +19,93 @@ #include #include +#include struct thread; +struct comm; struct ip_callchain; +struct symbol; +struct dso; +struct call_return_processor; +struct comm; +struct perf_sample; +struct addr_location; + +/* + * Call/Return flags. 
+ * + * CALL_RETURN_NO_CALL: 'return' but no matching 'call' + * CALL_RETURN_NO_RETURN: 'call' but no matching 'return' + */ +enum { + CALL_RETURN_NO_CALL = 1 << 0, + CALL_RETURN_NO_RETURN = 1 << 1, +}; + +/** + * struct call_return - paired call/return information. + * @thread: thread in which call/return occurred + * @comm: comm in which call/return occurred + * @cp: call path + * @call_time: timestamp of call (if known) + * @return_time: timestamp of return (if known) + * @branch_count: number of branches seen between call and return + * @call_ref: external reference to 'call' sample (e.g. db_id) + * @return_ref: external reference to 'return' sample (e.g. db_id) + * @db_id: id used for db-export + * @flags: Call/Return flags + */ +struct call_return { + struct thread *thread; + struct comm *comm; + struct call_path *cp; + u64 call_time; + u64 return_time; + u64 branch_count; + u64 call_ref; + u64 return_ref; + u64 db_id; + u32 flags; +}; + +/** + * struct call_path - node in list of calls leading to a function call. + * @parent: call path to the parent function call + * @sym: symbol of function called + * @ip: only if sym is null, the ip of the function + * @db_id: id used for db-export + * @in_kernel: whether function is in the kernel + * @rb_node: node in parent's tree of called functions + * @children: tree of call paths of functions called + * + * In combination with the call_return structure, the call_path structure + * defines a context-sensitive call-graph. 
+ */ +struct call_path { + struct call_path *parent; + struct symbol *sym; + u64 ip; + u64 db_id; + bool in_kernel; + struct rb_node rb_node; + struct rb_root children; +}; int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, u64 to_ip, u16 insn_len, u64 trace_nr); +void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr); void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, size_t sz, u64 ip); void thread_stack__free(struct thread *thread); +struct call_return_processor * +call_return_processor__new(int (*process)(struct call_return *cr, void *data), + void *data); +void call_return_processor__free(struct call_return_processor *crp); +int thread_stack__process(struct thread *thread, struct comm *comm, + struct perf_sample *sample, + struct addr_location *from_al, + struct addr_location *to_al, u64 ref, + struct call_return_processor *crp); + #endif -- cgit v1.2.3 From f2bff007679e7d293cb07bb26e18ccf11cc1c4b2 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 30 Oct 2014 16:09:43 +0200 Subject: perf tools: Add branch type to db export Add the ability to export branch types through the database export facility. 
Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1414678188-14946-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/db-export.c | 48 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/db-export.h | 6 ++++++ 2 files changed, 54 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index be128b075a32..bccb83120971 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -208,6 +208,15 @@ static int db_ids_from_al(struct db_export *dbe, struct addr_location *al, return 0; } +int db_export__branch_type(struct db_export *dbe, u32 branch_type, + const char *name) +{ + if (dbe->export_branch_type) + return dbe->export_branch_type(dbe, branch_type, name); + + return 0; +} + int db_export__sample(struct db_export *dbe, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, struct thread *thread, struct addr_location *al) @@ -268,3 +277,42 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, return 0; } + +static struct { + u32 branch_type; + const char *name; +} branch_types[] = { + {0, "no branch"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL, "call"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN, "return"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL, "conditional jump"}, + {PERF_IP_FLAG_BRANCH, "unconditional jump"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_INTERRUPT, + "software interrupt"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_INTERRUPT, + "return from interrupt"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_SYSCALLRET, + "system call"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_SYSCALLRET, + "return from system call"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_ASYNC, 
"asynchronous branch"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | + PERF_IP_FLAG_INTERRUPT, "hardware interrupt"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT, "transaction abort"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN, "trace begin"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "trace end"}, + {0, NULL} +}; + +int db_export__branch_types(struct db_export *dbe) +{ + int i, err = 0; + + for (i = 0; branch_types[i].name ; i++) { + err = db_export__branch_type(dbe, branch_types[i].branch_type, + branch_types[i].name); + if (err) + break; + } + return err; +} diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h index b3643e8e5750..e4baa45ead70 100644 --- a/tools/perf/util/db-export.h +++ b/tools/perf/util/db-export.h @@ -54,6 +54,8 @@ struct db_export { struct machine *machine); int (*export_symbol)(struct db_export *dbe, struct symbol *sym, struct dso *dso); + int (*export_branch_type)(struct db_export *dbe, u32 branch_type, + const char *name); int (*export_sample)(struct db_export *dbe, struct export_sample *es); u64 evsel_last_db_id; u64 machine_last_db_id; @@ -79,8 +81,12 @@ int db_export__dso(struct db_export *dbe, struct dso *dso, struct machine *machine); int db_export__symbol(struct db_export *dbe, struct symbol *sym, struct dso *dso); +int db_export__branch_type(struct db_export *dbe, u32 branch_type, + const char *name); int db_export__sample(struct db_export *dbe, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, struct thread *thread, struct addr_location *al); +int db_export__branch_types(struct db_export *dbe); + #endif -- cgit v1.2.3 From c29414f5cfd641d956c5287848fdd8f25bb2afa3 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 30 Oct 2014 16:09:44 +0200 Subject: perf tools: Add branch_type and in_tx to Python export Add branch_type and in_tx to Python db export and the export-to-postgresql.py script. 
Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1414678188-14946-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-postgresql.py | 32 ++++++++++++++++++---- .../util/scripting-engines/trace-event-python.c | 30 +++++++++++++++++++- 2 files changed, 55 insertions(+), 7 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index d8f6df0093d6..bb79aecccf58 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -123,6 +123,10 @@ do_query(query, 'CREATE TABLE symbols (' 'sym_end bigint,' 'binding integer,' 'name varchar(2048))') +do_query(query, 'CREATE TABLE branch_types (' + 'id integer NOT NULL,' + 'name varchar(80))') + if branches: do_query(query, 'CREATE TABLE samples (' 'id bigint NOT NULL,' @@ -139,7 +143,9 @@ if branches: 'to_dso_id bigint,' 'to_symbol_id bigint,' 'to_sym_offset bigint,' - 'to_ip bigint)') + 'to_ip bigint,' + 'branch_type integer,' + 'in_tx boolean)') else: do_query(query, 'CREATE TABLE samples (' 'id bigint NOT NULL,' @@ -160,7 +166,9 @@ else: 'period bigint,' 'weight bigint,' 'transaction bigint,' - 'data_src bigint)') + 'data_src bigint,' + 'branch_type integer,' + 'in_tx boolean)') do_query(query, 'CREATE VIEW samples_view AS ' 'SELECT ' @@ -178,7 +186,9 @@ do_query(query, 'CREATE VIEW samples_view AS ' 'to_hex(to_ip) AS to_ip_hex,' '(SELECT name FROM symbols WHERE id = to_symbol_id) AS to_symbol,' 'to_sym_offset,' - '(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name' + '(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,' + '(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,' + 'in_tx' ' FROM 
samples') @@ -234,6 +244,7 @@ comm_file = open_output_file("comm_table.bin") comm_thread_file = open_output_file("comm_thread_table.bin") dso_file = open_output_file("dso_table.bin") symbol_file = open_output_file("symbol_table.bin") +branch_type_file = open_output_file("branch_type_table.bin") sample_file = open_output_file("sample_table.bin") def trace_begin(): @@ -257,6 +268,7 @@ def trace_end(): copy_output_file(comm_thread_file, "comm_threads") copy_output_file(dso_file, "dsos") copy_output_file(symbol_file, "symbols") + copy_output_file(branch_type_file, "branch_types") copy_output_file(sample_file, "samples") print datetime.datetime.today(), "Removing intermediate files..." @@ -267,6 +279,7 @@ def trace_end(): remove_output_file(comm_thread_file) remove_output_file(dso_file) remove_output_file(symbol_file) + remove_output_file(branch_type_file) remove_output_file(sample_file) os.rmdir(output_dir_name) print datetime.datetime.today(), "Adding primary keys" @@ -277,6 +290,7 @@ def trace_end(): do_query(query, 'ALTER TABLE comm_threads ADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE dsos ADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE symbols ADD PRIMARY KEY (id)') + do_query(query, 'ALTER TABLE branch_types ADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE samples ADD PRIMARY KEY (id)') print datetime.datetime.today(), "Adding foreign keys" @@ -352,9 +366,15 @@ def symbol_table(symbol_id, dso_id, sym_start, sym_end, binding, symbol_name, *x value = struct.pack(fmt, 6, 8, symbol_id, 8, dso_id, 8, sym_start, 8, sym_end, 4, binding, n, symbol_name) symbol_file.write(value) -def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, *x): +def branch_type_table(branch_type, name, *x): + n = len(name) + fmt = "!hiii" + str(n) + "s" + value = struct.pack(fmt, 2, 4, branch_type, n, name) + 
branch_type_file.write(value) + +def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, *x): if branches: - value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiq", 15, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip) + value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiB", 17, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx) else: - value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiq", 19, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src) + value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiB", 21, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx) sample_file.write(value) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 2fd7ee8f18c7..f3ca7798b3d0 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -66,6 +66,7 @@ struct tables { PyObject *comm_thread_handler; PyObject *dso_handler; PyObject *symbol_handler; + PyObject *branch_type_handler; PyObject *sample_handler; bool db_export_mode; }; @@ -664,13 +665,31 @@ 
static int python_export_symbol(struct db_export *dbe, struct symbol *sym, return 0; } +static int python_export_branch_type(struct db_export *dbe, u32 branch_type, + const char *name) +{ + struct tables *tables = container_of(dbe, struct tables, dbe); + PyObject *t; + + t = tuple_new(2); + + tuple_set_s32(t, 0, branch_type); + tuple_set_string(t, 1, name); + + call_object(tables->branch_type_handler, t, "branch_type_table"); + + Py_DECREF(t); + + return 0; +} + static int python_export_sample(struct db_export *dbe, struct export_sample *es) { struct tables *tables = container_of(dbe, struct tables, dbe); PyObject *t; - t = tuple_new(19); + t = tuple_new(21); tuple_set_u64(t, 0, es->db_id); tuple_set_u64(t, 1, es->evsel->db_id); @@ -691,6 +710,8 @@ static int python_export_sample(struct db_export *dbe, tuple_set_u64(t, 16, es->sample->weight); tuple_set_u64(t, 17, es->sample->transaction); tuple_set_u64(t, 18, es->sample->data_src); + tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK); + tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX)); call_object(tables->sample_handler, t, "sample_table"); @@ -861,6 +882,7 @@ static void set_table_handlers(struct tables *tables) SET_TABLE_HANDLER(comm_thread); SET_TABLE_HANDLER(dso); SET_TABLE_HANDLER(symbol); + SET_TABLE_HANDLER(branch_type); SET_TABLE_HANDLER(sample); } @@ -910,6 +932,12 @@ static int python_start_script(const char *script, int argc, const char **argv) set_table_handlers(tables); + if (tables->db_export_mode) { + err = db_export__branch_types(&tables->dbe); + if (err) + goto error; + } + return err; error: Py_Finalize(); -- cgit v1.2.3 From 88f50d602f500d206f2f5a9a9751dd45f2d97739 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 30 Oct 2014 16:09:46 +0200 Subject: perf tools: Add call information to the database export API Make it possible for the database export API to use the enhanced thread stack and export detailed information about paired calls and returns. 
Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1414678188-14946-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/db-export.c | 52 ++++++++++++++++++++++++++++++++++++++++++++- tools/perf/util/db-export.h | 12 +++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index bccb83120971..017ecbb0ec05 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -21,6 +21,7 @@ #include "comm.h" #include "symbol.h" #include "event.h" +#include "thread-stack.h" #include "db-export.h" int db_export__init(struct db_export *dbe) @@ -29,8 +30,10 @@ int db_export__init(struct db_export *dbe) return 0; } -void db_export__exit(struct db_export *dbe __maybe_unused) +void db_export__exit(struct db_export *dbe) { + call_return_processor__free(dbe->crp); + dbe->crp = NULL; } int db_export__evsel(struct db_export *dbe, struct perf_evsel *evsel) @@ -270,6 +273,13 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, &es.addr_sym_db_id, &es.addr_offset); if (err) return err; + if (dbe->crp) { + err = thread_stack__process(thread, comm, sample, al, + &addr_al, es.db_id, + dbe->crp); + if (err) + return err; + } } if (dbe->export_sample) @@ -316,3 +326,43 @@ int db_export__branch_types(struct db_export *dbe) } return err; } + +int db_export__call_path(struct db_export *dbe, struct call_path *cp) +{ + int err; + + if (cp->db_id) + return 0; + + if (cp->parent) { + err = db_export__call_path(dbe, cp->parent); + if (err) + return err; + } + + cp->db_id = ++dbe->call_path_last_db_id; + + if (dbe->export_call_path) + return dbe->export_call_path(dbe, cp); + + return 0; +} + +int db_export__call_return(struct db_export *dbe, struct call_return *cr) +{ + int err; 
+ + if (cr->db_id) + return 0; + + err = db_export__call_path(dbe, cr->cp); + if (err) + return err; + + cr->db_id = ++dbe->call_return_last_db_id; + + if (dbe->export_call_return) + return dbe->export_call_return(dbe, cr); + + return 0; +} diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h index e4baa45ead70..dd5ac2ae97d4 100644 --- a/tools/perf/util/db-export.h +++ b/tools/perf/util/db-export.h @@ -25,6 +25,9 @@ struct comm; struct dso; struct perf_sample; struct addr_location; +struct call_return_processor; +struct call_path; +struct call_return; struct export_sample { union perf_event *event; @@ -57,6 +60,10 @@ struct db_export { int (*export_branch_type)(struct db_export *dbe, u32 branch_type, const char *name); int (*export_sample)(struct db_export *dbe, struct export_sample *es); + int (*export_call_path)(struct db_export *dbe, struct call_path *cp); + int (*export_call_return)(struct db_export *dbe, + struct call_return *cr); + struct call_return_processor *crp; u64 evsel_last_db_id; u64 machine_last_db_id; u64 thread_last_db_id; @@ -65,6 +72,8 @@ struct db_export { u64 dso_last_db_id; u64 symbol_last_db_id; u64 sample_last_db_id; + u64 call_path_last_db_id; + u64 call_return_last_db_id; }; int db_export__init(struct db_export *dbe); @@ -89,4 +98,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, int db_export__branch_types(struct db_export *dbe); +int db_export__call_path(struct db_export *dbe, struct call_path *cp); +int db_export__call_return(struct db_export *dbe, struct call_return *cr); + #endif -- cgit v1.2.3 From 6a70307ddcd9999598c399d55dc44c07816a575f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 30 Oct 2014 16:09:47 +0200 Subject: perf tools: Add call information to Python export Add the ability to export detailed information about paired calls and returns to Python db export and the export-to-postgresql.py script. 
Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1414678188-14946-7-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../scripts/python/bin/export-to-postgresql-report | 15 ++-- tools/perf/scripts/python/export-to-postgresql.py | 66 ++++++++++++++++- .../util/scripting-engines/trace-event-python.c | 84 +++++++++++++++++++++- 3 files changed, 158 insertions(+), 7 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/scripts/python/bin/export-to-postgresql-report b/tools/perf/scripts/python/bin/export-to-postgresql-report index a8fdd15f85bf..cd335b6e2a01 100644 --- a/tools/perf/scripts/python/bin/export-to-postgresql-report +++ b/tools/perf/scripts/python/bin/export-to-postgresql-report @@ -1,6 +1,6 @@ #!/bin/bash # description: export perf data to a postgresql database -# args: [database name] [columns] +# args: [database name] [columns] [calls] n_args=0 for i in "$@" do @@ -9,11 +9,16 @@ do fi n_args=$(( $n_args + 1 )) done -if [ "$n_args" -gt 2 ] ; then - echo "usage: export-to-postgresql-report [database name] [columns]" +if [ "$n_args" -gt 3 ] ; then + echo "usage: export-to-postgresql-report [database name] [columns] [calls]" exit fi -if [ "$n_args" -gt 1 ] ; then +if [ "$n_args" -gt 2 ] ; then + dbname=$1 + columns=$2 + calls=$3 + shift 3 +elif [ "$n_args" -gt 1 ] ; then dbname=$1 columns=$2 shift 2 @@ -21,4 +26,4 @@ elif [ "$n_args" -gt 0 ] ; then dbname=$1 shift fi -perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/export-to-postgresql.py $dbname $columns +perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/export-to-postgresql.py $dbname $columns $calls diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index bb79aecccf58..4cdafd880074 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ 
b/tools/perf/scripts/python/export-to-postgresql.py @@ -40,10 +40,12 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \ #from Core import * perf_db_export_mode = True +perf_db_export_calls = False def usage(): - print >> sys.stderr, "Usage is: export-to-postgresql.py []" + print >> sys.stderr, "Usage is: export-to-postgresql.py [] []" print >> sys.stderr, "where: columns 'all' or 'branches'" + print >> sys.stderr, " calls 'calls' => create calls table" raise Exception("Too few arguments") if (len(sys.argv) < 2): @@ -61,6 +63,12 @@ if columns not in ("all", "branches"): branches = (columns == "branches") +if (len(sys.argv) >= 4): + if (sys.argv[3] == "calls"): + perf_db_export_calls = True + else: + usage() + output_dir_name = os.getcwd() + "/" + dbname + "-perf-data" os.mkdir(output_dir_name) @@ -170,6 +178,25 @@ else: 'branch_type integer,' 'in_tx boolean)') +if perf_db_export_calls: + do_query(query, 'CREATE TABLE call_paths (' + 'id bigint NOT NULL,' + 'parent_id bigint,' + 'symbol_id bigint,' + 'ip bigint)') + do_query(query, 'CREATE TABLE calls (' + 'id bigint NOT NULL,' + 'thread_id bigint,' + 'comm_id bigint,' + 'call_path_id bigint,' + 'call_time bigint,' + 'return_time bigint,' + 'branch_count bigint,' + 'call_id bigint,' + 'return_id bigint,' + 'parent_call_path_id bigint,' + 'flags integer)') + do_query(query, 'CREATE VIEW samples_view AS ' 'SELECT ' 'id,' @@ -246,6 +273,9 @@ dso_file = open_output_file("dso_table.bin") symbol_file = open_output_file("symbol_table.bin") branch_type_file = open_output_file("branch_type_table.bin") sample_file = open_output_file("sample_table.bin") +if perf_db_export_calls: + call_path_file = open_output_file("call_path_table.bin") + call_file = open_output_file("call_table.bin") def trace_begin(): print datetime.datetime.today(), "Writing to intermediate files..." 
@@ -256,6 +286,9 @@ def trace_begin(): comm_table(0, "unknown") dso_table(0, 0, "unknown", "unknown", "") symbol_table(0, 0, 0, 0, 0, "unknown") + sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + if perf_db_export_calls: + call_path_table(0, 0, 0, 0) unhandled_count = 0 @@ -270,6 +303,9 @@ def trace_end(): copy_output_file(symbol_file, "symbols") copy_output_file(branch_type_file, "branch_types") copy_output_file(sample_file, "samples") + if perf_db_export_calls: + copy_output_file(call_path_file, "call_paths") + copy_output_file(call_file, "calls") print datetime.datetime.today(), "Removing intermediate files..." remove_output_file(evsel_file) @@ -281,6 +317,9 @@ def trace_end(): remove_output_file(symbol_file) remove_output_file(branch_type_file) remove_output_file(sample_file) + if perf_db_export_calls: + remove_output_file(call_path_file) + remove_output_file(call_file) os.rmdir(output_dir_name) print datetime.datetime.today(), "Adding primary keys" do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)') @@ -292,6 +331,9 @@ def trace_end(): do_query(query, 'ALTER TABLE symbols ADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE branch_types ADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE samples ADD PRIMARY KEY (id)') + if perf_db_export_calls: + do_query(query, 'ALTER TABLE call_paths ADD PRIMARY KEY (id)') + do_query(query, 'ALTER TABLE calls ADD PRIMARY KEY (id)') print datetime.datetime.today(), "Adding foreign keys" do_query(query, 'ALTER TABLE threads ' @@ -313,6 +355,18 @@ def trace_end(): 'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id),' 'ADD CONSTRAINT todsofk FOREIGN KEY (to_dso_id) REFERENCES dsos (id),' 'ADD CONSTRAINT tosymbolfk FOREIGN KEY (to_symbol_id) REFERENCES symbols (id)') + if perf_db_export_calls: + do_query(query, 'ALTER TABLE call_paths ' + 'ADD CONSTRAINT parentfk FOREIGN KEY (parent_id) REFERENCES call_paths (id),' + 'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) 
REFERENCES symbols (id)') + do_query(query, 'ALTER TABLE calls ' + 'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id),' + 'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),' + 'ADD CONSTRAINT call_pathfk FOREIGN KEY (call_path_id) REFERENCES call_paths (id),' + 'ADD CONSTRAINT callfk FOREIGN KEY (call_id) REFERENCES samples (id),' + 'ADD CONSTRAINT returnfk FOREIGN KEY (return_id) REFERENCES samples (id),' + 'ADD CONSTRAINT parent_call_pathfk FOREIGN KEY (parent_call_path_id) REFERENCES call_paths (id)') + do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)') if (unhandled_count): print datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events" @@ -378,3 +432,13 @@ def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, sy else: value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiB", 21, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx) sample_file.write(value) + +def call_path_table(cp_id, parent_id, symbol_id, ip, *x): + fmt = "!hiqiqiqiq" + value = struct.pack(fmt, 4, 8, cp_id, 8, parent_id, 8, symbol_id, 8, ip) + call_path_file.write(value) + +def call_return_table(cr_id, thread_id, comm_id, call_path_id, call_time, return_time, branch_count, call_id, return_id, parent_call_path_id, flags, *x): + fmt = "!hiqiqiqiqiqiqiqiqiqiqii" + value = struct.pack(fmt, 11, 8, cr_id, 8, thread_id, 8, comm_id, 8, call_path_id, 8, call_time, 8, return_time, 8, branch_count, 8, call_id, 8, return_id, 8, parent_call_path_id, 4, flags) + call_file.write(value) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index f3ca7798b3d0..cb1d9602f418 100644 --- 
a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -37,6 +37,7 @@ #include "../comm.h" #include "../machine.h" #include "../db-export.h" +#include "../thread-stack.h" #include "../trace-event.h" #include "../machine.h" @@ -68,6 +69,8 @@ struct tables { PyObject *symbol_handler; PyObject *branch_type_handler; PyObject *sample_handler; + PyObject *call_path_handler; + PyObject *call_return_handler; bool db_export_mode; }; @@ -720,6 +723,64 @@ static int python_export_sample(struct db_export *dbe, return 0; } +static int python_export_call_path(struct db_export *dbe, struct call_path *cp) +{ + struct tables *tables = container_of(dbe, struct tables, dbe); + PyObject *t; + u64 parent_db_id, sym_db_id; + + parent_db_id = cp->parent ? cp->parent->db_id : 0; + sym_db_id = cp->sym ? *(u64 *)symbol__priv(cp->sym) : 0; + + t = tuple_new(4); + + tuple_set_u64(t, 0, cp->db_id); + tuple_set_u64(t, 1, parent_db_id); + tuple_set_u64(t, 2, sym_db_id); + tuple_set_u64(t, 3, cp->ip); + + call_object(tables->call_path_handler, t, "call_path_table"); + + Py_DECREF(t); + + return 0; +} + +static int python_export_call_return(struct db_export *dbe, + struct call_return *cr) +{ + struct tables *tables = container_of(dbe, struct tables, dbe); + u64 comm_db_id = cr->comm ? 
cr->comm->db_id : 0; + PyObject *t; + + t = tuple_new(11); + + tuple_set_u64(t, 0, cr->db_id); + tuple_set_u64(t, 1, cr->thread->db_id); + tuple_set_u64(t, 2, comm_db_id); + tuple_set_u64(t, 3, cr->cp->db_id); + tuple_set_u64(t, 4, cr->call_time); + tuple_set_u64(t, 5, cr->return_time); + tuple_set_u64(t, 6, cr->branch_count); + tuple_set_u64(t, 7, cr->call_ref); + tuple_set_u64(t, 8, cr->return_ref); + tuple_set_u64(t, 9, cr->cp->parent->db_id); + tuple_set_s32(t, 10, cr->flags); + + call_object(tables->call_return_handler, t, "call_return_table"); + + Py_DECREF(t); + + return 0; +} + +static int python_process_call_return(struct call_return *cr, void *data) +{ + struct db_export *dbe = data; + + return db_export__call_return(dbe, cr); +} + static void python_process_general_event(struct perf_sample *sample, struct perf_evsel *evsel, struct thread *thread, @@ -852,7 +913,9 @@ error: static void set_table_handlers(struct tables *tables) { const char *perf_db_export_mode = "perf_db_export_mode"; - PyObject *db_export_mode; + const char *perf_db_export_calls = "perf_db_export_calls"; + PyObject *db_export_mode, *db_export_calls; + bool export_calls = false; int ret; memset(tables, 0, sizeof(struct tables)); @@ -869,6 +932,23 @@ static void set_table_handlers(struct tables *tables) if (!ret) return; + tables->dbe.crp = NULL; + db_export_calls = PyDict_GetItemString(main_dict, perf_db_export_calls); + if (db_export_calls) { + ret = PyObject_IsTrue(db_export_calls); + if (ret == -1) + handler_call_die(perf_db_export_calls); + export_calls = !!ret; + } + + if (export_calls) { + tables->dbe.crp = + call_return_processor__new(python_process_call_return, + &tables->dbe); + if (!tables->dbe.crp) + Py_FatalError("failed to create calls processor"); + } + tables->db_export_mode = true; /* * Reserve per symbol space for symbol->db_id via symbol__priv() @@ -884,6 +964,8 @@ static void set_table_handlers(struct tables *tables) SET_TABLE_HANDLER(symbol); 
SET_TABLE_HANDLER(branch_type); SET_TABLE_HANDLER(sample); + SET_TABLE_HANDLER(call_path); + SET_TABLE_HANDLER(call_return); } /* -- cgit v1.2.3 From 758008b262f70be41104e4e33ba99181ac03775d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 30 Oct 2014 16:09:48 +0200 Subject: perf tools: Defer export of comms that were not 'set' Tracing for a workload begins before the comm event is seen, which results in the initial comm having a string of the form ":<pid>" (e.g. ":12345"). In order to export the correct string, defer the export until the new script 'flush' callback. Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1414678188-14946-8-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/db-export.c | 62 +++++++++++++++++++++- tools/perf/util/db-export.h | 3 ++ .../util/scripting-engines/trace-event-python.c | 4 +- 3 files changed, 67 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 017ecbb0ec05..c81dae399763 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -21,17 +21,74 @@ #include "comm.h" #include "symbol.h" #include "event.h" +#include "util.h" #include "thread-stack.h" #include "db-export.h" +struct deferred_export { + struct list_head node; + struct comm *comm; +}; + +static int db_export__deferred(struct db_export *dbe) +{ + struct deferred_export *de; + int err; + + while (!list_empty(&dbe->deferred)) { + de = list_entry(dbe->deferred.next, struct deferred_export, + node); + err = dbe->export_comm(dbe, de->comm); + list_del(&de->node); + free(de); + if (err) + return err; + } + + return 0; +} + +static void db_export__free_deferred(struct db_export *dbe) +{ + struct deferred_export *de; + + while (!list_empty(&dbe->deferred)) { + de =
list_entry(dbe->deferred.next, struct deferred_export, + node); + list_del(&de->node); + free(de); + } +} + +static int db_export__defer_comm(struct db_export *dbe, struct comm *comm) +{ + struct deferred_export *de; + + de = zalloc(sizeof(struct deferred_export)); + if (!de) + return -ENOMEM; + + de->comm = comm; + list_add_tail(&de->node, &dbe->deferred); + + return 0; +} + int db_export__init(struct db_export *dbe) { memset(dbe, 0, sizeof(struct db_export)); + INIT_LIST_HEAD(&dbe->deferred); return 0; } +int db_export__flush(struct db_export *dbe) +{ + return db_export__deferred(dbe); +} + void db_export__exit(struct db_export *dbe) { + db_export__free_deferred(dbe); call_return_processor__free(dbe->crp); dbe->crp = NULL; } @@ -115,7 +172,10 @@ int db_export__comm(struct db_export *dbe, struct comm *comm, comm->db_id = ++dbe->comm_last_db_id; if (dbe->export_comm) { - err = dbe->export_comm(dbe, comm); + if (main_thread->comm_set) + err = dbe->export_comm(dbe, comm); + else + err = db_export__defer_comm(dbe, comm); if (err) return err; } diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h index dd5ac2ae97d4..adbd22d66798 100644 --- a/tools/perf/util/db-export.h +++ b/tools/perf/util/db-export.h @@ -17,6 +17,7 @@ #define __PERF_DB_EXPORT_H #include +#include struct perf_evsel; struct machine; @@ -74,9 +75,11 @@ struct db_export { u64 sample_last_db_id; u64 call_path_last_db_id; u64 call_return_last_db_id; + struct list_head deferred; }; int db_export__init(struct db_export *dbe); +int db_export__flush(struct db_export *dbe); void db_export__exit(struct db_export *dbe); int db_export__evsel(struct db_export *dbe, struct perf_evsel *evsel); int db_export__machine(struct db_export *dbe, struct machine *machine); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index cb1d9602f418..118bc62850a8 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ 
b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1030,7 +1030,9 @@ error: static int python_flush_script(void) { - return 0; + struct tables *tables = &tables_global; + + return db_export__flush(&tables->dbe); } /* -- cgit v1.2.3 From c00c48fc6e6ef63d83a7417923a06b08089bb34b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 4 Nov 2014 10:14:27 +0900 Subject: perf symbols: Preparation for compressed kernel module support This patch adds basic support for handling compressed kernel modules, as some distros (such as Archlinux) ship them now. The actual work using a compression library will be added later. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1415063674-17206-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 75 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/dso.h | 7 +++++ tools/perf/util/machine.c | 19 ++++++++++- tools/perf/util/symbol-elf.c | 35 ++++++++++++++++++++- tools/perf/util/symbol.c | 8 ++++- 5 files changed, 141 insertions(+), 3 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 0247acfdfaca..36a607cf8f50 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -21,8 +21,10 @@ char dso__symtab_origin(const struct dso *dso) [DSO_BINARY_TYPE__BUILDID_DEBUGINFO] = 'b', [DSO_BINARY_TYPE__SYSTEM_PATH_DSO] = 'd', [DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE] = 'K', + [DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP] = 'm', [DSO_BINARY_TYPE__GUEST_KALLSYMS] = 'g', [DSO_BINARY_TYPE__GUEST_KMODULE] = 'G', + [DSO_BINARY_TYPE__GUEST_KMODULE_COMP] = 'M', [DSO_BINARY_TYPE__GUEST_VMLINUX] = 'V', }; @@ -112,11 +114,13 @@ int dso__read_binary_type_filename(const struct dso *dso, break; case DSO_BINARY_TYPE__GUEST_KMODULE: + case
DSO_BINARY_TYPE__GUEST_KMODULE_COMP: path__join3(filename, size, symbol_conf.symfs, root_dir, dso->long_name); break; case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE: + case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP: __symbol__join_symfs(filename, size, dso->long_name); break; @@ -137,6 +141,77 @@ int dso__read_binary_type_filename(const struct dso *dso, return ret; } +static int decompress_dummy(const char *input __maybe_unused, + int output __maybe_unused) +{ + return -1; +} + +static const struct { + const char *fmt; + int (*decompress)(const char *input, int output); +} compressions[] = { + { "gz", decompress_dummy }, + { NULL, }, +}; + +bool is_supported_compression(const char *ext) +{ + unsigned i; + + for (i = 0; compressions[i].fmt; i++) { + if (!strcmp(ext, compressions[i].fmt)) + return true; + } + return false; +} + +bool is_kmodule_extension(const char *ext) +{ + if (strncmp(ext, "ko", 2)) + return false; + + if (ext[2] == '\0' || (ext[2] == '.' && is_supported_compression(ext+3))) + return true; + + return false; +} + +bool is_kernel_module(const char *pathname, bool *compressed) +{ + const char *ext = strrchr(pathname, '.'); + + if (ext == NULL) + return false; + + if (is_supported_compression(ext + 1)) { + if (compressed) + *compressed = true; + ext -= 3; + } else if (compressed) + *compressed = false; + + return is_kmodule_extension(ext + 1); +} + +bool decompress_to_file(const char *ext, const char *filename, int output_fd) +{ + unsigned i; + + for (i = 0; compressions[i].fmt; i++) { + if (!strcmp(ext, compressions[i].fmt)) + return !compressions[i].decompress(filename, + output_fd); + } + return false; +} + +bool dso__needs_decompress(struct dso *dso) +{ + return dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP || + dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP; +} + /* * Global list of open DSOs and the counter. 
*/ diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index a316e4af321f..3782c82c6e44 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -22,7 +22,9 @@ enum dso_binary_type { DSO_BINARY_TYPE__BUILDID_DEBUGINFO, DSO_BINARY_TYPE__SYSTEM_PATH_DSO, DSO_BINARY_TYPE__GUEST_KMODULE, + DSO_BINARY_TYPE__GUEST_KMODULE_COMP, DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE, + DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP, DSO_BINARY_TYPE__KCORE, DSO_BINARY_TYPE__GUEST_KCORE, DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, @@ -185,6 +187,11 @@ int dso__kernel_module_get_build_id(struct dso *dso, const char *root_dir); char dso__symtab_origin(const struct dso *dso); int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type, char *root_dir, char *filename, size_t size); +bool is_supported_compression(const char *ext); +bool is_kmodule_extension(const char *ext); +bool is_kernel_module(const char *pathname, bool *compressed); +bool decompress_to_file(const char *ext, const char *filename, int output_fd); +bool dso__needs_decompress(struct dso *dso); /* * The dso__data_* external interface provides following functions: diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 51a630301afa..946c7d62cb6e 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -464,6 +464,7 @@ struct map *machine__new_module(struct machine *machine, u64 start, { struct map *map; struct dso *dso = __dsos__findnew(&machine->kernel_dsos, filename); + bool compressed; if (dso == NULL) return NULL; @@ -476,6 +477,11 @@ struct map *machine__new_module(struct machine *machine, u64 start, dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; else dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE; + + /* _KMODULE_COMP should be next to _KMODULE */ + if (is_kernel_module(filename, &compressed) && compressed) + dso->symtab_type++; + map_groups__insert(&machine->kmaps, map); return map; } @@ -861,8 +867,14 @@ static int 
map_groups__set_modules_path_dir(struct map_groups *mg, struct map *map; char *long_name; - if (dot == NULL || strcmp(dot, ".ko")) + if (dot == NULL) continue; + + /* On some system, modules are compressed like .ko.gz */ + if (is_supported_compression(dot + 1) && + is_kmodule_extension(dot - 2)) + dot -= 3; + snprintf(dso_name, sizeof(dso_name), "[%.*s]", (int)(dot - dent->d_name), dent->d_name); @@ -1044,6 +1056,11 @@ static int machine__process_kernel_mmap_event(struct machine *machine, dot = strrchr(name, '.'); if (dot == NULL) goto out_problem; + /* On some system, modules are compressed like .ko.gz */ + if (is_supported_compression(dot + 1)) + dot -= 3; + if (!is_kmodule_extension(dot + 1)) + goto out_problem; snprintf(short_module_name, sizeof(short_module_name), "[%.*s]", (int)(dot - name), name); strxfrchar(short_module_name, '-', '_'); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 1e23a5bfb044..efc7eb6b8f0f 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -546,6 +546,35 @@ static int dso__swap_init(struct dso *dso, unsigned char eidata) return 0; } +static int decompress_kmodule(struct dso *dso, const char *name, + enum dso_binary_type type) +{ + int fd; + const char *ext = strrchr(name, '.'); + char tmpbuf[] = "/tmp/perf-kmod-XXXXXX"; + + if ((type != DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP && + type != DSO_BINARY_TYPE__GUEST_KMODULE_COMP) || + type != dso->symtab_type) + return -1; + + if (!ext || !is_supported_compression(ext + 1)) + return -1; + + fd = mkstemp(tmpbuf); + if (fd < 0) + return -1; + + if (!decompress_to_file(ext + 1, name, fd)) { + close(fd); + fd = -1; + } + + unlink(tmpbuf); + + return fd; +} + bool symsrc__possibly_runtime(struct symsrc *ss) { return ss->dynsym || ss->opdsec; @@ -571,7 +600,11 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, Elf *elf; int fd; - fd = open(name, O_RDONLY); + if (dso__needs_decompress(dso)) + fd = 
decompress_kmodule(dso, name, type); + else + fd = open(name, O_RDONLY); + if (fd < 0) return -1; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 078331140d8c..c69915c9d5bc 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -51,7 +51,9 @@ static enum dso_binary_type binary_type_symtab[] = { DSO_BINARY_TYPE__BUILDID_DEBUGINFO, DSO_BINARY_TYPE__SYSTEM_PATH_DSO, DSO_BINARY_TYPE__GUEST_KMODULE, + DSO_BINARY_TYPE__GUEST_KMODULE_COMP, DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE, + DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP, DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, DSO_BINARY_TYPE__NOT_FOUND, }; @@ -1300,7 +1302,9 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, return dso->kernel == DSO_TYPE_GUEST_KERNEL; case DSO_BINARY_TYPE__GUEST_KMODULE: + case DSO_BINARY_TYPE__GUEST_KMODULE_COMP: case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE: + case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP: /* * kernel modules know their symtab type - it's set when * creating a module dso in machine__new_module(). @@ -1368,7 +1372,9 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) return -1; kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE || - dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE; + dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP || + dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE || + dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP; /* * Iterate over candidate debug images. -- cgit v1.2.3 From e92ce12ed6a46302f64269d2d406cf04525f0a8f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 31 Oct 2014 16:51:38 +0900 Subject: perf tools: Add gzip decompression support for kernel module Now my Archlinux box shows module symbols correctly. Before: $ perf report --stdio Failed to open /tmp/perf-3477.map, continuing without symbols no symbols found in /usr/bin/date, maybe install a debug package? 
No kallsyms or vmlinux with build-id 7b4ea0a49ae2111925857099aaf05c3246ff33e0 was found [drm] with build id 7b4ea0a49ae2111925857099aaf05c3246ff33e0 not found, continuing without symbols No kallsyms or vmlinux with build-id edd931629094b660ca9dec09a1b635c8d87aa2ee was found [jbd2] with build id edd931629094b660ca9dec09a1b635c8d87aa2ee not found, continuing without symbols No kallsyms or vmlinux with build-id a7b1eada671c34933e5610bb920b2ca4945a82c3 was found [ext4] with build id a7b1eada671c34933e5610bb920b2ca4945a82c3 not found, continuing without symbols No kallsyms or vmlinux with build-id d69511fa3e5840e770336ef45b06c83fef8d74e3 was found [scsi_mod] with build id d69511fa3e5840e770336ef45b06c83fef8d74e3 not found, continuing without symbols No kallsyms or vmlinux with build-id af0430af13461af058770ee9b87afc07922c2e77 was found [libata] with build id af0430af13461af058770ee9b87afc07922c2e77 not found, continuing without symbols No kallsyms or vmlinux with build-id aaeedff8160ce631a5f0333591c6ff291201d29f was found [libahci] with build id aaeedff8160ce631a5f0333591c6ff291201d29f not found, continuing without symbols No kallsyms or vmlinux with build-id c57907712becaf662dc4981824bb372c0441d605 was found [mac80211] with build id c57907712becaf662dc4981824bb372c0441d605 not found, continuing without symbols No kallsyms or vmlinux with build-id e0589077cc0ec8c3e4c40eb9f2d9e69d236bee8f was found [iwldvm] with build id e0589077cc0ec8c3e4c40eb9f2d9e69d236bee8f not found, continuing without symbols No kallsyms or vmlinux with build-id 2d86086bf136bf374a2f029cf85a48194f9b950b was found [cfg80211] with build id 2d86086bf136bf374a2f029cf85a48194f9b950b not found, continuing without symbols No kallsyms or vmlinux with build-id 4493c48599bdb3d91d0f8db5150e0be33fdd9221 was found [iwlwifi] with build id 4493c48599bdb3d91d0f8db5150e0be33fdd9221 not found, continuing without symbols ... # # Overhead Command Shared Object Symbol # ........ ............... ....................... 
........................................................ # 0.03% swapper [ext4] [k] 0x000000000000fe2e 0.03% swapper [kernel.kallsyms] [k] account_entity_enqueue 0.03% swapper [ext4] [k] 0x000000000000fc2b 0.03% irq/50-iwlwifi [iwlwifi] [k] 0x000000000000200b 0.03% swapper [kernel.kallsyms] [k] ktime_add_safe 0.03% swapper [kernel.kallsyms] [k] elv_completed_request 0.03% swapper [libata] [k] 0x0000000000003997 0.03% swapper [libahci] [k] 0x0000000000001f25 0.03% swapper [kernel.kallsyms] [k] rb_next 0.03% swapper [kernel.kallsyms] [k] blk_finish_request 0.03% swapper [ext4] [k] 0x0000000000010248 0.00% perf [kernel.kallsyms] [k] native_write_msr_safe After: $ perf report --stdio Failed to open /tmp/perf-3477.map, continuing without symbols no symbols found in /usr/bin/tr, maybe install a debug package? ... # # Overhead Command Shared Object Symbol # ........ ............... ........................... ...................................................... # 0.04% kworker/u16:3 [ext4] [k] ext4_read_block_bitmap 0.03% kworker/u16:0 [mac80211] [k] ieee80211_sta_reset_beacon_monitor 0.02% irq/50-iwlwifi [mac80211] [k] ieee80211_get_bssid 0.02% firefox [e1000e] [k] __ew32_prepare 0.02% swapper [libahci] [k] ahci_handle_port_interrupt 0.02% emacs libglib-2.0.so.0.4000.0 [.] g_mutex_unlock 0.02% swapper [e1000e] [k] e1000_clean_tx_irq 0.02% dwm [kernel.kallsyms] [k] __schedule 0.02% gnome-terminal- [vdso] [.] __vdso_clock_gettime 0.02% swapper [e1000e] [k] e1000_alloc_rx_buffers 0.02% irq/50-iwlwifi [mac80211] [k] ieee80211_rx 0.01% firefox [vdso] [.] 
__vdso_gettimeofday 0.01% irq/50-iwlwifi [iwlwifi] [k] iwl_pcie_rxq_restock.part.13 Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Paul Mackerras Cc: Namhyung Kim Cc: Adrian Hunter Cc: David Ahern Cc: Stephane Eranian Link: http://lkml.kernel.org/r/87h9yexshi.fsf@sejong.aot.lge.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 7 +++ tools/perf/config/Makefile | 15 +++++- tools/perf/config/feature-checks/Makefile | 8 ++- tools/perf/config/feature-checks/test-all.c | 5 ++ tools/perf/config/feature-checks/test-zlib.c | 9 ++++ tools/perf/util/dso.c | 12 ++--- tools/perf/util/util.h | 5 ++ tools/perf/util/zlib.c | 78 ++++++++++++++++++++++++++++ 8 files changed, 127 insertions(+), 12 deletions(-) create mode 100644 tools/perf/config/feature-checks/test-zlib.c create mode 100644 tools/perf/util/zlib.c (limited to 'tools/perf/util') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 0ebcc4ad0244..aecf61dcd754 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -66,6 +66,9 @@ include config/utilities.mak # # Define NO_PERF_READ_VDSOX32 if you do not want to build perf-read-vdsox32 # for reading the x32 mode 32-bit compatibility VDSO in 64-bit mode +# +# Define NO_ZLIB if you do not want to support compressed kernel modules + ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(shell pwd))) @@ -584,6 +587,10 @@ ifndef NO_LIBNUMA BUILTIN_OBJS += $(OUTPUT)bench/numa.o endif +ifndef NO_ZLIB + LIB_OBJS += $(OUTPUT)util/zlib.o +endif + ifdef ASCIIDOC8 export ASCIIDOC8 endif diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 71264e41fa85..79f906c7124e 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -200,7 +200,8 @@ CORE_FEATURE_TESTS = \ libunwind \ stackprotector-all \ timerfd \ - libdw-dwarf-unwind + libdw-dwarf-unwind \ + zlib LIB_FEATURE_TESTS = \ dwarf \ @@ -214,7 +215,8 @@ LIB_FEATURE_TESTS = \ libpython 
\ libslang \ libunwind \ - libdw-dwarf-unwind + libdw-dwarf-unwind \ + zlib VF_FEATURE_TESTS = \ backtrace \ @@ -604,6 +606,15 @@ ifneq ($(filter -lbfd,$(EXTLIBS)),) CFLAGS += -DHAVE_LIBBFD_SUPPORT endif +ifndef NO_ZLIB + ifeq ($(feature-zlib), 1) + CFLAGS += -DHAVE_ZLIB_SUPPORT + EXTLIBS += -lz + else + NO_ZLIB := 1 + endif +endif + ifndef NO_BACKTRACE ifeq ($(feature-backtrace), 1) CFLAGS += -DHAVE_BACKTRACE_SUPPORT diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 7c68ec74a808..53f19b5dbc37 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -29,7 +29,8 @@ FILES= \ test-timerfd.bin \ test-libdw-dwarf-unwind.bin \ test-compile-32.bin \ - test-compile-x32.bin + test-compile-x32.bin \ + test-zlib.bin CC := $(CROSS_COMPILE)gcc -MD PKG_CONFIG := $(CROSS_COMPILE)pkg-config @@ -41,7 +42,7 @@ BUILD = $(CC) $(CFLAGS) -o $(OUTPUT)$@ $(patsubst %.bin,%.c,$@) $(LDFLAGS) ############################### test-all.bin: - $(BUILD) -Werror -fstack-protector-all -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl + $(BUILD) -Werror -fstack-protector-all -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz test-hello.bin: $(BUILD) @@ -139,6 +140,9 @@ test-compile-32.bin: test-compile-x32.bin: $(CC) -mx32 -o $(OUTPUT)$@ test-compile.c +test-zlib.bin: + $(BUILD) -lz + -include *.d ############################### diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/perf/config/feature-checks/test-all.c index a7d022e161c0..652e0098eba6 100644 --- a/tools/perf/config/feature-checks/test-all.c +++ 
b/tools/perf/config/feature-checks/test-all.c @@ -93,6 +93,10 @@ # include "test-sync-compare-and-swap.c" #undef main +#define main main_test_zlib +# include "test-zlib.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -116,6 +120,7 @@ int main(int argc, char *argv[]) main_test_stackprotector_all(); main_test_libdw_dwarf_unwind(); main_test_sync_compare_and_swap(argc, argv); + main_test_zlib(); return 0; } diff --git a/tools/perf/config/feature-checks/test-zlib.c b/tools/perf/config/feature-checks/test-zlib.c new file mode 100644 index 000000000000..e111fff6240e --- /dev/null +++ b/tools/perf/config/feature-checks/test-zlib.c @@ -0,0 +1,9 @@ +#include + +int main(void) +{ + z_stream zs; + + inflateInit(&zs); + return 0; +} diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 36a607cf8f50..45be944d450a 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -141,18 +141,14 @@ int dso__read_binary_type_filename(const struct dso *dso, return ret; } -static int decompress_dummy(const char *input __maybe_unused, - int output __maybe_unused) -{ - return -1; -} - static const struct { const char *fmt; int (*decompress)(const char *input, int output); } compressions[] = { - { "gz", decompress_dummy }, - { NULL, }, +#ifdef HAVE_ZLIB_SUPPORT + { "gz", gzip_decompress_to_file }, +#endif + { NULL, NULL }, }; bool is_supported_compression(const char *ext) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 80bfdaa0e2a4..7dc44cfe25b3 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -351,4 +351,9 @@ void mem_bswap_32(void *src, int byte_size); const char *get_filename_for_perf_kvm(void); bool find_process(const char *name); + +#ifdef HAVE_ZLIB_SUPPORT +int gzip_decompress_to_file(const char *input, int output_fd); +#endif + #endif /* GIT_COMPAT_UTIL_H */ diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c new file mode 100644 index 000000000000..495a449fc25c --- /dev/null +++ 
b/tools/perf/util/zlib.c @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include + +#include "util/util.h" +#include "util/debug.h" + + +#define CHUNK_SIZE 16384 + +int gzip_decompress_to_file(const char *input, int output_fd) +{ + int ret = Z_STREAM_ERROR; + int input_fd; + void *ptr; + int len; + struct stat stbuf; + unsigned char buf[CHUNK_SIZE]; + z_stream zs = { + .zalloc = Z_NULL, + .zfree = Z_NULL, + .opaque = Z_NULL, + .avail_in = 0, + .next_in = Z_NULL, + }; + + input_fd = open(input, O_RDONLY); + if (input_fd < 0) + return -1; + + if (fstat(input_fd, &stbuf) < 0) + goto out_close; + + ptr = mmap(NULL, stbuf.st_size, PROT_READ, MAP_PRIVATE, input_fd, 0); + if (ptr == MAP_FAILED) + goto out_close; + + if (inflateInit2(&zs, 16 + MAX_WBITS) != Z_OK) + goto out_unmap; + + zs.next_in = ptr; + zs.avail_in = stbuf.st_size; + + do { + zs.next_out = buf; + zs.avail_out = CHUNK_SIZE; + + ret = inflate(&zs, Z_NO_FLUSH); + switch (ret) { + case Z_NEED_DICT: + ret = Z_DATA_ERROR; + /* fall through */ + case Z_DATA_ERROR: + case Z_MEM_ERROR: + goto out; + default: + break; + } + + len = CHUNK_SIZE - zs.avail_out; + if (writen(output_fd, buf, len) != len) { + ret = Z_DATA_ERROR; + goto out; + } + + } while (ret != Z_STREAM_END); + +out: + inflateEnd(&zs); +out_unmap: + munmap(ptr, stbuf.st_size); +out_close: + close(input_fd); + + return ret == Z_STREAM_END ? 0 : -1; +} -- cgit v1.2.3 From 714c9c4a98f722115e10d021ea80600f4427b71e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 4 Nov 2014 10:14:29 +0900 Subject: perf build-id: Rename dsos__write_buildid_table() The dsos__write_buildid_table() is not use struct dso and it mostly uses perf_session struct. So rename it to perf_session__write_buildid_ table() so that it corresponds to other related functions such as perf_session__read_build_ids() and perf_session__cache_build_ids(). 
Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1415063674-17206-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 0ecf4a304cbc..be8d02eb97e9 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -297,10 +297,8 @@ static int machine__write_buildid_table(struct machine *machine, int fd) return err; } -static int dsos__write_buildid_table(struct perf_header *header, int fd) +static int perf_session__write_buildid_table(struct perf_session *session, int fd) { - struct perf_session *session = container_of(header, - struct perf_session, header); struct rb_node *nd; int err = machine__write_buildid_table(&session->machines.host, fd); @@ -523,7 +521,7 @@ static int write_build_id(int fd, struct perf_header *h, if (!perf_session__read_build_ids(session, true)) return -1; - err = dsos__write_buildid_table(h, fd); + err = perf_session__write_buildid_table(session, fd); if (err < 0) { pr_debug("failed to write buildid table\n"); return err; -- cgit v1.2.3 From e195fac8077f034b0160bf420bdf450ae476701d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 4 Nov 2014 10:14:30 +0900 Subject: perf build-id: Move build-id related functions to util/build-id.c It'd be better managing those functions in a separate place as util/header.c file is already big. 
It now exports following 3 functions to others: bool perf_session__read_build_ids(struct perf_session *session, bool with_hits); int perf_session__write_buildid_table(struct perf_session *session, int fd); int perf_session__cache_build_ids(struct perf_session *session); Signed-off-by: Namhyung Kim Acked-by: Adrian Hunter Link: http://lkml.kernel.org/r/545733E7.6010105@intel.com Cc: Adrian Hunter Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1415063674-17206-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 334 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/build-id.h | 11 ++ tools/perf/util/header.c | 337 +-------------------------------------------- tools/perf/util/header.h | 8 +- 4 files changed, 349 insertions(+), 341 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 2e7c68e39330..dd2a3e52ada1 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -15,6 +15,8 @@ #include "debug.h" #include "session.h" #include "tool.h" +#include "header.h" +#include "vdso.h" int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused, union perf_event *event, @@ -105,3 +107,335 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) build_id_hex, build_id_hex + 2); return bf; } + +#define dsos__for_each_with_build_id(pos, head) \ + list_for_each_entry(pos, head, node) \ + if (!pos->has_build_id) \ + continue; \ + else + +static int write_buildid(const char *name, size_t name_len, u8 *build_id, + pid_t pid, u16 misc, int fd) +{ + int err; + struct build_id_event b; + size_t len; + + len = name_len + 1; + len = PERF_ALIGN(len, NAME_ALIGN); + + memset(&b, 0, sizeof(b)); + memcpy(&b.build_id, build_id, BUILD_ID_SIZE); + b.pid = pid; + b.header.misc = misc; + b.header.size = sizeof(b) + len; 
+ + err = writen(fd, &b, sizeof(b)); + if (err < 0) + return err; + + return write_padded(fd, name, name_len + 1, len); +} + +static int __dsos__write_buildid_table(struct list_head *head, + struct machine *machine, + pid_t pid, u16 misc, int fd) +{ + char nm[PATH_MAX]; + struct dso *pos; + + dsos__for_each_with_build_id(pos, head) { + int err; + const char *name; + size_t name_len; + + if (!pos->hit) + continue; + + if (dso__is_vdso(pos)) { + name = pos->short_name; + name_len = pos->short_name_len + 1; + } else if (dso__is_kcore(pos)) { + machine__mmap_name(machine, nm, sizeof(nm)); + name = nm; + name_len = strlen(nm) + 1; + } else { + name = pos->long_name; + name_len = pos->long_name_len + 1; + } + + err = write_buildid(name, name_len, pos->build_id, + pid, misc, fd); + if (err) + return err; + } + + return 0; +} + +static int machine__write_buildid_table(struct machine *machine, int fd) +{ + int err; + u16 kmisc = PERF_RECORD_MISC_KERNEL, + umisc = PERF_RECORD_MISC_USER; + + if (!machine__is_host(machine)) { + kmisc = PERF_RECORD_MISC_GUEST_KERNEL; + umisc = PERF_RECORD_MISC_GUEST_USER; + } + + err = __dsos__write_buildid_table(&machine->kernel_dsos.head, machine, + machine->pid, kmisc, fd); + if (err == 0) + err = __dsos__write_buildid_table(&machine->user_dsos.head, + machine, machine->pid, umisc, + fd); + return err; +} + +int perf_session__write_buildid_table(struct perf_session *session, int fd) +{ + struct rb_node *nd; + int err = machine__write_buildid_table(&session->machines.host, fd); + + if (err) + return err; + + for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { + struct machine *pos = rb_entry(nd, struct machine, rb_node); + err = machine__write_buildid_table(pos, fd); + if (err) + break; + } + return err; +} + +static int __dsos__hit_all(struct list_head *head) +{ + struct dso *pos; + + list_for_each_entry(pos, head, node) + pos->hit = true; + + return 0; +} + +static int machine__hit_all_dsos(struct machine *machine) +{ + 
int err; + + err = __dsos__hit_all(&machine->kernel_dsos.head); + if (err) + return err; + + return __dsos__hit_all(&machine->user_dsos.head); +} + +int dsos__hit_all(struct perf_session *session) +{ + struct rb_node *nd; + int err; + + err = machine__hit_all_dsos(&session->machines.host); + if (err) + return err; + + for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { + struct machine *pos = rb_entry(nd, struct machine, rb_node); + + err = machine__hit_all_dsos(pos); + if (err) + return err; + } + + return 0; +} + +int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, + const char *name, bool is_kallsyms, bool is_vdso) +{ + const size_t size = PATH_MAX; + char *realname, *filename = zalloc(size), + *linkname = zalloc(size), *targetname; + int len, err = -1; + bool slash = is_kallsyms || is_vdso; + + if (is_kallsyms) { + if (symbol_conf.kptr_restrict) { + pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n"); + err = 0; + goto out_free; + } + realname = (char *) name; + } else + realname = realpath(name, NULL); + + if (realname == NULL || filename == NULL || linkname == NULL) + goto out_free; + + len = scnprintf(filename, size, "%s%s%s", + debugdir, slash ? "/" : "", + is_vdso ? 
DSO__NAME_VDSO : realname); + if (mkdir_p(filename, 0755)) + goto out_free; + + snprintf(filename + len, size - len, "/%s", sbuild_id); + + if (access(filename, F_OK)) { + if (is_kallsyms) { + if (copyfile("/proc/kallsyms", filename)) + goto out_free; + } else if (link(realname, filename) && copyfile(name, filename)) + goto out_free; + } + + len = scnprintf(linkname, size, "%s/.build-id/%.2s", + debugdir, sbuild_id); + + if (access(linkname, X_OK) && mkdir_p(linkname, 0755)) + goto out_free; + + snprintf(linkname + len, size - len, "/%s", sbuild_id + 2); + targetname = filename + strlen(debugdir) - 5; + memcpy(targetname, "../..", 5); + + if (symlink(targetname, linkname) == 0) + err = 0; +out_free: + if (!is_kallsyms) + free(realname); + free(filename); + free(linkname); + return err; +} + +static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, + const char *name, const char *debugdir, + bool is_kallsyms, bool is_vdso) +{ + char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + + build_id__sprintf(build_id, build_id_size, sbuild_id); + + return build_id_cache__add_s(sbuild_id, debugdir, name, + is_kallsyms, is_vdso); +} + +int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir) +{ + const size_t size = PATH_MAX; + char *filename = zalloc(size), + *linkname = zalloc(size); + int err = -1; + + if (filename == NULL || linkname == NULL) + goto out_free; + + snprintf(linkname, size, "%s/.build-id/%.2s/%s", + debugdir, sbuild_id, sbuild_id + 2); + + if (access(linkname, F_OK)) + goto out_free; + + if (readlink(linkname, filename, size - 1) < 0) + goto out_free; + + if (unlink(linkname)) + goto out_free; + + /* + * Since the link is relative, we must make it absolute: + */ + snprintf(linkname, size, "%s/.build-id/%.2s/%s", + debugdir, sbuild_id, filename); + + if (unlink(linkname)) + goto out_free; + + err = 0; +out_free: + free(filename); + free(linkname); + return err; +} + +static int dso__cache_build_id(struct dso *dso, struct machine 
*machine, + const char *debugdir) +{ + bool is_kallsyms = dso->kernel && dso->long_name[0] != '/'; + bool is_vdso = dso__is_vdso(dso); + const char *name = dso->long_name; + char nm[PATH_MAX]; + + if (dso__is_kcore(dso)) { + is_kallsyms = true; + machine__mmap_name(machine, nm, sizeof(nm)); + name = nm; + } + return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name, + debugdir, is_kallsyms, is_vdso); +} + +static int __dsos__cache_build_ids(struct list_head *head, + struct machine *machine, const char *debugdir) +{ + struct dso *pos; + int err = 0; + + dsos__for_each_with_build_id(pos, head) + if (dso__cache_build_id(pos, machine, debugdir)) + err = -1; + + return err; +} + +static int machine__cache_build_ids(struct machine *machine, const char *debugdir) +{ + int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine, + debugdir); + ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine, + debugdir); + return ret; +} + +int perf_session__cache_build_ids(struct perf_session *session) +{ + struct rb_node *nd; + int ret; + char debugdir[PATH_MAX]; + + snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir); + + if (mkdir(debugdir, 0755) != 0 && errno != EEXIST) + return -1; + + ret = machine__cache_build_ids(&session->machines.host, debugdir); + + for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { + struct machine *pos = rb_entry(nd, struct machine, rb_node); + ret |= machine__cache_build_ids(pos, debugdir); + } + return ret ? 
-1 : 0; +} + +static bool machine__read_build_ids(struct machine *machine, bool with_hits) +{ + bool ret; + + ret = __dsos__read_build_ids(&machine->kernel_dsos.head, with_hits); + ret |= __dsos__read_build_ids(&machine->user_dsos.head, with_hits); + return ret; +} + +bool perf_session__read_build_ids(struct perf_session *session, bool with_hits) +{ + struct rb_node *nd; + bool ret = machine__read_build_ids(&session->machines.host, with_hits); + + for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { + struct machine *pos = rb_entry(nd, struct machine, rb_node); + ret |= machine__read_build_ids(pos, with_hits); + } + + return ret; +} diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index ae392561470b..666a3bd4f64e 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -15,4 +15,15 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size); int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine); + +int dsos__hit_all(struct perf_session *session); + +bool perf_session__read_build_ids(struct perf_session *session, bool with_hits); +int perf_session__write_buildid_table(struct perf_session *session, int fd); +int perf_session__cache_build_ids(struct perf_session *session); + +int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, + const char *name, bool is_kallsyms, bool is_vdso); +int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); + #endif diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index be8d02eb97e9..3e2c156d9c64 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -79,10 +79,7 @@ static int do_write(int fd, const void *buf, size_t size) return 0; } -#define NAME_ALIGN 64 - -static int write_padded(int fd, const void *bf, size_t count, - size_t count_aligned) +int write_padded(int fd, const void *bf, size_t 
count, size_t count_aligned) { static const char zero_buf[NAME_ALIGN]; int err = do_write(fd, bf, count); @@ -171,338 +168,6 @@ perf_header__set_cmdline(int argc, const char **argv) return 0; } -#define dsos__for_each_with_build_id(pos, head) \ - list_for_each_entry(pos, head, node) \ - if (!pos->has_build_id) \ - continue; \ - else - -static int write_buildid(const char *name, size_t name_len, u8 *build_id, - pid_t pid, u16 misc, int fd) -{ - int err; - struct build_id_event b; - size_t len; - - len = name_len + 1; - len = PERF_ALIGN(len, NAME_ALIGN); - - memset(&b, 0, sizeof(b)); - memcpy(&b.build_id, build_id, BUILD_ID_SIZE); - b.pid = pid; - b.header.misc = misc; - b.header.size = sizeof(b) + len; - - err = do_write(fd, &b, sizeof(b)); - if (err < 0) - return err; - - return write_padded(fd, name, name_len + 1, len); -} - -static int __dsos__hit_all(struct list_head *head) -{ - struct dso *pos; - - list_for_each_entry(pos, head, node) - pos->hit = true; - - return 0; -} - -static int machine__hit_all_dsos(struct machine *machine) -{ - int err; - - err = __dsos__hit_all(&machine->kernel_dsos.head); - if (err) - return err; - - return __dsos__hit_all(&machine->user_dsos.head); -} - -int dsos__hit_all(struct perf_session *session) -{ - struct rb_node *nd; - int err; - - err = machine__hit_all_dsos(&session->machines.host); - if (err) - return err; - - for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { - struct machine *pos = rb_entry(nd, struct machine, rb_node); - - err = machine__hit_all_dsos(pos); - if (err) - return err; - } - - return 0; -} - -static int __dsos__write_buildid_table(struct list_head *head, - struct machine *machine, - pid_t pid, u16 misc, int fd) -{ - char nm[PATH_MAX]; - struct dso *pos; - - dsos__for_each_with_build_id(pos, head) { - int err; - const char *name; - size_t name_len; - - if (!pos->hit) - continue; - - if (dso__is_vdso(pos)) { - name = pos->short_name; - name_len = pos->short_name_len + 1; - } else if 
(dso__is_kcore(pos)) { - machine__mmap_name(machine, nm, sizeof(nm)); - name = nm; - name_len = strlen(nm) + 1; - } else { - name = pos->long_name; - name_len = pos->long_name_len + 1; - } - - err = write_buildid(name, name_len, pos->build_id, - pid, misc, fd); - if (err) - return err; - } - - return 0; -} - -static int machine__write_buildid_table(struct machine *machine, int fd) -{ - int err; - u16 kmisc = PERF_RECORD_MISC_KERNEL, - umisc = PERF_RECORD_MISC_USER; - - if (!machine__is_host(machine)) { - kmisc = PERF_RECORD_MISC_GUEST_KERNEL; - umisc = PERF_RECORD_MISC_GUEST_USER; - } - - err = __dsos__write_buildid_table(&machine->kernel_dsos.head, machine, - machine->pid, kmisc, fd); - if (err == 0) - err = __dsos__write_buildid_table(&machine->user_dsos.head, - machine, machine->pid, umisc, - fd); - return err; -} - -static int perf_session__write_buildid_table(struct perf_session *session, int fd) -{ - struct rb_node *nd; - int err = machine__write_buildid_table(&session->machines.host, fd); - - if (err) - return err; - - for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { - struct machine *pos = rb_entry(nd, struct machine, rb_node); - err = machine__write_buildid_table(pos, fd); - if (err) - break; - } - return err; -} - -int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, - const char *name, bool is_kallsyms, bool is_vdso) -{ - const size_t size = PATH_MAX; - char *realname, *filename = zalloc(size), - *linkname = zalloc(size), *targetname; - int len, err = -1; - bool slash = is_kallsyms || is_vdso; - - if (is_kallsyms) { - if (symbol_conf.kptr_restrict) { - pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n"); - err = 0; - goto out_free; - } - realname = (char *) name; - } else - realname = realpath(name, NULL); - - if (realname == NULL || filename == NULL || linkname == NULL) - goto out_free; - - len = scnprintf(filename, size, "%s%s%s", - debugdir, slash ? "/" : "", - is_vdso ? 
DSO__NAME_VDSO : realname); - if (mkdir_p(filename, 0755)) - goto out_free; - - snprintf(filename + len, size - len, "/%s", sbuild_id); - - if (access(filename, F_OK)) { - if (is_kallsyms) { - if (copyfile("/proc/kallsyms", filename)) - goto out_free; - } else if (link(realname, filename) && copyfile(name, filename)) - goto out_free; - } - - len = scnprintf(linkname, size, "%s/.build-id/%.2s", - debugdir, sbuild_id); - - if (access(linkname, X_OK) && mkdir_p(linkname, 0755)) - goto out_free; - - snprintf(linkname + len, size - len, "/%s", sbuild_id + 2); - targetname = filename + strlen(debugdir) - 5; - memcpy(targetname, "../..", 5); - - if (symlink(targetname, linkname) == 0) - err = 0; -out_free: - if (!is_kallsyms) - free(realname); - free(filename); - free(linkname); - return err; -} - -static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, - const char *name, const char *debugdir, - bool is_kallsyms, bool is_vdso) -{ - char sbuild_id[BUILD_ID_SIZE * 2 + 1]; - - build_id__sprintf(build_id, build_id_size, sbuild_id); - - return build_id_cache__add_s(sbuild_id, debugdir, name, - is_kallsyms, is_vdso); -} - -int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir) -{ - const size_t size = PATH_MAX; - char *filename = zalloc(size), - *linkname = zalloc(size); - int err = -1; - - if (filename == NULL || linkname == NULL) - goto out_free; - - snprintf(linkname, size, "%s/.build-id/%.2s/%s", - debugdir, sbuild_id, sbuild_id + 2); - - if (access(linkname, F_OK)) - goto out_free; - - if (readlink(linkname, filename, size - 1) < 0) - goto out_free; - - if (unlink(linkname)) - goto out_free; - - /* - * Since the link is relative, we must make it absolute: - */ - snprintf(linkname, size, "%s/.build-id/%.2s/%s", - debugdir, sbuild_id, filename); - - if (unlink(linkname)) - goto out_free; - - err = 0; -out_free: - free(filename); - free(linkname); - return err; -} - -static int dso__cache_build_id(struct dso *dso, struct machine 
*machine, - const char *debugdir) -{ - bool is_kallsyms = dso->kernel && dso->long_name[0] != '/'; - bool is_vdso = dso__is_vdso(dso); - const char *name = dso->long_name; - char nm[PATH_MAX]; - - if (dso__is_kcore(dso)) { - is_kallsyms = true; - machine__mmap_name(machine, nm, sizeof(nm)); - name = nm; - } - return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name, - debugdir, is_kallsyms, is_vdso); -} - -static int __dsos__cache_build_ids(struct list_head *head, - struct machine *machine, const char *debugdir) -{ - struct dso *pos; - int err = 0; - - dsos__for_each_with_build_id(pos, head) - if (dso__cache_build_id(pos, machine, debugdir)) - err = -1; - - return err; -} - -static int machine__cache_build_ids(struct machine *machine, const char *debugdir) -{ - int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine, - debugdir); - ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine, - debugdir); - return ret; -} - -static int perf_session__cache_build_ids(struct perf_session *session) -{ - struct rb_node *nd; - int ret; - char debugdir[PATH_MAX]; - - snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir); - - if (mkdir(debugdir, 0755) != 0 && errno != EEXIST) - return -1; - - ret = machine__cache_build_ids(&session->machines.host, debugdir); - - for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { - struct machine *pos = rb_entry(nd, struct machine, rb_node); - ret |= machine__cache_build_ids(pos, debugdir); - } - return ret ? 
-1 : 0; -} - -static bool machine__read_build_ids(struct machine *machine, bool with_hits) -{ - bool ret; - - ret = __dsos__read_build_ids(&machine->kernel_dsos.head, with_hits); - ret |= __dsos__read_build_ids(&machine->user_dsos.head, with_hits); - return ret; -} - -static bool perf_session__read_build_ids(struct perf_session *session, bool with_hits) -{ - struct rb_node *nd; - bool ret = machine__read_build_ids(&session->machines.host, with_hits); - - for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { - struct machine *pos = rb_entry(nd, struct machine, rb_node); - ret |= machine__read_build_ids(pos, with_hits); - } - - return ret; -} - static int write_tracing_data(int fd, struct perf_header *h __maybe_unused, struct perf_evlist *evlist) { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 8f5cbaea64a5..3bb90ac172a1 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -122,10 +122,6 @@ int perf_header__process_sections(struct perf_header *header, int fd, int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full); -int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, - const char *name, bool is_kallsyms, bool is_vdso); -int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); - int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process); @@ -151,7 +147,9 @@ int perf_event__process_build_id(struct perf_tool *tool, struct perf_session *session); bool is_perf_magic(u64 magic); -int dsos__hit_all(struct perf_session *session); +#define NAME_ALIGN 64 + +int write_padded(int fd, const void *bf, size_t count, size_t count_aligned); /* * arch specific callback -- cgit v1.2.3 From 00dc865798a31d3d5300dd5d70166a4a85f76a20 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 4 Nov 2014 10:14:32 +0900 Subject: perf record: Do not save pathname in ./debug/.build-id directory for vmlinux 
When perf record finishes a session, it pre-processes samples in order to write build-id info from DSOs that had samples. During this process it'll call map__load() for the kernel map, and it ends up calling dso__load_vmlinux_path() which replaces dso->long_name. But this function checks kernel's build-id before searching vmlinux path so it'll end up with a cryptic name, the pathname for the entry in the ~/.debug cache, which can be confusing to users. This patch adds a flag to skip the build-id check during record, so that it'll have the original vmlinux path for the kernel dso->long_name, not the entry in the ~/.debug cache. Before: # perf record -va sleep 3 mmap size 528384B [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.196 MB perf.data (~8545 samples) ] Looking at the vmlinux_path (7 entries long) Using /home/namhyung/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551 for symbols After: # perf record -va sleep 3 mmap size 528384B [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.193 MB perf.data (~8432 samples) ] Looking at the vmlinux_path (7 entries long) Using /lib/modules/3.16.4-1-ARCH/build/vmlinux for symbols Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1415063674-17206-7-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 11 +++++++++++ tools/perf/util/symbol.c | 11 ++++++----- tools/perf/util/symbol.h | 1 + 3 files changed, 18 insertions(+), 5 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 5091a27e6d28..582c4da155ea 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -200,6 +200,17 @@ static int process_buildids(struct record *rec) if (size == 0) return 0; + /* + * 
During this process, it'll load kernel map and replace the + * dso->long_name to a real pathname it found. In this case + * we prefer the vmlinux path like + * /lib/modules/3.16.4/build/vmlinux + * + * rather than build-id path (in debug directory). + * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551 + */ + symbol_conf.ignore_vmlinux_buildid = true; + return __perf_session__process_events(session, start, size - start, size, &build_id__mark_dso_hit_ops); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index c69915c9d5bc..c24c5b83156c 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1511,12 +1511,10 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map, symbol_filter_t filter) { int i, err = 0; - char *filename; + char *filename = NULL; - pr_debug("Looking at the vmlinux_path (%d entries long)\n", - vmlinux_path__nr_entries + 1); - - filename = dso__build_id_filename(dso, NULL, 0); + if (!symbol_conf.ignore_vmlinux_buildid) + filename = dso__build_id_filename(dso, NULL, 0); if (filename != NULL) { err = dso__load_vmlinux(dso, map, filename, true, filter); if (err > 0) @@ -1524,6 +1522,9 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map, free(filename); } + pr_debug("Looking at the vmlinux_path (%d entries long)\n", + vmlinux_path__nr_entries + 1); + for (i = 0; i < vmlinux_path__nr_entries; ++i) { err = dso__load_vmlinux(dso, map, vmlinux_path[i], false, filter); if (err > 0) diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index eb2c19bf8d90..ded3ca7266de 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -105,6 +105,7 @@ struct symbol_conf { unsigned short nr_events; bool try_vmlinux_path, ignore_vmlinux, + ignore_vmlinux_buildid, show_kernel_path, use_modules, sort_by_name, -- cgit v1.2.3 From b837a8bdc48925e6512412973b845c53cbe2b412 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 4 Nov 2014 10:14:33 +0900 Subject: perf tools: Fix build-id 
matching on vmlinux There's a problem with finding the correct kernel symbols when perf report runs on a different kernel. Although a part of the problem was solved by the prior commit 0a7e6d1b6844 ("perf tools: Check recorded kernel version when finding vmlinux"), there's still a remaining problem. When perf records samples, it synthesizes the kernel map using machine__mmap_name() and ref_reloc_sym like "[kernel.kallsyms]_text". You can easily see it using the 'perf report -D' command. After finishing record, it goes through the recorded events to find the maps/dsos actually used, and then records their build-id info. During this process, it needs to load symbols in a dso and it'd call dso__load_vmlinux_path() since the default value of symbol_conf.try_vmlinux_path is true. However it changes dso->long_name to a real path of the vmlinux file (e.g. /lib/modules/3.16.4/build/vmlinux) if one is running on a custom kernel. As a result, perf report reads the build-id of the vmlinux, but cannot use it since it only knows about the [kernel.kallsyms] map. It then silently falls back to possible vmlinux paths by using the recorded kernel version (in case of a recent version) or the running kernel. Even with the recent tools, this still has a possibility of breaking the result. As the build directory is a symbolic link, if one built a new kernel in the same directory with different source/config, the old link to vmlinux will point to the new file. So it's absolutely necessary to use the build-id when finding a kernel image. In this patch, it's now changed to try to search for a kernel dso in the existing dso list which was constructed during build-id table parsing so it'll always have a build-id. If not found, search for "[kernel.kallsyms]". Before: $ perf report # Children Self Command Shared Object Symbol # ........ ........ ....... ................. ...............................
# 72.15% 0.00% swapper [kernel.kallsyms] [k] set_curr_task_rt 72.15% 0.00% swapper [kernel.kallsyms] [k] native_calibrate_tsc 72.15% 0.00% swapper [kernel.kallsyms] [k] tsc_refine_calibration_work 71.87% 71.87% swapper [kernel.kallsyms] [k] module_finalize ... After (for the same perf.data): 72.15% 0.00% swapper vmlinux [k] cpu_startup_entry 72.15% 0.00% swapper vmlinux [k] arch_cpu_idle 72.15% 0.00% swapper vmlinux [k] default_idle 71.87% 71.87% swapper vmlinux [k] native_safe_halt ... Signed-off-by: Namhyung Kim Acked-by: Ingo Molnar Link: http://lkml.kernel.org/r/20140924073356.GB1962@gmail.com Cc: Adrian Hunter Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1415063674-17206-8-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 2 +- tools/perf/util/machine.c | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 3e2c156d9c64..76442caca37e 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1269,7 +1269,7 @@ static int __event_process_build_id(struct build_id_event *bev, dso__set_build_id(dso, &bev->build_id); - if (filename[0] == '[') + if (!is_kernel_module(filename, NULL)) dso->kernel = dso_type; build_id__sprintf(dso->build_id, sizeof(dso->build_id), diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 946c7d62cb6e..53f90e9c65fe 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1085,8 +1085,20 @@ static int machine__process_kernel_mmap_event(struct machine *machine, * Should be there already, from the build-id table in * the header. 
*/ - struct dso *kernel = __dsos__findnew(&machine->kernel_dsos, - kmmap_prefix); + struct dso *kernel = NULL; + struct dso *dso; + + list_for_each_entry(dso, &machine->kernel_dsos.head, node) { + if (is_kernel_module(dso->long_name, NULL)) + continue; + + kernel = dso; + break; + } + + if (kernel == NULL) + kernel = __dsos__findnew(&machine->kernel_dsos, + kmmap_prefix); if (kernel == NULL) goto out_problem; -- cgit v1.2.3 From 96d78059d6d9da45d77078a219924304860497f2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 4 Nov 2014 10:14:34 +0900 Subject: perf tools: Make vmlinux short name more like kallsyms short name The previous patch changed kernel dso name from '[kernel.kallsyms]' to vmlinux. However it might add confusion to old users accustomed to the old name. So change the short name to '[kernel.vmlinux]' to reduce such confusion. Before: # Overhead Command Shared Object Symbol # ........ .............. ....................... ............................... # 9.83% swapper vmlinux [k] intel_idle 4.10% awk libc-2.20.so [.] __strcmp_sse2 1.86% sed libc-2.20.so [.] __strcmp_sse2 1.78% netctl-auto libc-2.20.so [.] __strcmp_sse2 1.23% netctl-auto libc-2.20.so [.] __mbrtowc 1.21% firefox libxul.so [.] 0x00000000024b62bd 1.20% swapper vmlinux [k] cpuidle_enter_state 1.03% sleep vmlinux [k] copy_user_generic_unrolled After: # Overhead Command Shared Object Symbol # ........ .............. ....................... ............................... # 9.83% swapper [kernel.vmlinux] [k] intel_idle 4.10% awk libc-2.20.so [.] __strcmp_sse2 1.86% sed libc-2.20.so [.] __strcmp_sse2 1.78% netctl-auto libc-2.20.so [.] __strcmp_sse2 1.23% netctl-auto libc-2.20.so [.] __mbrtowc 1.21% firefox libxul.so [.] 
0x00000000024b62bd 1.20% swapper [kernel.vmlinux] [k] cpuidle_enter_state 1.03% sleep [kernel.vmlinux] [k] copy_user_generic_unrolled Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1415063674-17206-9-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 53f90e9c65fe..52e94902afb1 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1106,6 +1106,9 @@ static int machine__process_kernel_mmap_event(struct machine *machine, if (__machine__create_kernel_maps(machine, kernel) < 0) goto out_problem; + if (strstr(dso->long_name, "vmlinux")) + dso__set_short_name(dso, "[kernel.vmlinux]", false); + machine__set_kernel_mmap_len(machine, event); /* -- cgit v1.2.3 From 416c419cc3799ddf7ea467c9adcb4cd038bd94a4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 26 Oct 2014 23:44:03 +0100 Subject: perf tools: Add test_and_set_bit function Set a bit and return its old value. Stolen from kernel sources, will be used in next patches. 
Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1414363445-22370-1-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/include/linux/bitmap.h | 17 +++++++++++++++++ tools/perf/util/include/linux/bitops.h | 2 ++ 2 files changed, 19 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/include/linux/bitmap.h b/tools/perf/util/include/linux/bitmap.h index 01ffd12dc791..40bd21488032 100644 --- a/tools/perf/util/include/linux/bitmap.h +++ b/tools/perf/util/include/linux/bitmap.h @@ -46,4 +46,21 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, __bitmap_or(dst, src1, src2, nbits); } +/** + * test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + */ +static inline int test_and_set_bit(int nr, unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long old; + + old = *p; + *p = old | mask; + + return (old & mask) != 0; +} + #endif /* _PERF_BITOPS_H */ diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h index dadfa7e54287..c3294163de17 100644 --- a/tools/perf/util/include/linux/bitops.h +++ b/tools/perf/util/include/linux/bitops.h @@ -15,6 +15,8 @@ #define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) #define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32)) #define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE) +#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) +#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) #define for_each_set_bit(bit, addr, size) \ for ((bit) = find_first_bit((addr), (size)); \ -- cgit v1.2.3 From cdae2d1e936457bf72673cb77e7f5f4b9d4c451e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 26 Oct 2014 23:44:04 
+0100 Subject: perf script perl: Removing event cache as it's no longer needed We don't need to maintain cache of 'struct event_format' objects. Currently the 'struct perf_evsel' holds this reference already. Adding events_defined bitmap to keep track of defined events, which is much cheaper than array of pointers. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1414363445-22370-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/util/scripting-engines/trace-event-perl.c | 29 +++++----------------- 1 file changed, 6 insertions(+), 23 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 0a01bac4ce02..22ebc46226e7 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "../util.h" #include @@ -57,7 +58,7 @@ INTERP my_perl; #define FTRACE_MAX_EVENT \ ((1 << (sizeof(unsigned short) * 8)) - 1) -struct event_format *events[FTRACE_MAX_EVENT]; +static DECLARE_BITMAP(events_defined, FTRACE_MAX_EVENT); extern struct scripting_context *scripting_context; @@ -238,35 +239,15 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->next); } -static inline struct event_format *find_cache_event(struct perf_evsel *evsel) -{ - static char ev_name[256]; - struct event_format *event; - int type = evsel->attr.config; - - if (events[type]) - return events[type]; - - events[type] = event = evsel->tp_format; - if (!event) - return NULL; - - sprintf(ev_name, "%s::%s", event->system, event->name); - - define_event_symbols(event, ev_name, event->print_fmt.args); - - return event; -} - static void 
perl_process_tracepoint(struct perf_sample *sample, struct perf_evsel *evsel, struct thread *thread) { + struct event_format *event = evsel->tp_format; struct format_field *field; static char handler[256]; unsigned long long val; unsigned long s, ns; - struct event_format *event; int pid; int cpu = sample->cpu; void *data = sample->raw_data; @@ -278,7 +259,6 @@ static void perl_process_tracepoint(struct perf_sample *sample, if (evsel->attr.type != PERF_TYPE_TRACEPOINT) return; - event = find_cache_event(evsel); if (!event) die("ug! no event found for type %" PRIu64, (u64)evsel->attr.config); @@ -286,6 +266,9 @@ static void perl_process_tracepoint(struct perf_sample *sample, sprintf(handler, "%s::%s", event->system, event->name); + if (!test_and_set_bit(event->id, events_defined)) + define_event_symbols(event, handler, event->print_fmt.args); + s = nsecs / NSECS_PER_SEC; ns = nsecs - s * NSECS_PER_SEC; -- cgit v1.2.3 From adf5bcf39583c4db1bf30069f8957400e61ccb18 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 26 Oct 2014 23:44:05 +0100 Subject: perf script python: Removing event cache as it's no longer needed We don't need to maintain cache of 'struct event_format' objects. Currently the 'struct perf_evsel' holds this reference already. Adding events_defined bitmap to keep track of defined events, which is much cheaper than array of pointers. 
Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1414363445-22370-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- .../util/scripting-engines/trace-event-python.c | 34 ++++------------------ 1 file changed, 6 insertions(+), 28 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 118bc62850a8..d808a328f4dc 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "../../perf.h" #include "../debug.h" @@ -46,7 +47,7 @@ PyMODINIT_FUNC initperf_trace_context(void); #define FTRACE_MAX_EVENT \ ((1 << (sizeof(unsigned short) * 8)) - 1) -struct event_format *events[FTRACE_MAX_EVENT]; +static DECLARE_BITMAP(events_defined, FTRACE_MAX_EVENT); #define MAX_FIELDS 64 #define N_COMMON_FIELDS 7 @@ -255,31 +256,6 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->next); } -static inline struct event_format *find_cache_event(struct perf_evsel *evsel) -{ - static char ev_name[256]; - struct event_format *event; - int type = evsel->attr.config; - - /* - * XXX: Do we really need to cache this since now we have evsel->tp_format - * cached already? 
Need to re-read this "cache" routine that as well calls - * define_event_symbols() :-\ - */ - if (events[type]) - return events[type]; - - events[type] = event = evsel->tp_format; - if (!event) - return NULL; - - sprintf(ev_name, "%s__%s", event->system, event->name); - - define_event_symbols(event, ev_name, event->print_fmt.args); - - return event; -} - static PyObject *get_field_numeric_entry(struct event_format *event, struct format_field *field, void *data) { @@ -403,12 +379,12 @@ static void python_process_tracepoint(struct perf_sample *sample, struct thread *thread, struct addr_location *al) { + struct event_format *event = evsel->tp_format; PyObject *handler, *context, *t, *obj, *callchain; PyObject *dict = NULL; static char handler_name[256]; struct format_field *field; unsigned long s, ns; - struct event_format *event; unsigned n = 0; int pid; int cpu = sample->cpu; @@ -420,7 +396,6 @@ static void python_process_tracepoint(struct perf_sample *sample, if (!t) Py_FatalError("couldn't create Python tuple"); - event = find_cache_event(evsel); if (!event) die("ug! 
no event found for type %d", (int)evsel->attr.config); @@ -428,6 +403,9 @@ static void python_process_tracepoint(struct perf_sample *sample, sprintf(handler_name, "%s__%s", event->system, event->name); + if (!test_and_set_bit(event->id, events_defined)) + define_event_symbols(event, handler_name, event->print_fmt.args); + handler = get_handler(handler_name); if (!handler) { dict = PyDict_New(); -- cgit v1.2.3 From daa01794a4a36a1da1b09a529adec0c8c0b94ab2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 4 Nov 2014 11:55:38 +0100 Subject: perf evsel: Do not call pevent_free_format when deleting tracepoint The libtraceevent library's main handle 'struct pevent' holds pointers to every event that was added to it via functions: pevent_parse_format pevent_parse_event We can't release struct event_format (call pevent_free_format) separately, because that breaks the pointer array mentioned above and another add_event call could end up with a segfault. All added events are released within the handle cleanup in pevent_free. 
Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1415098538-1512-1-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2f9e68025ede..12b4396c7175 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -853,8 +853,6 @@ void perf_evsel__exit(struct perf_evsel *evsel) perf_evsel__free_id(evsel); close_cgroup(evsel->cgrp); zfree(&evsel->group_name); - if (evsel->tp_format) - pevent_free_format(evsel->tp_format); zfree(&evsel->name); perf_evsel__object.fini(evsel); } -- cgit v1.2.3 From 6a21c0b5c2abd2fdfa6fff79f11df3d6082c1873 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 24 Sep 2014 13:48:39 +0200 Subject: perf tools: Add core support for sampling intr machine state regs Add the infrastructure to setup, collect and report the interrupt machine state regs which can be captured by the kernel. 
Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Cc: cebbert.lkml@gmail.com Cc: Adrian Hunter Cc: Andi Kleen Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Don Zickus Cc: Jean Pihet Cc: Jiri Olsa Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Paul Mackerras Cc: Stephane Eranian Cc: Waiman Long Cc: Wang Nan Link: http://lkml.kernel.org/r/1411559322-16548-4-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- tools/perf/perf.h | 1 + tools/perf/util/event.h | 1 + tools/perf/util/evsel.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- tools/perf/util/header.c | 1 + tools/perf/util/session.c | 44 +++++++++++++++++++++++++++++++++++++++----- 5 files changed, 87 insertions(+), 6 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 511c2831aa81..1dabb8553499 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -52,6 +52,7 @@ struct record_opts { bool sample_weight; bool sample_time; bool period; + bool sample_intr_regs; unsigned int freq; unsigned int mmap_pages; unsigned int user_freq; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 7be389735402..09b9e8d3fcf7 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -188,6 +188,7 @@ struct perf_sample { struct ip_callchain *callchain; struct branch_stack *branch_stack; struct regs_dump user_regs; + struct regs_dump intr_regs; struct stack_dump user_stack; struct sample_read read; }; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 12b4396c7175..34344ffa79ca 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -661,6 +661,11 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) if (callchain_param.enabled && !evsel->no_aux_samples) perf_evsel__config_callgraph(evsel); + if (opts->sample_intr_regs) { + attr->sample_regs_intr = PERF_REGS_MASK; + perf_evsel__set_sample_bit(evsel, REGS_INTR); + } + if (target__has_cpu(&opts->target)) 
perf_evsel__set_sample_bit(evsel, CPU); @@ -1037,6 +1042,7 @@ static size_t perf_event_attr__fprintf(struct perf_event_attr *attr, FILE *fp) ret += PRINT_ATTR_X64(branch_sample_type); ret += PRINT_ATTR_X64(sample_regs_user); ret += PRINT_ATTR_U32(sample_stack_user); + ret += PRINT_ATTR_X64(sample_regs_intr); ret += fprintf(fp, "%.60s\n", graph_dotted_line); @@ -1536,6 +1542,23 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array++; } + data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE; + if (type & PERF_SAMPLE_REGS_INTR) { + OVERFLOW_CHECK_u64(array); + data->intr_regs.abi = *array; + array++; + + if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 mask = evsel->attr.sample_regs_intr; + + sz = hweight_long(mask) * sizeof(u64); + OVERFLOW_CHECK(array, sz, max_size); + data->intr_regs.mask = mask; + data->intr_regs.regs = (u64 *)array; + array = (void *)array + sz; + } + } + return 0; } @@ -1631,6 +1654,16 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_TRANSACTION) result += sizeof(u64); + if (type & PERF_SAMPLE_REGS_INTR) { + if (sample->intr_regs.abi) { + result += sizeof(u64); + sz = hweight_long(sample->intr_regs.mask) * sizeof(u64); + result += sz; + } else { + result += sizeof(u64); + } + } + return result; } @@ -1809,6 +1842,17 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, array++; } + if (type & PERF_SAMPLE_REGS_INTR) { + if (sample->intr_regs.abi) { + *array++ = sample->intr_regs.abi; + sz = hweight_long(sample->intr_regs.mask) * sizeof(u64); + memcpy(array, sample->intr_regs.regs, sz); + array = (void *)array + sz; + } else { + *array++ = 0; + } + } + return 0; } @@ -1938,7 +1982,7 @@ static int sample_type__fprintf(FILE *fp, bool *first, u64 value) bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU), bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), bit_name(BRANCH_STACK), bit_name(REGS_USER), 
bit_name(STACK_USER), - bit_name(IDENTIFIER), + bit_name(IDENTIFIER), bit_name(REGS_INTR), { .name = NULL, } }; #undef bit_name diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 76442caca37e..05fab7a188dc 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2143,6 +2143,7 @@ static const int attr_file_abi_sizes[] = { [1] = PERF_ATTR_SIZE_VER1, [2] = PERF_ATTR_SIZE_VER2, [3] = PERF_ATTR_SIZE_VER3, + [4] = PERF_ATTR_SIZE_VER4, 0, }; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f4478ce72fdb..6ac62ae6b8fa 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -592,15 +592,46 @@ static void regs_dump__printf(u64 mask, u64 *regs) } } +static const char *regs_abi[] = { + [PERF_SAMPLE_REGS_ABI_NONE] = "none", + [PERF_SAMPLE_REGS_ABI_32] = "32-bit", + [PERF_SAMPLE_REGS_ABI_64] = "64-bit", +}; + +static inline const char *regs_dump_abi(struct regs_dump *d) +{ + if (d->abi > PERF_SAMPLE_REGS_ABI_64) + return "unknown"; + + return regs_abi[d->abi]; +} + +static void regs__printf(const char *type, struct regs_dump *regs) +{ + u64 mask = regs->mask; + + printf("... %s regs: mask 0x%" PRIx64 " ABI %s\n", + type, + mask, + regs_dump_abi(regs)); + + regs_dump__printf(mask, regs->regs); +} + static void regs_user__printf(struct perf_sample *sample) { struct regs_dump *user_regs = &sample->user_regs; - if (user_regs->regs) { - u64 mask = user_regs->mask; - printf("... 
user regs: mask 0x%" PRIx64 "\n", mask); - regs_dump__printf(mask, user_regs->regs); - } + if (user_regs->regs) + regs__printf("user", user_regs); +} + +static void regs_intr__printf(struct perf_sample *sample) +{ + struct regs_dump *intr_regs = &sample->intr_regs; + + if (intr_regs->regs) + regs__printf("intr", intr_regs); } static void stack_user__printf(struct stack_dump *dump) @@ -699,6 +730,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, if (sample_type & PERF_SAMPLE_REGS_USER) regs_user__printf(sample); + if (sample_type & PERF_SAMPLE_REGS_INTR) + regs_intr__printf(sample); + if (sample_type & PERF_SAMPLE_STACK_USER) stack_user__printf(&sample->user_stack); -- cgit v1.2.3 From 73c5d224b4514575abe0dd89b43adbde937429c3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 7 Nov 2014 22:57:56 +0900 Subject: perf build-id: Move disable_buildid_cache() to util/build-id.c Also move static variable no_buildid_cache and check it in the perf_session_cache_build_ids(). 
Signed-off-by: Namhyung Kim Cc: Aravinda Prasad Cc: Brendan Gregg Cc: Hemant Kumar Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Oleg Nesterov Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Vasant Hegde Cc: systemtap@sourceware.org Link: http://lkml.kernel.org/r/1415368677-3794-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 11 +++++++++++ tools/perf/util/build-id.h | 1 + tools/perf/util/header.c | 10 +--------- tools/perf/util/util.h | 1 - 4 files changed, 13 insertions(+), 10 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index dd2a3e52ada1..e8d79e5bfaf7 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -18,6 +18,9 @@ #include "header.h" #include "vdso.h" + +static bool no_buildid_cache; + int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -251,6 +254,11 @@ int dsos__hit_all(struct perf_session *session) return 0; } +void disable_buildid_cache(void) +{ + no_buildid_cache = true; +} + int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, const char *name, bool is_kallsyms, bool is_vdso) { @@ -404,6 +412,9 @@ int perf_session__cache_build_ids(struct perf_session *session) int ret; char debugdir[PATH_MAX]; + if (no_buildid_cache) + return 0; + snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir); if (mkdir(debugdir, 0755) != 0 && errno != EEXIST) diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 666a3bd4f64e..8236319514d5 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -25,5 +25,6 @@ int perf_session__cache_build_ids(struct perf_session *session); int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, const char *name, bool is_kallsyms, bool is_vdso); int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); +void 
disable_buildid_cache(void); #endif diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 05fab7a188dc..b20e40c74468 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -24,8 +24,6 @@ #include "build-id.h" #include "data.h" -static bool no_buildid_cache = false; - static u32 header_argc; static const char **header_argv; @@ -191,8 +189,7 @@ static int write_build_id(int fd, struct perf_header *h, pr_debug("failed to write buildid table\n"); return err; } - if (!no_buildid_cache) - perf_session__cache_build_ids(session); + perf_session__cache_build_ids(session); return 0; } @@ -2791,8 +2788,3 @@ int perf_event__process_build_id(struct perf_tool *tool __maybe_unused, session); return 0; } - -void disable_buildid_cache(void) -{ - no_buildid_cache = true; -} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 7dc44cfe25b3..76d23d83eae5 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -154,7 +154,6 @@ extern void set_die_routine(void (*routine)(const char *err, va_list params) NOR extern int prefixcmp(const char *str, const char *prefix); extern void set_buildid_dir(void); -extern void disable_buildid_cache(void); static inline const char *skip_prefix(const char *str, const char *prefix) { -- cgit v1.2.3 From 162bcc17bb876772793ca070ebd6488cfdae09bf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Nov 2014 11:25:28 -0300 Subject: perf symbols: Fallback to kallsyms when using the minimal 'ELF' loader The minimal ELF loader should not return 1 when it manages to read the vmlinux build-id, it should instead return 0, meaning that it hasn't loaded any symbols, since it doesn't parse ELF at all. That way, the main symbol.c routines will understand that it is necessary to continue looking for a file with symbols, and when no libelf is linked, that means it will eventually try kallsyms. 
Reported-by: Peter Zijlstra Tested-by: Peter Zijlstra Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20141111130326.GT18464@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-minimal.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index c9541fea9514..fa585c63f56a 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -341,7 +341,6 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused, if (filename__read_build_id(ss->name, build_id, BUILD_ID_SIZE) > 0) { dso__set_build_id(dso, build_id); - return 1; } return 0; } -- cgit v1.2.3 From ee205503f2333d639550eaed37abb455733510a3 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 11 Nov 2014 12:04:54 +0200 Subject: perf tools: Fix annotation with kcore Patch "perf tools: Fix build-id matching on vmlinux" breaks annotation with kcore. The problem is that symbol__annotate() first gets the filename based on the build-id which was previously not set. This patch provides a quick fix, however there should probably be only one way to determine the filename. e.g. symbol__annotate() should use the same way as dso__data_fd(). 
Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1415700294-30816-1-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 7dabde14ea54..873c8778db20 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -915,6 +915,8 @@ int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize) return -ENOMEM; } goto fallback; + } else if (dso__is_kcore(dso)) { + goto fallback; } else if (readlink(symfs_filename, command, sizeof(command)) < 0 || strstr(command, "[kernel.kallsyms]") || access(symfs_filename, R_OK)) { -- cgit v1.2.3 From 37592b8afb7151994e760d1727c264329d9c13c8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 12 Nov 2014 18:05:19 -0800 Subject: perf callchain: Factor out adding new call chain entries Move the code to resolve and add a new callchain entry into a new add_callchain_ip function. This will be used in the next patches to add LBRs too. No change in behavior. 
Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1415844328-4884-2-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 51 +++++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 19 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 52e94902afb1..84390eecab06 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1381,6 +1381,34 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, return mi; } +static int add_callchain_ip(struct thread *thread, + struct symbol **parent, + struct addr_location *root_al, + int cpumode, + u64 ip) +{ + struct addr_location al; + + al.filtered = 0; + al.sym = NULL; + thread__find_addr_location(thread, cpumode, MAP__FUNCTION, + ip, &al); + if (al.sym != NULL) { + if (sort__has_parent && !*parent && + symbol__match_regex(al.sym, &parent_regex)) + *parent = al.sym; + else if (have_ignore_callees && root_al && + symbol__match_regex(al.sym, &ignore_callees_regex)) { + /* Treat this symbol as the root, + forgetting its callees. 
*/ + *root_al = al; + callchain_cursor_reset(&callchain_cursor); + } + } + + return callchain_cursor_append(&callchain_cursor, ip, al.map, al.sym); +} + struct branch_info *sample__resolve_bstack(struct perf_sample *sample, struct addr_location *al) { @@ -1427,7 +1455,6 @@ static int thread__resolve_callchain_sample(struct thread *thread, for (i = 0; i < chain_nr; i++) { u64 ip; - struct addr_location al; if (callchain_param.order == ORDER_CALLEE) j = i; @@ -1464,24 +1491,10 @@ static int thread__resolve_callchain_sample(struct thread *thread, continue; } - al.filtered = 0; - thread__find_addr_location(thread, cpumode, - MAP__FUNCTION, ip, &al); - if (al.sym != NULL) { - if (sort__has_parent && !*parent && - symbol__match_regex(al.sym, &parent_regex)) - *parent = al.sym; - else if (have_ignore_callees && root_al && - symbol__match_regex(al.sym, &ignore_callees_regex)) { - /* Treat this symbol as the root, - forgetting its callees. */ - *root_al = al; - callchain_cursor_reset(&callchain_cursor); - } - } - - err = callchain_cursor_append(&callchain_cursor, - ip, al.map, al.sym); + err = add_callchain_ip(thread, parent, root_al, + cpumode, ip); + if (err == -EINVAL) + break; if (err) return err; } -- cgit v1.2.3 From 5550171b2a9f8df26ff483051d060db06376b26d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 12 Nov 2014 18:05:21 -0800 Subject: perf callchain: Use al.addr to set up call chain Use the relative address, this makes get_srcline work correctly in the end. 
Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1415844328-4884-4-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 84390eecab06..d97309c87bd6 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1406,7 +1406,7 @@ static int add_callchain_ip(struct thread *thread, } } - return callchain_cursor_append(&callchain_cursor, ip, al.map, al.sym); + return callchain_cursor_append(&callchain_cursor, al.addr, al.map, al.sym); } struct branch_info *sample__resolve_bstack(struct perf_sample *sample, -- cgit v1.2.3 From 2989ccaac48f8c3da7f77101bbf98e0ea8773d83 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 12 Nov 2014 18:05:23 -0800 Subject: perf callchain: Use a common function to resolve symbol or name Refactor the duplicated code to resolve the symbol name or the address of a symbol into a single function. Used in next patch to add common functionality. 
Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1415844328-4884-6-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 17 ----------------- tools/perf/ui/gtk/hists.c | 11 +---------- tools/perf/ui/stdio/hist.c | 23 +++++++++-------------- tools/perf/util/callchain.c | 19 +++++++++++++++++++ tools/perf/util/callchain.h | 3 +++ 5 files changed, 32 insertions(+), 41 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index cfb976b3de3a..12c17c5a3d68 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -463,23 +463,6 @@ out: return key; } -static char *callchain_list__sym_name(struct callchain_list *cl, - char *bf, size_t bfsize, bool show_dso) -{ - int printed; - - if (cl->ms.sym) - printed = scnprintf(bf, bfsize, "%s", cl->ms.sym->name); - else - printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip); - - if (show_dso) - scnprintf(bf + printed, bfsize - printed, " %s", - cl->ms.map ? 
cl->ms.map->dso->short_name : "unknown"); - - return bf; -} - struct callchain_print_arg { /* for hists browser */ off_t row_offset; diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index fc654fb77ace..4b3585eed1e8 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -89,15 +89,6 @@ void perf_gtk__init_hpp(void) perf_gtk__hpp_color_overhead_acc; } -static void callchain_list__sym_name(struct callchain_list *cl, - char *bf, size_t bfsize) -{ - if (cl->ms.sym) - scnprintf(bf, bfsize, "%s", cl->ms.sym->name); - else - scnprintf(bf, bfsize, "%#" PRIx64, cl->ip); -} - static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store, GtkTreeIter *parent, int col, u64 total) { @@ -128,7 +119,7 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store, scnprintf(buf, sizeof(buf), "%5.2f%%", percent); gtk_tree_store_set(store, &iter, 0, buf, -1); - callchain_list__sym_name(chain, buf, sizeof(buf)); + callchain_list__sym_name(chain, buf, sizeof(buf), false); gtk_tree_store_set(store, &iter, col, buf, -1); if (need_new_parent) { diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 15b451acbde6..dfcbc90146ef 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -41,6 +41,7 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, { int i; size_t ret = 0; + char bf[1024]; ret += callchain__fprintf_left_margin(fp, left_margin); for (i = 0; i < depth; i++) { @@ -56,11 +57,8 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, } else ret += fprintf(fp, "%s", " "); } - if (chain->ms.sym) - ret += fprintf(fp, "%s\n", chain->ms.sym->name); - else - ret += fprintf(fp, "0x%0" PRIx64 "\n", chain->ip); - + fputs(callchain_list__sym_name(chain, bf, sizeof(bf), false), fp); + fputc('\n', fp); return ret; } @@ -168,6 +166,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, struct rb_node *node; int i = 
0; int ret = 0; + char bf[1024]; /* * If have one single callchain root, don't bother printing @@ -196,10 +195,8 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, } else ret += callchain__fprintf_left_margin(fp, left_margin); - if (chain->ms.sym) - ret += fprintf(fp, " %s\n", chain->ms.sym->name); - else - ret += fprintf(fp, " %p\n", (void *)(long)chain->ip); + ret += fprintf(fp, "%s\n", callchain_list__sym_name(chain, bf, sizeof(bf), + false)); if (++entries_printed == callchain_param.print_limit) break; @@ -219,6 +216,7 @@ static size_t __callchain__fprintf_flat(FILE *fp, struct callchain_node *node, { struct callchain_list *chain; size_t ret = 0; + char bf[1024]; if (!node) return 0; @@ -229,11 +227,8 @@ static size_t __callchain__fprintf_flat(FILE *fp, struct callchain_node *node, list_for_each_entry(chain, &node->val, list) { if (chain->ip >= PERF_CONTEXT_MAX) continue; - if (chain->ms.sym) - ret += fprintf(fp, " %s\n", chain->ms.sym->name); - else - ret += fprintf(fp, " %p\n", - (void *)(long)chain->ip); + ret += fprintf(fp, " %s\n", callchain_list__sym_name(chain, + bf, sizeof(bf), false)); } return ret; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 00229809a904..38da69c8c1ff 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -808,3 +808,22 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node * out: return 1; } + +char *callchain_list__sym_name(struct callchain_list *cl, + char *bf, size_t bfsize, bool show_dso) +{ + int printed; + + if (cl->ms.sym) { + printed = scnprintf(bf, bfsize, "%s", cl->ms.sym->name); + } else + printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip); + + if (show_dso) + scnprintf(bf + printed, bfsize - printed, " %s", + cl->ms.map ? 
+ cl->ms.map->dso->short_name : + "unknown"); + + return bf; +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 3caccc2c173c..3e1ed15d11f1 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -193,4 +193,7 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused, } #endif +char *callchain_list__sym_name(struct callchain_list *cl, + char *bf, size_t bfsize, bool show_dso); + #endif /* __PERF_CALLCHAIN_H */ -- cgit v1.2.3 From 2de217688e8f086bf6d920d530401b56fcbc6eff Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 12 Nov 2014 18:05:25 -0800 Subject: perf tools: Only print base source file for srcline For perf report with --sort srcline only print the base source file name. This makes the results generally fit much better to the screen. The path is usually not that useful anyways because it is often from different systems. Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1415844328-4884-8-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/srcline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index f3e4bc5fe5d2..77c180637138 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -274,7 +274,7 @@ char *get_srcline(struct dso *dso, unsigned long addr) if (!addr2line(dso_name, addr, &file, &line, dso)) goto out; - if (asprintf(&srcline, "%s:%u", file, line) < 0) { + if (asprintf(&srcline, "%s:%u", basename(file), line) < 0) { free(file); goto out; } -- cgit v1.2.3 From e592488c01d51763de847fcecb3d969231a483a9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 12 Nov 2014 18:05:26 -0800 Subject: perf annotate: Support source line numbers in annotate With srcline key/sort'ing it's useful to have line numbers in the annotate window. This patch implements this. 
Use objdump -l to request the line numbers and save them in the line structure. Then the browser displays them for source lines. The line numbers are not displayed by default, but can be toggled on with 'k' There is one unfortunate problem with this setup. For lines not containing source and which are outside functions objdump -l reports line numbers off by a few: it always reports the first line number in the next function even for lines that are outside the function. I haven't found a nice way to detect/correct this. Probably objdump has to be fixed. See https://sourceware.org/bugzilla/show_bug.cgi?id=16433 The line numbers are still useful even with these problems, as most are correct and the ones which are not are nearby. v2: Fix help text. Handle (discriminator...) output in objdump. Left align the line numbers. Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1415844328-4884-9-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 13 ++++++++++++- tools/perf/util/annotate.c | 30 +++++++++++++++++++++++++----- tools/perf/util/annotate.h | 1 + 3 files changed, 38 insertions(+), 6 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index f0697a3aede0..1e0a2fd80115 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -27,6 +27,7 @@ static struct annotate_browser_opt { bool hide_src_code, use_offset, jump_arrows, + show_linenr, show_nr_jumps; } annotate_browser__opts = { .use_offset = true, @@ -128,7 +129,11 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (!*dl->line) slsmg_write_nstring(" ", width - pcnt_width); else if (dl->offset == -1) { - printed = scnprintf(bf, sizeof(bf), "%*s ", + if (dl->line_nr && annotate_browser__opts.show_linenr) + printed = scnprintf(bf, sizeof(bf), "%-*d ", + ab->addr_width + 1, 
dl->line_nr); + else + printed = scnprintf(bf, sizeof(bf), "%*s ", ab->addr_width, " "); slsmg_write_nstring(bf, printed); slsmg_write_nstring(dl->line, width - printed - pcnt_width + 1); @@ -733,6 +738,7 @@ static int annotate_browser__run(struct annotate_browser *browser, "o Toggle disassembler output/simplified view\n" "s Toggle source code view\n" "/ Search string\n" + "k Toggle line numbers\n" "r Run available scripts\n" "? Search string backwards\n"); continue; @@ -741,6 +747,10 @@ static int annotate_browser__run(struct annotate_browser *browser, script_browse(NULL); continue; } + case 'k': + annotate_browser__opts.show_linenr = + !annotate_browser__opts.show_linenr; + break; case 'H': nd = browser->curr_hot; break; @@ -984,6 +994,7 @@ static struct annotate_config { } annotate__configs[] = { ANNOTATE_CFG(hide_src_code), ANNOTATE_CFG(jump_arrows), + ANNOTATE_CFG(show_linenr), ANNOTATE_CFG(show_nr_jumps), ANNOTATE_CFG(use_offset), }; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 873c8778db20..e5670f1af737 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -17,11 +17,13 @@ #include "debug.h" #include "annotate.h" #include "evsel.h" +#include #include #include const char *disassembler_style; const char *objdump_path; +static regex_t file_lineno; static struct ins *ins__find(const char *name); static int disasm_line__parse(char *line, char **namep, char **rawp); @@ -570,13 +572,15 @@ out_free_name: return -1; } -static struct disasm_line *disasm_line__new(s64 offset, char *line, size_t privsize) +static struct disasm_line *disasm_line__new(s64 offset, char *line, + size_t privsize, int line_nr) { struct disasm_line *dl = zalloc(sizeof(*dl) + privsize); if (dl != NULL) { dl->offset = offset; dl->line = strdup(line); + dl->line_nr = line_nr; if (dl->line == NULL) goto out_delete; @@ -788,13 +792,15 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * The ops.raw part will be 
parsed further according to type of the instruction. */ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, - FILE *file, size_t privsize) + FILE *file, size_t privsize, + int *line_nr) { struct annotation *notes = symbol__annotation(sym); struct disasm_line *dl; char *line = NULL, *parsed_line, *tmp, *tmp2, *c; size_t line_len; s64 line_ip, offset = -1; + regmatch_t match[2]; if (getline(&line, &line_len, file) < 0) return -1; @@ -812,6 +818,12 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, line_ip = -1; parsed_line = line; + /* /filename:linenr ? Save line number and ignore. */ + if (regexec(&file_lineno, line, 2, match, 0) == 0) { + *line_nr = atoi(line + match[1].rm_so); + return 0; + } + /* * Strip leading spaces: */ @@ -842,8 +854,9 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, parsed_line = tmp2 + 1; } - dl = disasm_line__new(offset, parsed_line, privsize); + dl = disasm_line__new(offset, parsed_line, privsize, *line_nr); free(line); + (*line_nr)++; if (dl == NULL) return -1; @@ -869,6 +882,11 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, return 0; } +static __attribute__((constructor)) void symbol__init_regexpr(void) +{ + regcomp(&file_lineno, "^/[^:]+:([0-9]+)", REG_EXTENDED); +} + static void delete_last_nop(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); @@ -904,6 +922,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize) char symfs_filename[PATH_MAX]; struct kcore_extract kce; bool delete_extract = false; + int lineno = 0; if (filename) symbol__join_symfs(symfs_filename, filename); @@ -984,7 +1003,7 @@ fallback: snprintf(command, sizeof(command), "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 - " -d %s %s -C %s 2>/dev/null|grep -v %s|expand", + " -l -d %s %s -C %s 2>/dev/null|grep -v %s|expand", objdump_path ? objdump_path : "objdump", disassembler_style ? 
"-M " : "", disassembler_style ? disassembler_style : "", @@ -1001,7 +1020,8 @@ fallback: goto out_free_filename; while (!feof(file)) - if (symbol__parse_objdump_line(sym, map, file, privsize) < 0) + if (symbol__parse_objdump_line(sym, map, file, privsize, + &lineno) < 0) break; /* diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 112d6e268150..0784a9420528 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -58,6 +58,7 @@ struct disasm_line { char *line; char *name; struct ins *ins; + int line_nr; struct ins_operands ops; }; -- cgit v1.2.3 From b2d53671cdb0cf5070d56359821eb812669bb1ad Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Nov 2014 18:02:51 -0300 Subject: perf hists: Fix up srcline histogram key formatting Problem introduced in: commit 5b5916696051 "perf report: Honor column width setting" Where the left justification signal was after the width, which ended up, when the width was, say, 11, always printing: %11.11-s Instead of src:line left justified and limited to 11 chars. Resulting in a like: 70.93% %11.11-s [.] f2 tcall When it should instead be: 70.93% tcall.c:5 [.] 
f2 tcall Cc: Adrian Hunter Cc: Andi Kleen Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-2xnt0vqkoox52etq2qhyetr0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 9402885a77f3..82a5596241a7 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -309,7 +309,7 @@ sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right) static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%*.*-s", width, width, he->srcline); + return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcline); } struct sort_entry sort_srcline = { -- cgit v1.2.3 From f140373bc904d9541e3f8e985d3810864e34c735 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 13 Nov 2014 18:21:03 +0100 Subject: perf evsel: Fix ftrace:function event recording Following patch fails (-EINVAL) ftrace:function with enabled user space callchains: cfa77bc4af2c perf: Disallow user-space callchains for function trace events We need to follow in perf tool itself and explicitly set the perf_event_attr::exclude_callchain_user flag for ftrace:function event. 
Reported-by: Steven Rostedt Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1415899263-24820-1-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 34344ffa79ca..f2dc91fb87fa 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -658,6 +658,14 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) attr->mmap_data = track; } + /* + * We don't allow user space callchains for function trace + * event, due to issues with page faults while tracing page + * fault handler and its overall trickiness nature. + */ + if (perf_evsel__is_function_event(evsel)) + evsel->attr.exclude_callchain_user = 1; + if (callchain_param.enabled && !evsel->no_aux_samples) perf_evsel__config_callgraph(evsel); -- cgit v1.2.3 From f90d194a867a5a1db51789b8c4e99bcd196be6bb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 11 Nov 2014 16:16:39 +0200 Subject: perf evlist: Do not poll events that use the system_wide flag The system_wide flag causes a selected event to be opened always without a pid. Consequently it will never get a POLLHUP, but it is used for tracking in combination with other events, so it should not need to be polled anyway. Therefore don't add it for polling. 
Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1415715423-15563-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 7e23dae54f1d..cfbe2b99b9aa 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -816,7 +816,15 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, perf_evlist__mmap_get(evlist, idx); } - if (__perf_evlist__add_pollfd(evlist, fd, idx) < 0) { + /* + * The system_wide flag causes a selected event to be opened + * always without a pid. Consequently it will never get a + * POLLHUP, but it is used for tracking in combination with + * other events, so it should not need to be polled anyway. + * Therefore don't add it for polling. + */ + if (!evsel->system_wide && + __perf_evlist__add_pollfd(evlist, fd, idx) < 0) { perf_evlist__mmap_put(evlist, idx); return -1; } -- cgit v1.2.3 From a84808083688d82d7f1e5786ccf5df0ff7d448cb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 11 Nov 2014 16:16:41 +0200 Subject: perf tools: Only override the default :tid comm entry Events may still be ordered even if there are no timestamps e.g. if the data is recorded per-thread. Also synthesized COMM events have a timestamp of zero. Consequently it is better to keep comm entries even if they have a timestamp of zero. However, when a struct thread is created the command string is not known and a comm entry with a string of the form ":" is used. In that case thread->comm_set is false and the comm entry should be overridden. 
Signed-off-by: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1415715423-15563-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index a2157f0ef1df..9ebc8b1f9be5 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -103,15 +103,14 @@ struct comm *thread__exec_comm(const struct thread *thread) return last; } -/* CHECKME: time should always be 0 if event aren't ordered */ int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp, bool exec) { struct comm *new, *curr = thread__comm(thread); int err; - /* Override latest entry if it had no specific time coverage */ - if (!curr->start && !curr->exec) { + /* Override the default :tid entry */ + if (!thread->comm_set) { err = comm__override(curr, str, timestamp, exec); if (err) return err; -- cgit v1.2.3 From 23f0981bbd89fcc1496d0490ec39ca7c91599e32 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 12 Nov 2014 18:05:24 -0800 Subject: perf callchain: Enable printing the srcline in the history For lbr-as-callgraph we need to see the line number in the history, because many LBR entries can be in a single function, and just showing the same function name many times is not useful. When the history code is configured to sort by address, also try to resolve the address to a file:srcline and display this in the browser. If that doesn't work still display the address. This can be also useful without LBRs for understanding which call in a large function (or in which inlined function) called something else. 
Contains fixes from Namhyung Kim v2: Refactor code into common function v3: Fix GTK build v4: Rebase Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1415844328-4884-7-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/callchain.c | 11 ++++++++++- tools/perf/util/callchain.h | 1 + tools/perf/util/srcline.c | 6 ++++-- 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 38da69c8c1ff..b6624aeaaca9 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -815,7 +815,16 @@ char *callchain_list__sym_name(struct callchain_list *cl, int printed; if (cl->ms.sym) { - printed = scnprintf(bf, bfsize, "%s", cl->ms.sym->name); + if (callchain_param.key == CCKEY_ADDRESS && + cl->ms.map && !cl->srcline) + cl->srcline = get_srcline(cl->ms.map->dso, + map__rip_2objdump(cl->ms.map, + cl->ip)); + if (cl->srcline) + printed = scnprintf(bf, bfsize, "%s %s", + cl->ms.sym->name, cl->srcline); + else + printed = scnprintf(bf, bfsize, "%s", cl->ms.sym->name); } else printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip); diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 3e1ed15d11f1..3f158474c892 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -70,6 +70,7 @@ extern struct callchain_param callchain_param; struct callchain_list { u64 ip; struct map_symbol ms; + char *srcline; struct list_head list; }; diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 77c180637138..ac877f96fed7 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -258,7 +258,7 @@ char *get_srcline(struct dso *dso, unsigned long addr) const char *dso_name; if (!dso->has_srcline) - return SRCLINE_UNKNOWN; + goto out; if (dso->symsrc_filename) dso_name = dso->symsrc_filename; @@ -289,7 +289,9 @@ out: dso->has_srcline = 0; 
dso__free_a2l(dso); } - return SRCLINE_UNKNOWN; + if (asprintf(&srcline, "%s[%lx]", dso->short_name, addr) < 0) + return SRCLINE_UNKNOWN; + return srcline; } void free_srcline(char *srcline) -- cgit v1.2.3 From aaba4e12a99cc56fc8614a3f2a3ec6db4fcde76e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 24 Nov 2014 17:10:52 -0300 Subject: perf symbols: Move bfd_demangle stubbing to its only user We need to define bfd_demangle() to either a wrapper for cplus_demangle() or to a stub when NO_DEMANGLE is defined. That is at odds with using bfd.h for some other reason, as it defines bfd_demangle() and then if code that wants to use symbol.h, where the above stubbing/wrapping is done, and bfd.h for other reasons, we end up with a build error where bfd_demangle() is found to be redefined. Avoid that by moving the stubbing/wrapping to symbol-elf.c, that is the only user of such function. If we ever get to a point where there are more valid users, we can then introduce a header for that. 
Cc: Adrian Hunter Cc: Andi Kleen Cc: Borislav Petkov Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-6wzjpe2fy9xtgchshulixlzw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 21 +++++++++++++++++++++ tools/perf/util/symbol.h | 21 --------------------- 2 files changed, 21 insertions(+), 21 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index efc7eb6b8f0f..06fcd1bf98b6 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -11,6 +11,27 @@ #include #include "debug.h" +#ifdef HAVE_CPLUS_DEMANGLE_SUPPORT +extern char *cplus_demangle(const char *, int); + +static inline char *bfd_demangle(void __maybe_unused *v, const char *c, int i) +{ + return cplus_demangle(c, i); +} +#else +#ifdef NO_DEMANGLE +static inline char *bfd_demangle(void __maybe_unused *v, + const char __maybe_unused *c, + int __maybe_unused i) +{ + return NULL; +} +#else +#define PACKAGE 'perf' +#include +#endif +#endif + #ifndef HAVE_ELF_GETPHDRNUM_SUPPORT static int elf_getphdrnum(Elf *elf, size_t *dst) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index ded3ca7266de..e0b297c50f9d 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -23,27 +23,6 @@ #include "dso.h" -#ifdef HAVE_CPLUS_DEMANGLE_SUPPORT -extern char *cplus_demangle(const char *, int); - -static inline char *bfd_demangle(void __maybe_unused *v, const char *c, int i) -{ - return cplus_demangle(c, i); -} -#else -#ifdef NO_DEMANGLE -static inline char *bfd_demangle(void __maybe_unused *v, - const char __maybe_unused *c, - int __maybe_unused i) -{ - return NULL; -} -#else -#define PACKAGE 'perf' -#include -#endif -#endif - /* * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP; * for newer versions we can use mmap to reduce memory usage: 
-- cgit v1.2.3 From 85c116a6cb91a5c09b7a6c95ffc6a6cbd32cd237 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 12 Nov 2014 18:05:27 -0800 Subject: perf callchain: Make get_srcline fall back to sym+offset When the source line is not found fall back to sym + offset. This is generally much more useful than a raw address. For this we need to pass in the symbol from the caller. For some callers it's awkward to compute, so we stay at the old behaviour. Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1415844328-4884-10-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 2 +- tools/perf/util/callchain.c | 3 ++- tools/perf/util/map.c | 2 +- tools/perf/util/sort.c | 6 ++++-- tools/perf/util/srcline.c | 11 +++++++++-- tools/perf/util/util.h | 4 +++- 6 files changed, 20 insertions(+), 8 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e5670f1af737..79999ceaf2be 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1192,7 +1192,7 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, goto next; offset = start + i; - src_line->path = get_srcline(map->dso, offset); + src_line->path = get_srcline(map->dso, offset, NULL, false); insert_source_line(&tmp_root, src_line); next: diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index b6624aeaaca9..517ed84db97a 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -819,7 +819,8 @@ char *callchain_list__sym_name(struct callchain_list *cl, cl->ms.map && !cl->srcline) cl->srcline = get_srcline(cl->ms.map->dso, map__rip_2objdump(cl->ms.map, - cl->ip)); + cl->ip), + cl->ms.sym, false); if (cl->srcline) printed = scnprintf(bf, bfsize, "%s %s", cl->ms.sym->name, cl->srcline); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 040a785c857b..62ca9f2607d5 100644 --- 
a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -360,7 +360,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, if (map && map->dso) { srcline = get_srcline(map->dso, - map__rip_2objdump(map, addr)); + map__rip_2objdump(map, addr), NULL, true); if (srcline != SRCLINE_UNKNOWN) ret = fprintf(fp, "%s%s", prefix, srcline); free_srcline(srcline); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 82a5596241a7..9139dda9f9a3 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -291,7 +291,8 @@ sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right) else { struct map *map = left->ms.map; left->srcline = get_srcline(map->dso, - map__rip_2objdump(map, left->ip)); + map__rip_2objdump(map, left->ip), + left->ms.sym, true); } } if (!right->srcline) { @@ -300,7 +301,8 @@ sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right) else { struct map *map = right->ms.map; right->srcline = get_srcline(map->dso, - map__rip_2objdump(map, right->ip)); + map__rip_2objdump(map, right->ip), + right->ms.sym, true); } } return strcmp(right->srcline, left->srcline); diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index ac877f96fed7..e73b6a5c9e0f 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -8,6 +8,8 @@ #include "util/util.h" #include "util/debug.h" +#include "symbol.h" + #ifdef HAVE_LIBBFD_SUPPORT /* @@ -250,7 +252,8 @@ void dso__free_a2l(struct dso *dso __maybe_unused) */ #define A2L_FAIL_LIMIT 123 -char *get_srcline(struct dso *dso, unsigned long addr) +char *get_srcline(struct dso *dso, unsigned long addr, struct symbol *sym, + bool show_sym) { char *file = NULL; unsigned line = 0; @@ -289,7 +292,11 @@ out: dso->has_srcline = 0; dso__free_a2l(dso); } - if (asprintf(&srcline, "%s[%lx]", dso->short_name, addr) < 0) + if (sym) { + if (asprintf(&srcline, "%s+%ld", show_sym ? 
sym->name : "", + addr - sym->start) < 0) + return SRCLINE_UNKNOWN; + } else if (asprintf(&srcline, "%s[%lx]", dso->short_name, addr) < 0) return SRCLINE_UNKNOWN; return srcline; } diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 76d23d83eae5..419bee030f83 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -337,8 +337,10 @@ static inline int path__join3(char *bf, size_t size, } struct dso; +struct symbol; -char *get_srcline(struct dso *dso, unsigned long addr); +char *get_srcline(struct dso *dso, unsigned long addr, struct symbol *sym, + bool show_sym); void free_srcline(char *srcline); int filename__read_int(const char *filename, int *value); -- cgit v1.2.3 From 330dfa224fcc8594977785a6493ca06d124f0cfe Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 18 Nov 2014 13:30:28 +0900 Subject: perf tools: Fix segfault due to invalid kernel dso access Jiri reported that the commit 96d78059d6d9 ("perf tools: Make vmlinux short name more like kallsyms short name") segfaults on perf script. When processing kernel mmap event, it should access the 'kernel' variable as sometimes it cannot find a matching dso from build-id table so 'dso' might be invalid. 
Reported-by: Jiri Olsa Tested-by: Jiri Olsa Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1416285028-30572-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index d97309c87bd6..b75b487574c7 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1106,8 +1106,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine, if (__machine__create_kernel_maps(machine, kernel) < 0) goto out_problem; - if (strstr(dso->long_name, "vmlinux")) - dso__set_short_name(dso, "[kernel.vmlinux]", false); + if (strstr(kernel->long_name, "vmlinux")) + dso__set_short_name(kernel, "[kernel.vmlinux]", false); machine__set_kernel_mmap_len(machine, event); -- cgit v1.2.3 From f78eaef0e0493f6068777a246b9c4d9d5cf2b7aa Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 21 Nov 2014 13:38:00 -0800 Subject: perf tools: Allow to force redirect pr_debug to stderr. When debugging the tui browser I find it useful to redirect the debug log into a file. Currently it's always forced to the message line. Add an option to force it to stderr. Then it can be easily redirected. 
Example: [root@zoo ~]# perf --debug stderr report -vv 2> /tmp/debug [root@zoo ~]# tail /tmp/debug dso open failed, mmap: No such file or directory dso open failed, mmap: No such file or directory dso open failed, mmap: No such file or directory dso open failed, mmap: No such file or directory dso open failed, mmap: No such file or directory Using /root/.debug/.build-id/4e/841948927029fb650132253642d5dbb2c1fb93 for symbols Failed to open /tmp/perf-8831.map, continuing without symbols Failed to open /tmp/perf-12721.map, continuing without symbols Failed to open /tmp/perf-6966.map, continuing without symbols Failed to open /tmp/perf-8802.map, continuing without symbols [root@zoo ~]# Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1416605880-25055-2-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/debug.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index ba357f3226c6..ad60b2f20258 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -19,13 +19,14 @@ int verbose; bool dump_trace = false, quiet = false; int debug_ordered_events; +static int redirect_to_stderr; static int _eprintf(int level, int var, const char *fmt, va_list args) { int ret = 0; if (var >= level) { - if (use_browser >= 1) + if (use_browser >= 1 && !redirect_to_stderr) ui_helpline__vshow(fmt, args); else ret = vfprintf(stderr, fmt, args); @@ -145,6 +146,7 @@ static struct debug_variable { } debug_variables[] = { { .name = "verbose", .ptr = &verbose }, { .name = "ordered-events", .ptr = &debug_ordered_events}, + { .name = "stderr", .ptr = &redirect_to_stderr}, { .name = NULL, } }; -- cgit v1.2.3 From 857a94a226d7d345c3f492d5679e802e59f824a9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 21 Nov 2014 10:31:05 +0100 Subject: perf evsel: Introduce perf_evsel__compute_deltas function Making 
compute_deltas functions global and renaming it to perf_evsel__compute_deltas. It will be used in stat command in later patch. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Matt Fleming Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1416562275-12404-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 9 ++++----- tools/perf/util/evsel.h | 3 +++ 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index f2dc91fb87fa..1c73bc4d57d3 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -876,9 +876,8 @@ void perf_evsel__delete(struct perf_evsel *evsel) free(evsel); } -static inline void compute_deltas(struct perf_evsel *evsel, - int cpu, - struct perf_counts_values *count) +void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, + struct perf_counts_values *count) { struct perf_counts_values tmp; @@ -913,7 +912,7 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0) return -errno; - compute_deltas(evsel, cpu, &count); + perf_evsel__compute_deltas(evsel, cpu, &count); if (scale) { if (count.run == 0) @@ -956,7 +955,7 @@ int __perf_evsel__read(struct perf_evsel *evsel, } } - compute_deltas(evsel, -1, aggr); + perf_evsel__compute_deltas(evsel, -1, aggr); evsel->counts->scaled = 0; if (scale) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 979790951bfb..746b7ea84589 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -110,6 +110,9 @@ struct thread_map; struct perf_evlist; struct record_opts; +void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, + struct perf_counts_values *count); + int perf_evsel__object_config(size_t object_size, int (*init)(struct 
perf_evsel *evsel), void (*fini)(struct perf_evsel *evsel)); -- cgit v1.2.3 From 13112bbf595d4081f291f7061bb096dbf4401d41 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 21 Nov 2014 10:31:06 +0100 Subject: perf evsel: Introduce perf_counts_values__scale function Factoring out scale logic into perf_counts_values__scale function. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Matt Fleming Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1416562275-12404-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 47 ++++++++++++++++++++++------------------------- tools/perf/util/evsel.h | 3 +++ 2 files changed, 25 insertions(+), 25 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1c73bc4d57d3..6dc7a67e6d35 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -897,6 +897,26 @@ void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, count->run = count->run - tmp.run; } +void perf_counts_values__scale(struct perf_counts_values *count, + bool scale, s8 *pscaled) +{ + s8 scaled = 0; + + if (scale) { + if (count->run == 0) { + scaled = -1; + count->val = 0; + } else if (count->run < count->ena) { + scaled = 1; + count->val = (u64)((double) count->val * count->ena / count->run + 0.5); + } + } else + count->ena = count->run = 0; + + if (pscaled) + *pscaled = scaled; +} + int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, int cpu, int thread, bool scale) { @@ -913,15 +933,7 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, return -errno; perf_evsel__compute_deltas(evsel, cpu, &count); - - if (scale) { - if (count.run == 0) - count.val = 0; - else if (count.run < count.ena) - count.val = (u64)((double)count.val * count.ena / count.run + 0.5); - } else - count.ena = count.run = 0; - +
perf_counts_values__scale(&count, scale, NULL); evsel->counts->cpu[cpu] = count; return 0; } @@ -956,22 +968,7 @@ int __perf_evsel__read(struct perf_evsel *evsel, } perf_evsel__compute_deltas(evsel, -1, aggr); - - evsel->counts->scaled = 0; - if (scale) { - if (aggr->run == 0) { - evsel->counts->scaled = -1; - aggr->val = 0; - return 0; - } - - if (aggr->run < aggr->ena) { - evsel->counts->scaled = 1; - aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5); - } - } else - aggr->ena = aggr->run = 0; - + perf_counts_values__scale(aggr, scale, &evsel->counts->scaled); return 0; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 746b7ea84589..7af0377ceb18 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -110,6 +110,9 @@ struct thread_map; struct perf_evlist; struct record_opts; +void perf_counts_values__scale(struct perf_counts_values *count, + bool scale, s8 *pscaled); + void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, struct perf_counts_values *count); -- cgit v1.2.3 From 011dccbdd93b7022c5c67e7c55fa8b5030b5e03d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 21 Nov 2014 10:31:07 +0100 Subject: perf evsel: Introduce perf_evsel__read_cb function Adding perf_evsel__read_cb read function that returns count values via callback. It will be used later in stat command as single way to retrieve counter values.
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Matt Fleming Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1416562275-12404-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 16 ++++++++++++++++ tools/perf/util/evsel.h | 7 +++++++ 2 files changed, 23 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 6dc7a67e6d35..2d26b7ad6fe0 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -917,6 +917,22 @@ void perf_counts_values__scale(struct perf_counts_values *count, *pscaled = scaled; } +int perf_evsel__read_cb(struct perf_evsel *evsel, int cpu, int thread, + perf_evsel__read_cb_t cb) +{ + struct perf_counts_values count; + + memset(&count, 0, sizeof(count)); + + if (FD(evsel, cpu, thread) < 0) + return -EINVAL; + + if (readn(FD(evsel, cpu, thread), &count, sizeof(count)) < 0) + return -errno; + + return cb(evsel, cpu, thread, &count); +} + int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, int cpu, int thread, bool scale) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 7af0377ceb18..5c93bed8e8d9 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -233,6 +233,13 @@ static inline bool perf_evsel__match2(struct perf_evsel *e1, (a)->attr.type == (b)->attr.type && \ (a)->attr.config == (b)->attr.config) +typedef int (perf_evsel__read_cb_t)(struct perf_evsel *evsel, + int cpu, int thread, + struct perf_counts_values *count); + +int perf_evsel__read_cb(struct perf_evsel *evsel, int cpu, int thread, + perf_evsel__read_cb_t cb); + int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, int cpu, int thread, bool scale); -- cgit v1.2.3 From 044330c1840e1ece97136d78a15484c867e2faaa Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 21 Nov 2014 10:31:12 +0100 Subject: perf 
tools: Add per-pkg format file parsing The .per-pkg file indicates that all but one value per socket should be discarded. Adding support to check up this file and set event flag accordingly. This patch is part of Matt's original patch: http://marc.info/?l=linux-kernel&m=141527675002139&w=2 only the file parsing part, the rest is solved differently. Signed-off-by: Matt Fleming Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1416562275-12404-9-git-send-email-jolsa@kernel.org Signed-off-by: Jiri Olsa Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.h | 1 + tools/perf/util/parse-events.c | 1 + tools/perf/util/pmu.c | 27 +++++++++++++++++++++++++++ tools/perf/util/pmu.h | 2 ++ 4 files changed, 31 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 5c93bed8e8d9..792b0ea8a8b8 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -91,6 +91,7 @@ struct perf_evsel { bool immediate; bool system_wide; bool tracking; + bool per_pkg; /* parse modifier helper */ int exclude_GH; int nr_members; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index c659a3ca1283..5a373483f0e4 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -681,6 +681,7 @@ int parse_events_add_pmu(struct list_head *list, int *idx, if (evsel) { evsel->unit = info.unit; evsel->scale = info.scale; + evsel->per_pkg = info.per_pkg; } return evsel ? 
0 : -ENOMEM; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 881b75490533..f003b5a9e059 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -163,6 +163,24 @@ error: return -1; } +static int +perf_pmu__parse_per_pkg(struct perf_pmu_alias *alias, char *dir, char *name) +{ + char path[PATH_MAX]; + int fd; + + snprintf(path, PATH_MAX, "%s/%s.per-pkg", dir, name); + + fd = open(path, O_RDONLY); + if (fd == -1) + return -1; + + close(fd); + + alias->per_pkg = true; + return 0; +} + static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FILE *file) { struct perf_pmu_alias *alias; @@ -181,6 +199,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI INIT_LIST_HEAD(&alias->terms); alias->scale = 1.0; alias->unit[0] = '\0'; + alias->per_pkg = false; ret = parse_events_terms(&alias->terms, buf); if (ret) { @@ -194,6 +213,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI */ perf_pmu__parse_unit(alias, dir, name); perf_pmu__parse_scale(alias, dir, name); + perf_pmu__parse_per_pkg(alias, dir, name); list_add_tail(&alias->list, list); @@ -209,6 +229,8 @@ static inline bool pmu_alias_info_file(char *name) return true; if (len > 6 && !strcmp(name + len - 6, ".scale")) return true; + if (len > 8 && !strcmp(name + len - 8, ".per-pkg")) + return true; return false; } @@ -649,6 +671,8 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, struct perf_pmu_alias *alias; int ret; + info->per_pkg = false; + /* * Mark unit and scale as not set * (different from default values, see below) @@ -668,6 +692,9 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, if (ret) return ret; + if (alias->per_pkg) + info->per_pkg = true; + list_del(&term->list); free(term); } diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 8092de78e818..c3a74e0e17a2 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ 
-29,6 +29,7 @@ struct perf_pmu { struct perf_pmu_info { const char *unit; double scale; + bool per_pkg; }; #define UNIT_MAX_LEN 31 /* max length for event unit name */ @@ -39,6 +40,7 @@ struct perf_pmu_alias { struct list_head list; /* ELEM */ char unit[UNIT_MAX_LEN+1]; double scale; + bool per_pkg; }; struct perf_pmu *perf_pmu__find(const char *name); -- cgit v1.2.3 From 1d9e446b91e182055d874fbb30150aad479a4981 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 21 Nov 2014 10:31:13 +0100 Subject: perf tools: Add snapshot format file parsing The .snapshot file indicates that the provided event value is a snapshot value and we have to bypass the delta computation logic. Adding support to check up this file and set event flag accordingly. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Matt Fleming Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1416562275-12404-10-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.h | 1 + tools/perf/util/parse-events.c | 1 + tools/perf/util/pmu.c | 47 ++++++++++++++++++++++++++++++++---------- tools/perf/util/pmu.h | 2 ++ 4 files changed, 40 insertions(+), 11 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 792b0ea8a8b8..b18d58da580b 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -73,6 +73,7 @@ struct perf_evsel { char *name; double scale; const char *unit; + bool snapshot; struct event_format *tp_format; union { void *priv; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5a373483f0e4..77b43fe43d55 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -682,6 +682,7 @@ int parse_events_add_pmu(struct list_head *list, int *idx, evsel->unit = info.unit; evsel->scale = info.scale; evsel->per_pkg = info.per_pkg; + 
evsel->snapshot = info.snapshot; } return evsel ? 0 : -ENOMEM; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index f003b5a9e059..5c9c4947cfb4 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -181,6 +181,23 @@ perf_pmu__parse_per_pkg(struct perf_pmu_alias *alias, char *dir, char *name) return 0; } +static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, + char *dir, char *name) +{ + char path[PATH_MAX]; + int fd; + + snprintf(path, PATH_MAX, "%s/%s.snapshot", dir, name); + + fd = open(path, O_RDONLY); + if (fd == -1) + return -1; + + alias->snapshot = true; + close(fd); + return 0; +} + static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FILE *file) { struct perf_pmu_alias *alias; @@ -214,6 +231,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI perf_pmu__parse_unit(alias, dir, name); perf_pmu__parse_scale(alias, dir, name); perf_pmu__parse_per_pkg(alias, dir, name); + perf_pmu__parse_snapshot(alias, dir, name); list_add_tail(&alias->list, list); @@ -231,6 +249,8 @@ static inline bool pmu_alias_info_file(char *name) return true; if (len > 8 && !strcmp(name + len - 8, ".per-pkg")) return true; + if (len > 9 && !strcmp(name + len - 9, ".snapshot")) + return true; return false; } @@ -639,23 +659,27 @@ static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu, } -static int check_unit_scale(struct perf_pmu_alias *alias, - const char **unit, double *scale) +static int check_info_data(struct perf_pmu_alias *alias, + struct perf_pmu_info *info) { /* * Only one term in event definition can - * define unit and scale, fail if there's - * more than one. + * define unit, scale and snapshot, fail + * if there's more than one. 
*/ - if ((*unit && alias->unit) || - (*scale && alias->scale)) + if ((info->unit && alias->unit) || + (info->scale && alias->scale) || + (info->snapshot && alias->snapshot)) return -EINVAL; if (alias->unit) - *unit = alias->unit; + info->unit = alias->unit; if (alias->scale) - *scale = alias->scale; + info->scale = alias->scale; + + if (alias->snapshot) + info->snapshot = alias->snapshot; return 0; } @@ -677,8 +701,9 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, * Mark unit and scale as not set * (different from default values, see below) */ - info->unit = NULL; - info->scale = 0.0; + info->unit = NULL; + info->scale = 0.0; + info->snapshot = false; list_for_each_entry_safe(term, h, head_terms, list) { alias = pmu_find_alias(pmu, term); @@ -688,7 +713,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, if (ret) return ret; - ret = check_unit_scale(alias, &info->unit, &info->scale); + ret = check_info_data(alias, info); if (ret) return ret; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index c3a74e0e17a2..6b1249fbdb5f 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -30,6 +30,7 @@ struct perf_pmu_info { const char *unit; double scale; bool per_pkg; + bool snapshot; }; #define UNIT_MAX_LEN 31 /* max length for event unit name */ @@ -41,6 +42,7 @@ struct perf_pmu_alias { char unit[UNIT_MAX_LEN+1]; double scale; bool per_pkg; + bool snapshot; }; struct perf_pmu *perf_pmu__find(const char *name); -- cgit v1.2.3 From a5a7fd76b55a6e6916ff22e5c8fdb39a8381be2c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 21 Nov 2014 10:31:11 +0100 Subject: perf tools: Remove perf_evsel__read interface Removing the perf_evsel__read interfaces because we replaced the only user in the stat command code. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Matt Fleming Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1416562275-12404-8-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 34 ---------------------------------- tools/perf/util/evsel.h | 29 ----------------------------- 2 files changed, 63 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2d26b7ad6fe0..1e90c8557ede 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -954,40 +954,6 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, return 0; } -int __perf_evsel__read(struct perf_evsel *evsel, - int ncpus, int nthreads, bool scale) -{ - size_t nv = scale ? 3 : 1; - int cpu, thread; - struct perf_counts_values *aggr = &evsel->counts->aggr, count; - - if (evsel->system_wide) - nthreads = 1; - - aggr->val = aggr->ena = aggr->run = 0; - - for (cpu = 0; cpu < ncpus; cpu++) { - for (thread = 0; thread < nthreads; thread++) { - if (FD(evsel, cpu, thread) < 0) - continue; - - if (readn(FD(evsel, cpu, thread), - &count, nv * sizeof(u64)) < 0) - return -errno; - - aggr->val += count.val; - if (scale) { - aggr->ena += count.ena; - aggr->run += count.run; - } - } - } - - perf_evsel__compute_deltas(evsel, -1, aggr); - perf_counts_values__scale(aggr, scale, &evsel->counts->scaled); - return 0; -} - static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread) { struct perf_evsel *leader = evsel->leader; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index b18d58da580b..3207f4861038 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -271,35 +271,6 @@ static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel, return __perf_evsel__read_on_cpu(evsel, cpu, thread, true); } -int __perf_evsel__read(struct 
perf_evsel *evsel, int ncpus, int nthreads, - bool scale); - -/** - * perf_evsel__read - Read the aggregate results on all CPUs - * - * @evsel - event selector to read value - * @ncpus - Number of cpus affected, from zero - * @nthreads - Number of threads affected, from zero - */ -static inline int perf_evsel__read(struct perf_evsel *evsel, - int ncpus, int nthreads) -{ - return __perf_evsel__read(evsel, ncpus, nthreads, false); -} - -/** - * perf_evsel__read_scaled - Read the aggregate results on all CPUs, scaled - * - * @evsel - event selector to read value - * @ncpus - Number of cpus affected, from zero - * @nthreads - Number of threads affected, from zero - */ -static inline int perf_evsel__read_scaled(struct perf_evsel *evsel, - int ncpus, int nthreads) -{ - return __perf_evsel__read(evsel, ncpus, nthreads, true); -} - int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *sample); -- cgit v1.2.3 From 779d0b997e0787fc5f80110159b6c18ae0fae395 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 21 Nov 2014 10:31:14 +0100 Subject: perf stat: Add support for per-pkg counters The .per-pkg file indicates that all but one value per socket should be discarded. Adding the logic of skipping the rest of the socket once first value was read. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Matt Fleming Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1416562275-12404-11-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/evsel.h | 1 + 2 files changed, 50 insertions(+) (limited to 'tools/perf/util') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index b24a7a08bd1d..860e8ad06616 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -388,10 +388,56 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count) update_stats(&runtime_itlb_cache_stats[0], count[0]); } +static void zero_per_pkg(struct perf_evsel *counter) +{ + if (counter->per_pkg_mask) + memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); +} + +static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) +{ + unsigned long *mask = counter->per_pkg_mask; + struct cpu_map *cpus = perf_evsel__cpus(counter); + int s; + + *skip = false; + + if (!counter->per_pkg) + return 0; + + if (cpu_map__empty(cpus)) + return 0; + + if (!mask) { + mask = zalloc(MAX_NR_CPUS); + if (!mask) + return -ENOMEM; + + counter->per_pkg_mask = mask; + } + + s = cpu_map__get_socket(cpus, cpu); + if (s < 0) + return -1; + + *skip = test_and_set_bit(s, mask) == 1; + return 0; +} + static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, struct perf_counts_values *count) { struct perf_counts_values *aggr = &evsel->counts->aggr; + static struct perf_counts_values zero; + bool skip = false; + + if (check_per_pkg(evsel, cpu, &skip)) { + pr_err("failed to read per-pkg counter\n"); + return -1; + } + + if (skip) + count = &zero; switch (aggr_mode) { case AGGR_CORE: @@ -465,6 +511,9 @@ static int read_counter(struct perf_evsel *counter) if 
(counter->system_wide) nthreads = 1; + if (counter->per_pkg) + zero_per_pkg(counter); + for (thread = 0; thread < nthreads; thread++) { for (cpu = 0; cpu < ncpus; cpu++) { if (perf_evsel__read_cb(counter, cpu, thread, read_cb)) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 3207f4861038..38622747d130 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -93,6 +93,7 @@ struct perf_evsel { bool system_wide; bool tracking; bool per_pkg; + unsigned long *per_pkg_mask; /* parse modifier helper */ int exclude_GH; int nr_members; -- cgit v1.2.3 From 8b7bad58efb7e3aaff60f7c1fa4361fb8c23181d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 12 Nov 2014 18:05:20 -0800 Subject: perf callchain: Support handling complete branch stacks as histograms Currently branch stacks can be only shown as edge histograms for individual branches. I never found this display particularly useful. This implements an alternative mode that creates histograms over complete branch traces, instead of individual branches, similar to how normal callgraphs are handled. This is done by putting it in front of the normal callgraph and then using the normal callgraph histogram infrastructure to unify them. This way in complex functions we can understand the control flow that lead to a particular sample, and may even see some control flow in the caller for short functions. 
Example (simplified, of course for such simple code this is usually not needed), please run this after the whole patchkit is in, as at this point in the patch order there is no --branch-history, that will be added in a patch after this one: tcall.c: volatile a = 10000, b = 100000, c; __attribute__((noinline)) f2() { c = a / b; } __attribute__((noinline)) f1() { f2(); f2(); } main() { int i; for (i = 0; i < 1000000; i++) f1(); } % perf record -b -g ./tsrc/tcall [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.044 MB perf.data (~1923 samples) ] % perf report --no-children --branch-history ... 54.91% tcall.c:6 [.] f2 tcall | |--65.53%-- f2 tcall.c:5 | | | |--70.83%-- f1 tcall.c:11 | | f1 tcall.c:10 | | main tcall.c:18 | | main tcall.c:18 | | main tcall.c:17 | | main tcall.c:17 | | f1 tcall.c:13 | | f1 tcall.c:13 | | f2 tcall.c:7 | | f2 tcall.c:5 | | f1 tcall.c:12 | | f1 tcall.c:12 | | f2 tcall.c:7 | | f2 tcall.c:5 | | f1 tcall.c:11 | | | --29.17%-- f1 tcall.c:12 | f1 tcall.c:12 | f2 tcall.c:7 | f2 tcall.c:5 | f1 tcall.c:11 | f1 tcall.c:10 | main tcall.c:18 | main tcall.c:18 | main tcall.c:17 | main tcall.c:17 | f1 tcall.c:13 | f1 tcall.c:13 | f2 tcall.c:7 | f2 tcall.c:5 | f1 tcall.c:12 The default output is unchanged. This is only implemented in perf report, no change to record or anywhere else. This adds the basic code to report: - add a new "branch" option to the -g option parser to enable this mode - when the flag is set include the LBR into the callstack in machine.c. The rest of the history code is unchanged and doesn't know the difference between LBR entry and normal call entry. - detect overlaps with the callchain - remove small loop duplicates in the LBR Current limitations: - The LBR flags (mispredict etc.) are not shown in the history and LBR entries have no special marker. - It would be nice if annotate marked the LBR entries somehow (e.g. with arrows) v2: Various fixes. v3: Merge further patches into this one. 
Fix white space. v4: Improve manpage. Address review feedback. v5: Rename functions. Better error message without -g. Fix crash without -b. v6: Rebase v7: Rebase. Use NO_ENTRY in memset. v8: Port to latest tip. Move add_callchain_ip to separate patch. Skip initial entries in callchain. Minor cleanups. Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1415844328-4884-3-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 7 +- tools/perf/builtin-report.c | 4 +- tools/perf/util/callchain.c | 4 + tools/perf/util/callchain.h | 1 + tools/perf/util/machine.c | 126 ++++++++++++++++++++++++++++--- tools/perf/util/symbol.h | 3 +- 6 files changed, 132 insertions(+), 13 deletions(-) (limited to 'tools/perf/util') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 0927bf4e6c2a..22706beffabc 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -159,7 +159,7 @@ OPTIONS --dump-raw-trace:: Dump raw trace in ASCII. --g [type,min[,limit],order[,key]]:: +-g [type,min[,limit],order[,key][,branch]]:: --call-graph:: Display call chains using type, min percent threshold, optional print limit and order. @@ -177,6 +177,11 @@ OPTIONS - function: compare on functions - address: compare on individual code addresses + branch can be: + - branch: include last branch information in callgraph + when available. Usually more convenient to use --branch-history + for this. + Default: fractal,0.5,callee,function. 
--children:: diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 140a6cd88351..410d44fac64f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -637,8 +637,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "regex filter to identify parent, see: '--sort parent'"), OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, "Only display entries with parent-match"), - OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", - "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). " + OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order[,branch]", + "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address), add branches. 
" "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, "Accumulate callchains of children and show total overhead as well"), diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 517ed84db97a..cf524a35cc84 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -149,6 +149,10 @@ static int parse_callchain_sort_key(const char *value) callchain_param.key = CCKEY_ADDRESS; return 0; } + if (!strncmp(value, "branch", strlen(value))) { + callchain_param.branch_callstack = 1; + return 0; + } return -1; } diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 3f158474c892..dbc08cf5f970 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -63,6 +63,7 @@ struct callchain_param { sort_chain_func_t sort; enum chain_order order; enum chain_key key; + bool branch_callstack; }; extern struct callchain_param callchain_param; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b75b487574c7..15dd0a9691ce 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -12,6 +12,7 @@ #include #include #include "unwind.h" +#include "linux/hash.h" static void dsos__init(struct dsos *dsos) { @@ -1391,7 +1392,11 @@ static int add_callchain_ip(struct thread *thread, al.filtered = 0; al.sym = NULL; - thread__find_addr_location(thread, cpumode, MAP__FUNCTION, + if (cpumode == -1) + thread__find_cpumode_addr_location(thread, MAP__FUNCTION, + ip, &al); + else + thread__find_addr_location(thread, cpumode, MAP__FUNCTION, ip, &al); if (al.sym != NULL) { if (sort__has_parent && !*parent && @@ -1427,8 +1432,50 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, return bi; } +#define CHASHSZ 127 +#define CHASHBITS 7 +#define NO_ENTRY 0xff + +#define PERF_MAX_BRANCH_DEPTH 127 + +/* Remove loops. 
*/ +static int remove_loops(struct branch_entry *l, int nr) +{ + int i, j, off; + unsigned char chash[CHASHSZ]; + + memset(chash, NO_ENTRY, sizeof(chash)); + + BUG_ON(PERF_MAX_BRANCH_DEPTH > 255); + + for (i = 0; i < nr; i++) { + int h = hash_64(l[i].from, CHASHBITS) % CHASHSZ; + + /* no collision handling for now */ + if (chash[h] == NO_ENTRY) { + chash[h] = i; + } else if (l[chash[h]].from == l[i].from) { + bool is_loop = true; + /* check if it is a real loop */ + off = 0; + for (j = chash[h]; j < i && i + off < nr; j++, off++) + if (l[j].from != l[i + off].from) { + is_loop = false; + break; + } + if (is_loop) { + memmove(l + i, l + i + off, + (nr - (i + off)) * sizeof(*l)); + nr -= off; + } + } + } + return nr; +} + static int thread__resolve_callchain_sample(struct thread *thread, struct ip_callchain *chain, + struct branch_stack *branch, struct symbol **parent, struct addr_location *root_al, int max_stack) @@ -1438,22 +1485,82 @@ static int thread__resolve_callchain_sample(struct thread *thread, int i; int j; int err; - int skip_idx __maybe_unused; + int skip_idx = -1; + int first_call = 0; + + /* + * Based on DWARF debug information, some architectures skip + * a callchain entry saved by the kernel. + */ + if (chain->nr < PERF_MAX_STACK_DEPTH) + skip_idx = arch_skip_callchain_idx(thread, chain); callchain_cursor_reset(&callchain_cursor); + /* + * Add branches to call stack for easier browsing. This gives + * more context for a sample than just the callers. + * + * This uses individual histograms of paths compared to the + * aggregated histograms the normal LBR mode uses. + * + * Limitations for now: + * - No extra filters + * - No annotations (should annotate somehow) + */ + + if (branch && callchain_param.branch_callstack) { + int nr = min(max_stack, (int)branch->nr); + struct branch_entry be[nr]; + + if (branch->nr > PERF_MAX_BRANCH_DEPTH) { + pr_warning("corrupted branch chain. 
skipping...\n"); + goto check_calls; + } + + for (i = 0; i < nr; i++) { + if (callchain_param.order == ORDER_CALLEE) { + be[i] = branch->entries[i]; + /* + * Check for overlap into the callchain. + * The return address is one off compared to + * the branch entry. To adjust for this + * assume the calling instruction is not longer + * than 8 bytes. + */ + if (i == skip_idx || + chain->ips[first_call] >= PERF_CONTEXT_MAX) + first_call++; + else if (be[i].from < chain->ips[first_call] && + be[i].from >= chain->ips[first_call] - 8) + first_call++; + } else + be[i] = branch->entries[branch->nr - i - 1]; + } + + nr = remove_loops(be, nr); + + for (i = 0; i < nr; i++) { + err = add_callchain_ip(thread, parent, root_al, + -1, be[i].to); + if (!err) + err = add_callchain_ip(thread, parent, root_al, + -1, be[i].from); + if (err == -EINVAL) + break; + if (err) + return err; + } + chain_nr -= nr; + } + +check_calls: if (chain->nr > PERF_MAX_STACK_DEPTH) { pr_warning("corrupted callchain. skipping...\n"); return 0; } - /* - * Based on DWARF debug information, some architectures skip - * a callchain entry saved by the kernel. - */ - skip_idx = arch_skip_callchain_idx(thread, chain); - - for (i = 0; i < chain_nr; i++) { + for (i = first_call; i < chain_nr; i++) { u64 ip; if (callchain_param.order == ORDER_CALLEE) @@ -1517,6 +1624,7 @@ int thread__resolve_callchain(struct thread *thread, int max_stack) { int ret = thread__resolve_callchain_sample(thread, sample->callchain, + sample->branch_stack, parent, root_al, max_stack); if (ret) return ret; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index e0b297c50f9d..9d602e9c6f59 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -102,7 +102,8 @@ struct symbol_conf { demangle, demangle_kernel, filter_relative, - show_hist_headers; + show_hist_headers, + branch_callstack; const char *vmlinux_name, *kallsyms_name, *source_prefix, -- cgit v1.2.3