From c1f47b454ce759d7b13604137a233cad4617e1e8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 21 Jun 2009 13:58:51 +0200 Subject: perf_counter tools: Fix vmlinux fallback when running on a different kernel Lucas De Marchi reported that perf report and perf annotate displays mismatching profile if a perf.data is analyzed on an older kernel - even if the correct vmlinux is specified via the -k option. The reason is the fallback path in util/symbol.c:dso__load_kernel(): int dso__load_kernel(struct dso *self, const char *vmlinux, symbol_filter_t filter, int verbose) { int err = -1; if (vmlinux) err = dso__load_vmlinux(self, vmlinux, filter, verbose); if (err) err = dso__load_kallsyms(self, filter, verbose); return err; } dso__load_vmlinux() returns negative on error, but on success it returns the number of symbols loaded - which confuses the function to load the kallsyms. This is normally harmless, as reporting is usually performed on the same kernel that is analyzed - but if there's a mismatch then we load the wrong kallsyms and create a non-sensical symbol tree. The fix is to only fall back to kallsyms on errors. Reported-by: Lucas De Marchi Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 86e14375e74e..01b62fa03996 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -629,7 +629,7 @@ int dso__load_kernel(struct dso *self, const char *vmlinux, if (vmlinux) err = dso__load_vmlinux(self, vmlinux, filter, verbose); - if (err) + if (err < 0) err = dso__load_kallsyms(self, filter, verbose); return err; -- cgit v1.2.3 From 51e268423151fc7bb41945bde7843160b6a14c32 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 22 Jun 2009 16:43:14 +0530 Subject: perf_counter tools: Define separate declarations for H/W and S/W events Define separate declarations for H/W and S/W events to: 1. Shorten name to save some space so that we can add more members 2. Fix alignment 3. Avoid declaring HARDWARE/SOFTWARE again and again. Removed unused CR(x, y) Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245669194.17153.6.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 44 +++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 35d04da38d6a..12abab3a0d63 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -18,30 +18,30 @@ struct event_symbol { char *symbol; }; -#define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y -#define CR(x, y) .type = PERF_TYPE_##x, .config = y +#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x +#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x static struct event_symbol event_symbols[] = { - { C(HARDWARE, HW_CPU_CYCLES), "cpu-cycles", }, - { C(HARDWARE, HW_CPU_CYCLES), "cycles", }, - { C(HARDWARE, HW_INSTRUCTIONS), "instructions", }, - { C(HARDWARE, HW_CACHE_REFERENCES), "cache-references", }, - { C(HARDWARE, HW_CACHE_MISSES), "cache-misses", }, - { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branch-instructions", }, - { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branches", }, - { C(HARDWARE, HW_BRANCH_MISSES), "branch-misses", }, - { C(HARDWARE, HW_BUS_CYCLES), "bus-cycles", }, - - { C(SOFTWARE, SW_CPU_CLOCK), "cpu-clock", }, - { C(SOFTWARE, SW_TASK_CLOCK), "task-clock", }, - { C(SOFTWARE, SW_PAGE_FAULTS), "page-faults", }, - { C(SOFTWARE, SW_PAGE_FAULTS), "faults", }, - { C(SOFTWARE, SW_PAGE_FAULTS_MIN), "minor-faults", }, - { C(SOFTWARE, SW_PAGE_FAULTS_MAJ), "major-faults", }, - { C(SOFTWARE, SW_CONTEXT_SWITCHES), "context-switches", }, - { C(SOFTWARE, SW_CONTEXT_SWITCHES), "cs", }, - { C(SOFTWARE, SW_CPU_MIGRATIONS), "cpu-migrations", }, - { C(SOFTWARE, SW_CPU_MIGRATIONS), "migrations", }, + { CHW(CPU_CYCLES), "cpu-cycles", }, + { CHW(CPU_CYCLES), "cycles", }, + { CHW(INSTRUCTIONS), "instructions", }, + { CHW(CACHE_REFERENCES), "cache-references", }, + { CHW(CACHE_MISSES), "cache-misses", }, + { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", }, + { CHW(BRANCH_INSTRUCTIONS), "branches", }, + { CHW(BRANCH_MISSES), "branch-misses", }, + { CHW(BUS_CYCLES), "bus-cycles", }, + + { CSW(CPU_CLOCK), "cpu-clock", }, + { CSW(TASK_CLOCK), "task-clock", }, + { CSW(PAGE_FAULTS), "page-faults", }, + { CSW(PAGE_FAULTS), "faults", }, + { CSW(PAGE_FAULTS_MIN), "minor-faults", }, + { CSW(PAGE_FAULTS_MAJ), "major-faults", }, + { CSW(CONTEXT_SWITCHES), "context-switches", }, + { CSW(CONTEXT_SWITCHES), "cs", }, + { CSW(CPU_MIGRATIONS), "cpu-migrations", }, + { CSW(CPU_MIGRATIONS), "migrations", }, }; #define __PERF_COUNTER_FIELD(config, name) \ -- cgit v1.2.3 From 74d5b5889ea71a95d8924c08f8a7c6e2bdcbc0ba Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 22 Jun 2009 16:44:28 +0530 Subject: perf_counter tools: Introduce alias member in event_symbol By introducing alias member in event_symbol : 1. duplicate lines are removed, like: cpu-cycles and cycles branch-instructions and branches context-switches and cs cpu-migrations and migrations 2. We can also add alias for another events. Now ./perf list looks like : List of pre-defined events (to be used in -e): cpu-cycles OR cycles [Hardware event] instructions [Hardware event] cache-references [Hardware event] cache-misses [Hardware event] branch-instructions OR branches [Hardware event] branch-misses [Hardware event] bus-cycles [Hardware event] cpu-clock [Software event] task-clock [Software event] page-faults [Software event] faults [Software event] minor-faults [Software event] major-faults [Software event] context-switches OR cs [Software event] cpu-migrations OR migrations [Software event] rNNN [raw hardware event descriptor] Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245669268.17153.8.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 63 +++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 25 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 12abab3a0d63..f5695486ad3f 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -16,32 +16,29 @@ struct event_symbol { u8 type; u64 config; char *symbol; + char *alias; }; #define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x static struct event_symbol event_symbols[] = { - { CHW(CPU_CYCLES), "cpu-cycles", }, - { CHW(CPU_CYCLES), "cycles", }, - { CHW(INSTRUCTIONS), "instructions", }, - { CHW(CACHE_REFERENCES), "cache-references", }, - { CHW(CACHE_MISSES), "cache-misses", }, - { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", }, - { CHW(BRANCH_INSTRUCTIONS), "branches", }, - { CHW(BRANCH_MISSES), "branch-misses", }, - { CHW(BUS_CYCLES), "bus-cycles", }, - - { CSW(CPU_CLOCK), "cpu-clock", }, - { CSW(TASK_CLOCK), "task-clock", }, - { CSW(PAGE_FAULTS), "page-faults", }, - { CSW(PAGE_FAULTS), "faults", }, - { CSW(PAGE_FAULTS_MIN), "minor-faults", }, - { CSW(PAGE_FAULTS_MAJ), "major-faults", }, - { CSW(CONTEXT_SWITCHES), "context-switches", }, - { CSW(CONTEXT_SWITCHES), "cs", }, - { CSW(CPU_MIGRATIONS), "cpu-migrations", }, - { CSW(CPU_MIGRATIONS), "migrations", }, + { CHW(CPU_CYCLES), "cpu-cycles", "cycles" }, + { CHW(INSTRUCTIONS), "instructions", "" }, + { CHW(CACHE_REFERENCES), "cache-references", "" }, + { CHW(CACHE_MISSES), "cache-misses", "" }, + { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, + { CHW(BRANCH_MISSES), "branch-misses", "" }, + { CHW(BUS_CYCLES), "bus-cycles", "" }, + + { CSW(CPU_CLOCK), "cpu-clock", "" }, + { CSW(TASK_CLOCK), "task-clock", "" }, + { CSW(PAGE_FAULTS), "page-faults", "" }, + { CSW(PAGE_FAULTS), "faults", "" }, + { CSW(PAGE_FAULTS_MIN), "minor-faults", "" }, + { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, + { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, + { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, }; #define __PERF_COUNTER_FIELD(config, name) \ @@ -196,6 +193,19 @@ static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *a return 0; } +static int check_events(const char *str, unsigned int i) +{ + if (!strncmp(str, event_symbols[i].symbol, + strlen(event_symbols[i].symbol))) + return 1; + + if (strlen(event_symbols[i].alias)) + if (!strncmp(str, event_symbols[i].alias, + strlen(event_symbols[i].alias))) + return 1; + return 0; +} + /* * Each event can have multiple symbolic names. * Symbolic names are (almost) exactly matched. @@ -235,9 +245,7 @@ static int parse_event_symbols(const char *str, struct perf_counter_attr *attr) } for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { - if (!strncmp(str, event_symbols[i].symbol, - strlen(event_symbols[i].symbol))) { - + if (check_events(str, i)) { attr->type = event_symbols[i].type; attr->config = event_symbols[i].config; @@ -289,6 +297,7 @@ void print_events(void) { struct event_symbol *syms = event_symbols; unsigned int i, type, prev_type = -1; + char name[40]; fprintf(stderr, "\n"); fprintf(stderr, "List of pre-defined events (to be used in -e):\n"); @@ -301,14 +310,18 @@ void print_events(void) if (type != prev_type) fprintf(stderr, "\n"); - fprintf(stderr, " %-30s [%s]\n", syms->symbol, + if (strlen(syms->alias)) + sprintf(name, "%s OR %s", syms->symbol, syms->alias); + else + strcpy(name, syms->symbol); + fprintf(stderr, " %-40s [%s]\n", name, event_type_descriptors[type]); prev_type = type; } fprintf(stderr, "\n"); - fprintf(stderr, " %-30s [raw hardware event descriptor]\n", + fprintf(stderr, " %-40s [raw hardware event descriptor]\n", "rNNN"); fprintf(stderr, "\n"); -- cgit v1.2.3 From 520f2c346af463fa00924b236e092da482b344cc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 22 Jun 2009 16:52:51 +0200 Subject: perf report: Output more symbol related debug data Print more symbol relocation related info under -vv. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 5 +++-- tools/perf/util/symbol.c | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5eb5566f0c95..ec230a0146e9 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -797,7 +797,7 @@ resolve_symbol(struct thread *thread, struct map **mapp, { struct dso *dso = dsop ? *dsop : NULL; struct map *map = mapp ? *mapp : NULL; - uint64_t ip = *ipp; + u64 ip = *ipp; if (!thread) return NULL; @@ -814,7 +814,6 @@ resolve_symbol(struct thread *thread, struct map **mapp, *mapp = map; got_map: ip = map->map_ip(map, ip); - *ipp = ip; dso = map->dso; } else { @@ -828,6 +827,8 @@ got_map: dso = kernel_dso; } dprintf(" ...... dso: %s\n", dso ? dso->name : ""); + dprintf(" ...... map: %Lx -> %Lx\n", *ipp, ip); + *ipp = ip; if (dsop) *dsop = dso; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 01b62fa03996..9c659ef6aec2 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -535,6 +535,10 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, gelf_getshdr(sec, &shdr); obj_start = sym.st_value; + if (verbose >= 2) + printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n", + (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); + sym.st_value -= shdr.sh_addr - shdr.sh_offset; f = symbol__new(sym.st_value, sym.st_size, -- cgit v1.2.3 From c0c22dbfa8ba3c5045eeb9c76d2822ffc44fefc3 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 22 Jun 2009 20:47:26 +0530 Subject: perf_counter tools: Set alias for page-faults "faults" should be alias for "page-faults" Also fixed alignment and 80 characters issue Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245683846.12092.1.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f5695486ad3f..06af2fadcd87 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -33,8 +33,7 @@ static struct event_symbol event_symbols[] = { { CSW(CPU_CLOCK), "cpu-clock", "" }, { CSW(TASK_CLOCK), "task-clock", "" }, - { CSW(PAGE_FAULTS), "page-faults", "" }, - { CSW(PAGE_FAULTS), "faults", "" }, + { CSW(PAGE_FAULTS), "page-faults", "faults" }, { CSW(PAGE_FAULTS_MIN), "minor-faults", "" }, { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, @@ -71,24 +70,24 @@ static char *sw_event_names[] = { #define MAX_ALIASES 8 -static char *hw_cache [][MAX_ALIASES] = { - { "L1-data" , "l1-d", "l1d" }, - { "L1-instruction" , "l1-i", "l1i" }, - { "L2" , "l2" }, - { "Data-TLB" , "dtlb", "d-tlb" }, - { "Instruction-TLB" , "itlb", "i-tlb" }, - { "Branch" , "bpu" , "btb", "bpc" }, +static char *hw_cache[][MAX_ALIASES] = { + { "L1-data", "l1-d", "l1d" }, + { "L1-instruction", "l1-i", "l1i" }, + { "L2", "l2" }, + { "Data-TLB", "dtlb", "d-tlb" }, + { "Instruction-TLB", "itlb", "i-tlb" }, + { "Branch", "bpu" , "btb", "bpc" }, }; -static char *hw_cache_op [][MAX_ALIASES] = { - { "Load" , "read" }, - { "Store" , "write" }, - { "Prefetch" , "speculative-read", "speculative-load" }, +static char *hw_cache_op[][MAX_ALIASES] = { + { "Load", "read" }, + { "Store", "write" }, + { "Prefetch", "speculative-read", "speculative-load" }, }; -static char *hw_cache_result [][MAX_ALIASES] = { - { "Reference" , "ops", "access" }, - { "Miss" }, +static char *hw_cache_result[][MAX_ALIASES] = { + { "Reference", "ops", "access" }, + { "Miss" }, }; char *event_name(int counter) @@ -160,7 +159,8 @@ static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size) return -1; } -static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) +static int +parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) { int cache_type = -1, cache_op = 0, cache_result = 0; @@ -201,7 +201,7 @@ static int check_events(const char *str, unsigned int i) if (strlen(event_symbols[i].alias)) if (!strncmp(str, event_symbols[i].alias, - strlen(event_symbols[i].alias))) + strlen(event_symbols[i].alias))) return 1; return 0; } -- cgit v1.2.3 From dee412066aeb16c43cf31599948c1a1de385df56 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Tue, 23 Jun 2009 02:22:39 +0530 Subject: perf stat: Fix command option / manpage -l is not supported, it should be -S for scale. Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245703959.6167.16.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-stat.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index c368a72721d7..0d74346d21ab 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -8,8 +8,8 @@ perf-stat - Run a command and gather performance counter statistics SYNOPSIS -------- [verse] -'perf stat' [-e | --event=EVENT] [-l] [-a] -'perf stat' [-e | --event=EVENT] [-l] [-a] -- [] +'perf stat' [-e | --event=EVENT] [-S] [-a] +'perf stat' [-e | --event=EVENT] [-S] [-a] -- [] DESCRIPTION ----------- @@ -40,7 +40,7 @@ OPTIONS -a:: system-wide collection --l:: +-S:: scale counter values EXAMPLES -- cgit v1.2.3 From 3d906ef10a539ff336010afab8f6f9c4fe379695 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Jun 2009 11:23:07 +0200 Subject: perf_counter tools: Handle overlapping MMAP events Martin Schwidefsky reported "perf report" symbol resolution problems on S390. Since we only report MMAP, not MUNMAP, we have to deal with overlapping maps. We used to simply throw out the old map on the assumption whole maps got unmapped. This obviously doesn't deal with partial unmaps. However it appears some dynamic linkers do fancy partial unmaps (s390), so do something more elaborate and truncate the old maps, only removing them when they've been fully covered. This resolves (part of) the S390 symbol resolution problems. Reported-by: Martin Schwidefsky Tested-by: Martin Schwidefsky Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ec230a0146e9..b4e76f75ba87 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -400,9 +400,27 @@ static void thread__insert_map(struct thread *self, struct map *map) list_for_each_entry_safe(pos, tmp, &self->maps, node) { if (map__overlap(pos, map)) { - list_del_init(&pos->node); - /* XXX leaks dsos */ - free(pos); + if (verbose >= 2) { + printf("overlapping maps:\n"); + map__fprintf(map, stdout); + map__fprintf(pos, stdout); + } + + if (map->start <= pos->start && map->end > pos->start) + pos->start = map->end; + + if (map->end >= pos->end && map->start < pos->end) + pos->end = map->start; + + if (verbose >= 2) { + printf("after collision:\n"); + map__fprintf(pos, stdout); + } + + if (pos->start >= pos->end) { + list_del_init(&pos->node); + free(pos); + } } } -- cgit v1.2.3 From b0a28589b2fc9bee8ed83dee006a497d1ce93841 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 23 Jun 2009 16:39:53 +0200 Subject: perf report: Fix help text typo Reported-by: Brice Goglin Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-report.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 52d3fc6846a9..40c1db83a16d 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -13,7 +13,7 @@ SYNOPSIS DESCRIPTION ----------- This command displays the performance counter profile information recorded -via perf report. +via perf record. OPTIONS ------- -- cgit v1.2.3 From cca03c0aeb18a975abec28df518a2b64ae3e6964 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Tue, 23 Jun 2009 17:12:49 +0530 Subject: perf stat: Fix verbose for perf stat Error message should use stderr for verbose (-v), otherwise message will be lost for: $ ./perf stat -v > /dev/null For example on AMD bus-cycles event is not available so now it looks like: $ ./perf stat -v -e bus-cycles ls > /dev/null Error: counter 0, sys_perf_counter_open() syscall returned with -1 (Invalid argument) Performance counter stats for 'ls': bus-cycles 0.006765877 seconds time elapsed. Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245757369.3776.1.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 6d3eeac1ea25..5e04fcc8d077 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -109,6 +109,10 @@ static u64 walltime_nsecs_noise; static u64 runtime_cycles_avg; static u64 runtime_cycles_noise; + +#define ERR_PERF_OPEN \ +"Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" + static void create_perf_stat_counter(int counter) { struct perf_counter_attr *attr = attrs + counter; @@ -119,20 +123,20 @@ static void create_perf_stat_counter(int counter) if (system_wide) { int cpu; - for (cpu = 0; cpu < nr_cpus; cpu ++) { + for (cpu = 0; cpu < nr_cpus; cpu++) { fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); - if (fd[cpu][counter] < 0 && verbose) { - printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno)); - } + if (fd[cpu][counter] < 0 && verbose) + fprintf(stderr, ERR_PERF_OPEN, counter, + fd[cpu][counter], strerror(errno)); } } else { attr->inherit = inherit; attr->disabled = 1; fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); - if (fd[0][counter] < 0 && verbose) { - printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno)); - } + if (fd[0][counter] < 0 && verbose) + fprintf(stderr, ERR_PERF_OPEN, counter, + fd[0][counter], strerror(errno)); } } @@ -168,7 +172,7 @@ static void read_counter(int counter) count[0] = count[1] = count[2] = 0; nv = scale ? 3 : 1; - for (cpu = 0; cpu < nr_cpus; cpu ++) { + for (cpu = 0; cpu < nr_cpus; cpu++) { if (fd[cpu][counter] < 0) continue; -- cgit v1.2.3 From f7679dabfaf69840b000d238a020cee7157aca17 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Mon, 22 Jun 2009 18:42:33 +0200 Subject: perf_counter tools: Fix strbuf_fread() error path handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit size_t res cannot be less than 0 - fread returns 0 on error. [ Updated by: René Scharfe ] Reported-by: Ingo Molnar Signed-off-by: Roel Kluin Cc: Andrew Morton Cc: Junio C Hamano LKML-Reference: <4A3FB479.2090902@lsrfire.ath.cx> Signed-off-by: Ingo Molnar --- tools/perf/util/strbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index eaba09306802..464e7ca898cf 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -259,7 +259,7 @@ size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f) res = fread(sb->buf + sb->len, 1, size, f); if (res > 0) strbuf_setlen(sb, sb->len + res); - else if (res < 0 && oldalloc == 0) + else if (oldalloc == 0) strbuf_release(sb); return res; } -- cgit v1.2.3 From 3d63259583278262d9153316094e315f73ebfcb5 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 24 Jun 2009 18:19:34 +0530 Subject: perf stat: Remove dead code Remove dead code and do some code alignment. Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245847774.2681.2.camel@ht.satnam> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 44 +++++++++++++------------------------------- 1 file changed, 13 insertions(+), 31 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5e04fcc8d077..8420ec589506 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -59,42 +59,27 @@ static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { }; +#define MAX_RUN 100 + static int system_wide = 0; -static int inherit = 1; static int verbose = 0; - -static int fd[MAX_NR_CPUS][MAX_COUNTERS]; - -static int target_pid = -1; static int nr_cpus = 0; -static unsigned int page_size; +static int run_idx = 0; +static int run_count = 1; +static int inherit = 1; static int scale = 1; +static int target_pid = -1; -static const unsigned int default_count[] = { - 1000000, - 1000000, - 10000, - 10000, - 1000000, - 10000, -}; - -#define MAX_RUN 100 - -static int run_count = 1; -static int run_idx = 0; - -static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; -static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; - -//static u64 event_hist[MAX_RUN][MAX_COUNTERS][3]; - +static int fd[MAX_NR_CPUS][MAX_COUNTERS]; static u64 runtime_nsecs[MAX_RUN]; static u64 walltime_nsecs[MAX_RUN]; static u64 runtime_cycles[MAX_RUN]; +static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; +static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; + static u64 event_res_avg[MAX_COUNTERS][3]; static u64 event_res_noise[MAX_COUNTERS][3]; @@ -109,7 +94,6 @@ static u64 walltime_nsecs_noise; static u64 runtime_cycles_avg; static u64 runtime_cycles_noise; - #define ERR_PERF_OPEN \ "Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" @@ -470,9 +454,9 @@ static const struct option options[] = { OPT_INTEGER('p', "pid", &target_pid, "stat events on existing pid"), OPT_BOOLEAN('a', "all-cpus", &system_wide, - "system-wide collection from all CPUs"), + "system-wide collection from all CPUs"), OPT_BOOLEAN('S', "scale", &scale, - "scale/normalize counters"), + "scale/normalize counters"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_INTEGER('r', "repeat", &run_count, @@ -484,8 +468,6 @@ int cmd_stat(int argc, const char **argv, const char *prefix) { int status; - page_size = sysconf(_SC_PAGE_SIZE); - memcpy(attrs, default_attrs, sizeof(attrs)); argc = parse_options(argc, argv, options, stat_usage, 0); @@ -515,7 +497,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix) status = 0; for (run_idx = 0; run_idx < run_count; run_idx++) { if (run_count != 1 && verbose) - fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx+1); + fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); status = run_perf_stat(argc, argv); } -- cgit v1.2.3 From 1b173f77dd0d5fd4f0ff18034aaa79e30da068b9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 24 Jun 2009 19:54:29 +0200 Subject: perf_counter tools: Add CREDITS file for Git contributors Much of perf's libraries comes from the Git project. I noticed that the files (in tools/perf/util/*.[ch] and elsewhere) are quite spartan wrt. credits, so lets add a CREDITS file that includes an (incomplete!) list of main contributors. Thanks guys, these libraries are really useful. Special thanks go to Johannes Schindelin and Junio C Hamano for coming up with this list. List-Composed-By: Johannes Schindelin Cc: Junio C Hamano Signed-off-by: Ingo Molnar --- tools/perf/CREDITS | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 tools/perf/CREDITS (limited to 'tools') diff --git a/tools/perf/CREDITS b/tools/perf/CREDITS new file mode 100644 index 000000000000..c2ddcb3acbd0 --- /dev/null +++ b/tools/perf/CREDITS @@ -0,0 +1,30 @@ +Most of the infrastructure that 'perf' uses here has been reused +from the Git project, as of version: + + 66996ec: Sync with 1.6.2.4 + +Here is an (incomplete!) list of main contributors to those files +in util/* and elsewhere: + + Alex Riesen + Christian Couder + Dmitry Potapov + Jeff King + Johannes Schindelin + Johannes Sixt + Junio C Hamano + Linus Torvalds + Matthias Kestenholz + Michal Ostrowski + Miklos Vajna + Petr Baudis + Pierre Habouzit + René Scharfe + Samuel Tardieu + Shawn O. Pearce + Steffen Prohaska + Steve Haslam + +Thanks guys! + +The full history of the files can be found in the upstream Git commits. -- cgit v1.2.3 From 76c64c5e4c47b6d28deb3cae8dfa07a93c2229dc Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 24 Jun 2009 21:08:36 +0200 Subject: perf record: Fix filemap pathname parsing in /proc/pid/maps Looking backward for the first space from the end of a line in /proc/pid/maps does not find the start of the pathname of the mapped file if it contains a space. Since the only slashes we have in this file occur in the (absolute!) pathname column of file mappings, looking for the first slash in a line is a safe method to find the name. Signed-off-by: Johannes Weiner Cc: Stefani Seibold Cc: Andrew Morton Cc: "Eric W. Biederman" Cc: Alexey Dobriyan Cc: Peter Zijlstra LKML-Reference: <20090624190835.GA25548@cmpxchg.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d7ebbd757543..9b899ba1b410 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -306,12 +306,11 @@ static void pid_synthesize_mmap_samples(pid_t pid) continue; pbf += n + 3; if (*pbf == 'x') { /* vm_exec */ - char *execname = strrchr(bf, ' '); + char *execname = strchr(bf, '/'); - if (execname == NULL || execname[1] != '/') + if (execname == NULL) continue; - execname += 1; size = strlen(execname); execname[size - 1] = '\0'; /* Remove \n */ memcpy(mmap_ev.filename, execname, size); -- cgit v1.2.3 From 06813f6c743420c16f9248ab59bd2e68a2de57ba Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 25 Jun 2009 17:16:07 +0530 Subject: perf_counter tools: Check for valid cache operations Made new table for cache operartion stat 'hw_cache_stat' as: L1I : Read and prefetch only ITLB and BPU : Read-only introduce is_cache_op_valid() for cache operation validity And checks for valid cache operations. Reported-by : Ingo Molnar Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245930367.5308.33.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 06af2fadcd87..7939a21130d2 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -90,6 +90,34 @@ static char *hw_cache_result[][MAX_ALIASES] = { { "Miss" }, }; +#define C(x) PERF_COUNT_HW_CACHE_##x +#define CACHE_READ (1 << C(OP_READ)) +#define CACHE_WRITE (1 << C(OP_WRITE)) +#define CACHE_PREFETCH (1 << C(OP_PREFETCH)) +#define COP(x) (1 << x) + +/* + * cache operartion stat + * L1I : Read and prefetch only + * ITLB and BPU : Read-only + */ +static unsigned long hw_cache_stat[C(MAX)] = { + [C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), + [C(L1I)] = (CACHE_READ | CACHE_PREFETCH), + [C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), + [C(DTLB)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), + [C(ITLB)] = (CACHE_READ), + [C(BPU)] = (CACHE_READ), +}; + +static int is_cache_op_valid(u8 cache_type, u8 cache_op) +{ + if (hw_cache_stat[cache_type] & COP(cache_op)) + return 1; /* valid */ + else + return 0; /* invalid */ +} + char *event_name(int counter) { u64 config = attrs[counter].config; @@ -123,6 +151,8 @@ char *event_name(int counter) if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX) return "unknown-ext-hardware-cache-result"; + if (!is_cache_op_valid(cache_type, cache_op)) + return "invalid-cache"; sprintf(name, "%s-Cache-%s-%ses", hw_cache[cache_type][0], hw_cache_op[cache_op][0], @@ -179,6 +209,9 @@ parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) if (cache_op == -1) cache_op = PERF_COUNT_HW_CACHE_OP_READ; + if (!is_cache_op_valid(cache_type, cache_op)) + return -EINVAL; + cache_result = parse_aliases(str, hw_cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX); /* -- cgit v1.2.3 From e5c59547791f171b280bc4c4b2c3ff171824c1a3 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 25 Jun 2009 18:25:22 +0530 Subject: perf_counter tools: Shorten names for events Added new alias for events. On AMD box: $ ./perf stat -e l1d -e l1d-misses -e l1d-write -e l1d-prefetch -e l1d-prefetch-miss -e l1i -e l1i-misses -e l1i-prefetch -e l2 -e l2-misses -e l2-write -e dtlb -e dtlb-misses -e itlb -e itlb-misses -e bpu -e bpu-misses -- ls -lR /usr/include/ > /dev/null Before : Performance counter stats for 'ls -lR /usr/include/': 248064467 L1-data-Cache-Load-Referencees (scaled from 23.27%) 1001433 L1-data-Cache-Load-Misses (scaled from 23.34%) 153691 L1-data-Cache-Store-Referencees (scaled from 23.34%) 423248 L1-data-Cache-Prefetch-Referencees (scaled from 23.33%) 302138 L1-data-Cache-Prefetch-Misses (scaled from 23.25%) 251217546 L1-instruction-Cache-Load-Referencees (scaled from 23.25%) 5757005 L1-instruction-Cache-Load-Misses (scaled from 23.23%) 93435 L1-instruction-Cache-Prefetch-Referencees (scaled from 23.24%) 6496073 L2-Cache-Load-Referencees (scaled from 23.32%) 609485 L2-Cache-Load-Misses (scaled from 23.45%) 6876991 L2-Cache-Store-Referencees (scaled from 23.71%) 248922840 Data-TLB-Cache-Load-Referencees (scaled from 23.94%) 5828386 Data-TLB-Cache-Load-Misses (scaled from 24.17%) 257613506 Instruction-TLB-Cache-Load-Referencees (scaled from 24.20%) 6833 Instruction-TLB-Cache-Load-Misses (scaled from 23.88%) 109043606 Branch-Cache-Load-Referencees (scaled from 23.64%) 5552296 Branch-Cache-Load-Misses (scaled from 23.42%) 0.413702461 seconds time elapsed. After : Peformance counter stats for 'ls -lR /usr/include/': 266590464 L1-d$-loads (scaled from 23.03%) 1222273 L1-d$-load-misses (scaled from 23.58%) 146204 L1-d$-stores (scaled from 23.83%) 406344 L1-d$-prefetches (scaled from 24.09%) 283748 L1-d$-prefetch-misses (scaled from 24.10%) 249650965 L1-i$-loads (scaled from 23.80%) 3353961 L1-i$-load-misses (scaled from 23.82%) 104599 L1-i$-prefetches (scaled from 23.68%) 4836405 LLC-loads (scaled from 23.67%) 498214 LLC-load-misses (scaled from 23.66%) 4953994 LLC-stores (scaled from 23.64%) 243354097 dTLB-loads (scaled from 23.77%) 6468584 dTLB-load-misses (scaled from 23.74%) 249719549 iTLB-loads (scaled from 23.25%) 5060 iTLB-load-misses (scaled from 23.00%) 112343016 branch-loads (scaled from 22.76%) 5528876 branch-load-misses (scaled from 22.54%) 0.427154051 seconds time elapsed. Reported-by : Ingo Molnar Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245934522.5308.39.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 45 ++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 7939a21130d2..430f06083201 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -71,23 +71,23 @@ static char *sw_event_names[] = { #define MAX_ALIASES 8 static char *hw_cache[][MAX_ALIASES] = { - { "L1-data", "l1-d", "l1d" }, - { "L1-instruction", "l1-i", "l1i" }, - { "L2", "l2" }, - { "Data-TLB", "dtlb", "d-tlb" }, - { "Instruction-TLB", "itlb", "i-tlb" }, - { "Branch", "bpu" , "btb", "bpc" }, + { "L1-d$", "l1-d", "L1-data", }, + { "L1-i$", "l1-i", "L1-instruction", }, + { "LLC", "L2" }, + { "dTLB", "d-tlb", "Data-TLB", }, + { "iTLB", "i-tlb", "Instruction-TLB", }, + { "branch", "branches", "bpu", "btb", "bpc", }, }; static char *hw_cache_op[][MAX_ALIASES] = { - { "Load", "read" }, - { "Store", "write" }, - { "Prefetch", "speculative-read", "speculative-load" }, + { "load", "loads", "read", }, + { "store", "stores", "write", }, + { "prefetch", "prefetches", "speculative-read", "speculative-load", }, }; static char *hw_cache_result[][MAX_ALIASES] = { - { "Reference", "ops", "access" }, - { "Miss" }, + { "refs", "Reference", "ops", "access", }, + { "misses", "miss", }, }; #define C(x) PERF_COUNT_HW_CACHE_##x @@ -118,6 +118,22 @@ static int is_cache_op_valid(u8 cache_type, u8 cache_op) return 0; /* invalid */ } +static char *event_cache_name(u8 cache_type, u8 cache_op, u8 cache_result) +{ + static char name[50]; + + if (cache_result) { + sprintf(name, "%s-%s-%s", hw_cache[cache_type][0], + hw_cache_op[cache_op][0], + hw_cache_result[cache_result][0]); + } else { + sprintf(name, "%s-%s", hw_cache[cache_type][0], + hw_cache_op[cache_op][1]); + } + + return name; +} + char *event_name(int counter) { u64 config = attrs[counter].config; @@ -137,7 +153,6 @@ char *event_name(int counter) case PERF_TYPE_HW_CACHE: { u8 cache_type, cache_op, cache_result; - static char name[100]; cache_type = (config >> 0) & 0xff; if (cache_type > PERF_COUNT_HW_CACHE_MAX) @@ -153,12 +168,8 @@ char *event_name(int counter) if (!is_cache_op_valid(cache_type, cache_op)) return "invalid-cache"; - sprintf(name, "%s-Cache-%s-%ses", - hw_cache[cache_type][0], - hw_cache_op[cache_op][0], - hw_cache_result[cache_result][0]); - return name; + return event_cache_name(cache_type, cache_op, cache_result); } case PERF_TYPE_SOFTWARE: -- cgit v1.2.3 From 7c6a1c65bbd3be688e581511f45818663efc1877 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Jun 2009 17:05:54 +0200 Subject: perf_counter tools: Rework the file format Create a structured file format that includes the full perf_counter_attr and all its relevant counter IDs so that the reporting program has full information. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 3 +- tools/perf/builtin-record.c | 100 +++++++++++------- tools/perf/builtin-report.c | 37 +++++-- tools/perf/perf.h | 8 +- tools/perf/types.h | 17 ---- tools/perf/util/header.c | 242 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/header.h | 37 +++++++ tools/perf/util/string.h | 2 +- tools/perf/util/symbol.h | 2 +- tools/perf/util/types.h | 17 ++++ 10 files changed, 394 insertions(+), 71 deletions(-) delete mode 100644 tools/perf/types.h create mode 100644 tools/perf/util/header.c create mode 100644 tools/perf/util/header.h create mode 100644 tools/perf/util/types.h (limited to 'tools') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 36d7eef49913..d3887ed51a64 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -290,7 +290,7 @@ LIB_FILE=libperf.a LIB_H += ../../include/linux/perf_counter.h LIB_H += perf.h -LIB_H += types.h +LIB_H += util/types.h LIB_H += util/list.h LIB_H += util/rbtree.h LIB_H += util/levenshtein.h @@ -328,6 +328,7 @@ LIB_OBJS += util/sigchain.o LIB_OBJS += util/symbol.o LIB_OBJS += util/color.o LIB_OBJS += util/pager.o +LIB_OBJS += util/header.o BUILTIN_OBJS += builtin-annotate.o BUILTIN_OBJS += builtin-help.o diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 9b899ba1b410..f4f0240d2302 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -14,6 +14,8 @@ #include "util/parse-events.h" #include "util/string.h" +#include "util/header.h" + #include #include @@ -52,7 +54,8 @@ static int nr_poll; static int nr_cpu; static int file_new = 1; -static struct perf_file_header file_header; + +struct perf_header *header; struct mmap_event { struct perf_event_header header; @@ -328,7 +331,7 @@ static void pid_synthesize_mmap_samples(pid_t pid) fclose(fp); } -static void synthesize_samples(void) +static void synthesize_all(void) { DIR *proc; struct dirent dirent, *next; @@ -352,10 +355,35 @@ static void synthesize_samples(void) static int group_fd; +static struct perf_header_attr *get_header_attr(struct perf_counter_attr *a, int nr) +{ + struct perf_header_attr *h_attr; + + if (nr < header->attrs) { + h_attr = header->attr[nr]; + } else { + h_attr = perf_header_attr__new(a); + perf_header__add_attr(header, h_attr); + } + + return h_attr; +} + static void create_counter(int counter, int cpu, pid_t pid) { struct perf_counter_attr *attr = attrs + counter; - int track = 1; + struct perf_header_attr *h_attr; + int track = !counter; /* only the first counter needs these */ + struct { + u64 count; + u64 time_enabled; + u64 time_running; + u64 id; + } read_data; + + attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING | + PERF_FORMAT_ID; attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; @@ -368,22 +396,11 @@ static void create_counter(int counter, int cpu, pid_t pid) if (call_graph) attr->sample_type |= PERF_SAMPLE_CALLCHAIN; - if (file_new) { - file_header.sample_type = attr->sample_type; - } else { - if (file_header.sample_type != attr->sample_type) { - fprintf(stderr, "incompatible append\n"); - exit(-1); - } - } - attr->mmap = track; attr->comm = track; attr->inherit = (cpu < 0) && inherit; attr->disabled = 1; - track = 0; /* only the first counter needs these */ - try_again: fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0); @@ -414,6 +431,19 @@ try_again: exit(-1); } + h_attr = get_header_attr(attr, counter); + + if (!file_new) { + if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { + fprintf(stderr, "incompatible append\n"); + exit(-1); + } + } + + read(fd[nr_cpu][counter], &read_data, sizeof(read_data)); + + perf_header_attr__add_id(h_attr, read_data.id); + assert(fd[nr_cpu][counter] >= 0); fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK); @@ -444,11 +474,6 @@ static void open_counters(int cpu, pid_t pid) { int counter; - if (pid > 0) { - pid_synthesize_comm_event(pid, 0); - pid_synthesize_mmap_samples(pid); - } - group_fd = -1; for (counter = 0; counter < nr_counters; counter++) create_counter(counter, cpu, pid); @@ -458,17 +483,16 @@ static void open_counters(int cpu, pid_t pid) static void atexit_header(void) { - file_header.data_size += bytes_written; + header->data_size += bytes_written; - if (pwrite(output, &file_header, sizeof(file_header), 0) == -1) - perror("failed to write on file headers"); + perf_header__write(header, output); } static int __cmd_record(int argc, const char **argv) { int i, counter; struct stat st; - pid_t pid; + pid_t pid = 0; int flags; int ret; @@ -499,22 +523,31 @@ static int __cmd_record(int argc, const char **argv) exit(-1); } - if (!file_new) { - if (read(output, &file_header, sizeof(file_header)) == -1) { - perror("failed to read file headers"); - exit(-1); - } - - lseek(output, file_header.data_size, SEEK_CUR); - } + if (!file_new) + header = perf_header__read(output); + else + header = perf_header__new(); atexit(atexit_header); if (!system_wide) { - open_counters(-1, target_pid != -1 ? target_pid : getpid()); + pid = target_pid; + if (pid == -1) + pid = getpid(); + + open_counters(-1, pid); } else for (i = 0; i < nr_cpus; i++) open_counters(i, target_pid); + if (file_new) + perf_header__write(header, output); + + if (!system_wide) { + pid_synthesize_comm_event(pid, 0); + pid_synthesize_mmap_samples(pid); + } else + synthesize_all(); + if (target_pid == -1 && argc) { pid = fork(); if (pid < 0) @@ -538,9 +571,6 @@ static int __cmd_record(int argc, const char **argv) } } - if (system_wide) - synthesize_samples(); - while (!done) { int hits = samples; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index b4e76f75ba87..e575f3039766 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -17,6 +17,7 @@ #include "util/string.h" #include "perf.h" +#include "util/header.h" #include "util/parse-options.h" #include "util/parse-events.h" @@ -1385,13 +1386,27 @@ process_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static struct perf_file_header file_header; +static struct perf_header *header; + +static int perf_header__has_sample(u64 sample_mask) +{ + int i; + + for (i = 0; i < header->attrs; i++) { + struct perf_header_attr *attr = header->attr[i]; + + if (!(attr->attr.sample_type & sample_mask)) + return 0; + } + + return 1; +} static int __cmd_report(void) { int ret, rc = EXIT_FAILURE; unsigned long offset = 0; - unsigned long head = sizeof(file_header); + unsigned long head, shift; struct stat stat; event_t *event; uint32_t size; @@ -1419,13 +1434,11 @@ static int __cmd_report(void) exit(0); } - if (read(input, &file_header, sizeof(file_header)) == -1) { - perror("failed to read file headers"); - exit(-1); - } + header = perf_header__read(input); + head = header->data_offset; if (sort__has_parent && - !(file_header.sample_type & PERF_SAMPLE_CALLCHAIN)) { + !perf_header__has_sample(PERF_SAMPLE_CALLCHAIN)) { fprintf(stderr, "selected --sort parent, but no callchain data\n"); exit(-1); } @@ -1445,6 +1458,11 @@ static int __cmd_report(void) cwd = NULL; cwdlen = 0; } + + shift = page_size * (head / page_size); + offset += shift; + head -= shift; + remap: buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, MAP_SHARED, input, offset); @@ -1461,9 +1479,10 @@ more: size = 8; if (head + event->header.size >= page_size * mmap_window) { - unsigned long shift = page_size * (head / page_size); int ret; + shift = page_size * (head / page_size); + ret = munmap(buf, page_size * mmap_window); assert(ret == 0); @@ -1501,7 +1520,7 @@ more: head += size; - if (offset + head >= sizeof(file_header) + file_header.data_size) + if (offset + head >= header->data_offset + header->data_size) goto done; if (offset + head < stat.st_size) diff --git a/tools/perf/perf.h b/tools/perf/perf.h index bccb529dac08..16c84fd73c86 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -19,7 +19,7 @@ #include #include "../../include/linux/perf_counter.h" -#include "types.h" +#include "util/types.h" /* * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all @@ -66,10 +66,4 @@ sys_perf_counter_open(struct perf_counter_attr *attr, #define MAX_COUNTERS 256 #define MAX_NR_CPUS 256 -struct perf_file_header { - u64 version; - u64 sample_type; - u64 data_size; -}; - #endif diff --git a/tools/perf/types.h b/tools/perf/types.h deleted file mode 100644 index 5e75f9005940..000000000000 --- a/tools/perf/types.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _PERF_TYPES_H -#define _PERF_TYPES_H - -/* - * We define u64 as unsigned long long for every architecture - * so that we can print it with %Lx without getting warnings. - */ -typedef unsigned long long u64; -typedef signed long long s64; -typedef unsigned int u32; -typedef signed int s32; -typedef unsigned short u16; -typedef signed short s16; -typedef unsigned char u8; -typedef signed char s8; - -#endif /* _PERF_TYPES_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c new file mode 100644 index 000000000000..450384b3bbe5 --- /dev/null +++ b/tools/perf/util/header.c @@ -0,0 +1,242 @@ +#include +#include +#include +#include + +#include "util.h" +#include "header.h" + +/* + * + */ + +struct perf_header_attr *perf_header_attr__new(struct perf_counter_attr *attr) +{ + struct perf_header_attr *self = malloc(sizeof(*self)); + + if (!self) + die("nomem"); + + self->attr = *attr; + self->ids = 0; + self->size = 1; + self->id = malloc(sizeof(u64)); + + if (!self->id) + die("nomem"); + + return self; +} + +void perf_header_attr__add_id(struct perf_header_attr *self, u64 id) +{ + int pos = self->ids; + + self->ids++; + if (self->ids > self->size) { + self->size *= 2; + self->id = realloc(self->id, self->size * sizeof(u64)); + if (!self->id) + die("nomem"); + } + self->id[pos] = id; +} + +/* + * + */ + +struct perf_header *perf_header__new(void) +{ + struct perf_header *self = malloc(sizeof(*self)); + + if (!self) + die("nomem"); + + self->frozen = 0; + + self->attrs = 0; + self->size = 1; + self->attr = malloc(sizeof(void *)); + + if (!self->attr) + die("nomem"); + + self->data_offset = 0; + self->data_size = 0; + + return self; +} + +void perf_header__add_attr(struct perf_header *self, + struct perf_header_attr *attr) +{ + int pos = self->attrs; + + if (self->frozen) + die("frozen"); + + self->attrs++; + if (self->attrs > self->size) { + self->size *= 2; + self->attr = realloc(self->attr, self->size * sizeof(void *)); + if (!self->attr) + die("nomem"); + } + self->attr[pos] = attr; +} + +static const char *__perf_magic = "PERFFILE"; + +#define PERF_MAGIC (*(u64 *)__perf_magic) + +struct perf_file_section { + u64 offset; + u64 size; +}; + +struct perf_file_attr { + struct perf_counter_attr attr; + struct perf_file_section ids; +}; + +struct perf_file_header { + u64 magic; + u64 size; + u64 attr_size; + struct perf_file_section attrs; + struct perf_file_section data; +}; + +static void do_write(int fd, void *buf, size_t size) +{ + while (size) { + int ret = write(fd, buf, size); + + if (ret < 0) + die("failed to write"); + + size -= ret; + buf += ret; + } +} + +void perf_header__write(struct perf_header *self, int fd) +{ + struct perf_file_header f_header; + struct perf_file_attr f_attr; + struct perf_header_attr *attr; + int i; + + lseek(fd, sizeof(f_header), SEEK_SET); + + + for (i = 0; i < self->attrs; i++) { + attr = self->attr[i]; + + attr->id_offset = lseek(fd, 0, SEEK_CUR); + do_write(fd, attr->id, attr->ids * sizeof(u64)); + } + + + self->attr_offset = lseek(fd, 0, SEEK_CUR); + + for (i = 0; i < self->attrs; i++) { + attr = self->attr[i]; + + f_attr = (struct perf_file_attr){ + .attr = attr->attr, + .ids = { + .offset = attr->id_offset, + .size = attr->ids * sizeof(u64), + } + }; + do_write(fd, &f_attr, sizeof(f_attr)); + } + + + self->data_offset = lseek(fd, 0, SEEK_CUR); + + f_header = (struct perf_file_header){ + .magic = PERF_MAGIC, + .size = sizeof(f_header), + .attr_size = sizeof(f_attr), + .attrs = { + .offset = self->attr_offset, + .size = self->attrs * sizeof(f_attr), + }, + .data = { + .offset = self->data_offset, + .size = self->data_size, + }, + }; + + lseek(fd, 0, SEEK_SET); + do_write(fd, &f_header, sizeof(f_header)); + lseek(fd, self->data_offset + self->data_size, SEEK_SET); + + self->frozen = 1; +} + +static void do_read(int fd, void *buf, size_t size) +{ + while (size) { + int ret = read(fd, buf, size); + + if (ret < 0) + die("failed to read"); + + size -= ret; + buf += ret; + } +} + +struct perf_header *perf_header__read(int fd) +{ + struct perf_header *self = perf_header__new(); + struct perf_file_header f_header; + struct perf_file_attr f_attr; + u64 f_id; + + int nr_attrs, nr_ids, i, j; + + lseek(fd, 0, SEEK_SET); + do_read(fd, &f_header, sizeof(f_header)); + + if (f_header.magic != PERF_MAGIC || + f_header.size != sizeof(f_header) || + f_header.attr_size != sizeof(f_attr)) + die("incompatible file format"); + + nr_attrs = f_header.attrs.size / sizeof(f_attr); + lseek(fd, f_header.attrs.offset, SEEK_SET); + + for (i = 0; i < nr_attrs; i++) { + struct perf_header_attr *attr; + off_t tmp = lseek(fd, 0, SEEK_CUR); + + do_read(fd, &f_attr, sizeof(f_attr)); + + attr = perf_header_attr__new(&f_attr.attr); + + nr_ids = f_attr.ids.size / sizeof(u64); + lseek(fd, f_attr.ids.offset, SEEK_SET); + + for (j = 0; j < nr_ids; j++) { + do_read(fd, &f_id, sizeof(f_id)); + + perf_header_attr__add_id(attr, f_id); + } + perf_header__add_attr(self, attr); + lseek(fd, tmp, SEEK_SET); + } + + self->data_offset = f_header.data.offset; + self->data_size = f_header.data.size; + + lseek(fd, self->data_offset + self->data_size, SEEK_SET); + + self->frozen = 1; + + return self; +} diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h new file mode 100644 index 000000000000..b5ef53ad4c7a --- /dev/null +++ b/tools/perf/util/header.h @@ -0,0 +1,37 @@ +#ifndef _PERF_HEADER_H +#define _PERF_HEADER_H + +#include "../../../include/linux/perf_counter.h" +#include +#include "types.h" + +struct perf_header_attr { + struct perf_counter_attr attr; + int ids, size; + u64 *id; + off_t id_offset; +}; + +struct perf_header { + int frozen; + int attrs, size; + struct perf_header_attr **attr; + off_t attr_offset; + u64 data_offset; + u64 data_size; +}; + +struct perf_header *perf_header__read(int fd); +void perf_header__write(struct perf_header *self, int fd); + +void perf_header__add_attr(struct perf_header *self, + struct perf_header_attr *attr); + +struct perf_header_attr * +perf_header_attr__new(struct perf_counter_attr *attr); +void perf_header_attr__add_id(struct perf_header_attr *self, u64 id); + + +struct perf_header *perf_header__new(void); + +#endif /* _PERF_HEADER_H */ diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h index 37b03255b425..3dca2f654cd0 100644 --- a/tools/perf/util/string.h +++ b/tools/perf/util/string.h @@ -1,7 +1,7 @@ #ifndef _PERF_STRING_H_ #define _PERF_STRING_H_ -#include "../types.h" +#include "types.h" int hex2u64(const char *ptr, u64 *val); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index ea332e56e458..940b432db16e 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -2,7 +2,7 @@ #define _PERF_SYMBOL_ 1 #include -#include "../types.h" +#include "types.h" #include "list.h" #include "rbtree.h" diff --git a/tools/perf/util/types.h b/tools/perf/util/types.h new file mode 100644 index 000000000000..5e75f9005940 --- /dev/null +++ b/tools/perf/util/types.h @@ -0,0 +1,17 @@ +#ifndef _PERF_TYPES_H +#define _PERF_TYPES_H + +/* + * We define u64 as unsigned long long for every architecture + * so that we can print it with %Lx without getting warnings. + */ +typedef unsigned long long u64; +typedef signed long long s64; +typedef unsigned int u32; +typedef signed int s32; +typedef unsigned short u16; +typedef signed short s16; +typedef unsigned char u8; +typedef signed char s8; + +#endif /* _PERF_TYPES_H */ -- cgit v1.2.3 From e6e18ec79b023d5fe84226cef533cf0e3770ce93 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Jun 2009 11:27:12 +0200 Subject: perf_counter: Rework the sample ABI The PERF_EVENT_READ implementation made me realize we don't actually need the sample_type int the output sample, since we already have that in the perf_counter_attr information. Therefore, remove the PERF_EVENT_MISC_OVERFLOW bit and the event->type overloading, and imply put counter overflow samples in a PERF_EVENT_SAMPLE type. This also fixes the issue that event->type was only 32-bit and sample_type had 64 usable bits. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 10 +++++----- kernel/perf_counter.c | 36 +++++++++++++++--------------------- tools/perf/builtin-annotate.c | 8 ++++---- tools/perf/builtin-report.c | 32 +++++++++++++++++++------------- tools/perf/builtin-top.c | 11 ++++++----- 5 files changed, 49 insertions(+), 48 deletions(-) (limited to 'tools') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index de70a10b5ec8..3078e23c91eb 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -262,7 +262,6 @@ struct perf_counter_mmap_page { #define PERF_EVENT_MISC_KERNEL (1 << 0) #define PERF_EVENT_MISC_USER (2 << 0) #define PERF_EVENT_MISC_HYPERVISOR (3 << 0) -#define PERF_EVENT_MISC_OVERFLOW (1 << 2) struct perf_event_header { __u32 type; @@ -348,9 +347,6 @@ enum perf_event_type { PERF_EVENT_READ = 8, /* - * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field - * will be PERF_SAMPLE_* - * * struct { * struct perf_event_header header; * @@ -358,8 +354,9 @@ enum perf_event_type { * { u32 pid, tid; } && PERF_SAMPLE_TID * { u64 time; } && PERF_SAMPLE_TIME * { u64 addr; } && PERF_SAMPLE_ADDR - * { u64 config; } && PERF_SAMPLE_CONFIG + * { u64 id; } && PERF_SAMPLE_ID * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u64 period; } && PERF_SAMPLE_PERIOD * * { u64 nr; * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP @@ -368,6 +365,9 @@ enum perf_event_type { * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN * }; */ + PERF_EVENT_SAMPLE = 9, + + PERF_EVENT_MAX, /* non-ABI */ }; enum perf_callchain_context { diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 385ca51c6e60..f2f232696587 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2575,15 +2575,14 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, u32 cpu, reserved; } cpu_entry; - header.type = 0; + header.type = PERF_EVENT_SAMPLE; header.size = sizeof(header); - header.misc = PERF_EVENT_MISC_OVERFLOW; + header.misc = 0; header.misc |= perf_misc_flags(data->regs); if (sample_type & PERF_SAMPLE_IP) { ip = perf_instruction_pointer(data->regs); - header.type |= PERF_SAMPLE_IP; header.size += sizeof(ip); } @@ -2592,7 +2591,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, tid_entry.pid = perf_counter_pid(counter, current); tid_entry.tid = perf_counter_tid(counter, current); - header.type |= PERF_SAMPLE_TID; header.size += sizeof(tid_entry); } @@ -2602,34 +2600,25 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, */ time = sched_clock(); - header.type |= PERF_SAMPLE_TIME; header.size += sizeof(u64); } - if (sample_type & PERF_SAMPLE_ADDR) { - header.type |= PERF_SAMPLE_ADDR; + if (sample_type & PERF_SAMPLE_ADDR) header.size += sizeof(u64); - } - if (sample_type & PERF_SAMPLE_ID) { - header.type |= PERF_SAMPLE_ID; + if (sample_type & PERF_SAMPLE_ID) header.size += sizeof(u64); - } if (sample_type & PERF_SAMPLE_CPU) { - header.type |= PERF_SAMPLE_CPU; header.size += sizeof(cpu_entry); cpu_entry.cpu = raw_smp_processor_id(); } - if (sample_type & PERF_SAMPLE_PERIOD) { - header.type |= PERF_SAMPLE_PERIOD; + if (sample_type & PERF_SAMPLE_PERIOD) header.size += sizeof(u64); - } if (sample_type & PERF_SAMPLE_GROUP) { - header.type |= PERF_SAMPLE_GROUP; header.size += sizeof(u64) + counter->nr_siblings * sizeof(group_entry); } @@ -2639,10 +2628,9 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, if (callchain) { callchain_size = (1 + callchain->nr) * sizeof(u64); - - header.type |= PERF_SAMPLE_CALLCHAIN; header.size += callchain_size; - } + } else + header.size += sizeof(u64); } ret = perf_output_begin(&handle, counter, header.size, nmi, 1); @@ -2693,8 +2681,14 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, } } - if (callchain) - perf_output_copy(&handle, callchain, callchain_size); + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + if (callchain) + perf_output_copy(&handle, callchain, callchain_size); + else { + u64 nr = 0; + perf_output_put(&handle, nr); + } + } perf_output_end(&handle); } diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 7e58e3ad1508..722c0f54e549 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -855,7 +855,7 @@ static unsigned long total = 0, total_unknown = 0; static int -process_overflow_event(event_t *event, unsigned long offset, unsigned long head) +process_sample_event(event_t *event, unsigned long offset, unsigned long head) { char level; int show = 0; @@ -1013,10 +1013,10 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head) static int process_event(event_t *event, unsigned long offset, unsigned long head) { - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) - return process_overflow_event(event, offset, head); - switch (event->header.type) { + case PERF_EVENT_SAMPLE: + return process_sample_event(event, offset, head); + case PERF_EVENT_MMAP: return process_mmap_event(event, offset, head); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e575f3039766..ec5361c67bf5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -53,6 +53,8 @@ static regex_t parent_regex; static int exclude_other = 1; +static u64 sample_type; + struct ip_event { struct perf_event_header header; u64 ip; @@ -1135,7 +1137,7 @@ static int validate_chain(struct ip_callchain *chain, event_t *event) } static int -process_overflow_event(event_t *event, unsigned long offset, unsigned long head) +process_sample_event(event_t *event, unsigned long offset, unsigned long head) { char level; int show = 0; @@ -1147,12 +1149,12 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) void *more_data = event->ip.__more_data; struct ip_callchain *chain = NULL; - if (event->header.type & PERF_SAMPLE_PERIOD) { + if (sample_type & PERF_SAMPLE_PERIOD) { period = *(u64 *)more_data; more_data += sizeof(u64); } - dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", + dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.misc, @@ -1160,7 +1162,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) (void *)(long)ip, (long long)period); - if (event->header.type & PERF_SAMPLE_CALLCHAIN) { + if (sample_type & PERF_SAMPLE_CALLCHAIN) { int i; chain = (void *)more_data; @@ -1352,10 +1354,10 @@ process_event(event_t *event, unsigned long offset, unsigned long head) { trace_event(event); - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) - return process_overflow_event(event, offset, head); - switch (event->header.type) { + case PERF_EVENT_SAMPLE: + return process_sample_event(event, offset, head); + case PERF_EVENT_MMAP: return process_mmap_event(event, offset, head); @@ -1388,18 +1390,21 @@ process_event(event_t *event, unsigned long offset, unsigned long head) static struct perf_header *header; -static int perf_header__has_sample(u64 sample_mask) +static u64 perf_header__sample_type(void) { + u64 sample_type = 0; int i; for (i = 0; i < header->attrs; i++) { struct perf_header_attr *attr = header->attr[i]; - if (!(attr->attr.sample_type & sample_mask)) - return 0; + if (!sample_type) + sample_type = attr->attr.sample_type; + else if (sample_type != attr->attr.sample_type) + die("non matching sample_type"); } - return 1; + return sample_type; } static int __cmd_report(void) @@ -1437,8 +1442,9 @@ static int __cmd_report(void) header = perf_header__read(input); head = header->data_offset; - if (sort__has_parent && - !perf_header__has_sample(PERF_SAMPLE_CALLCHAIN)) { + sample_type = perf_header__sample_type(); + + if (sort__has_parent && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { fprintf(stderr, "selected --sort parent, but no callchain data\n"); exit(-1); } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5352b5e352ed..cf0d21f1ae10 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -392,11 +392,11 @@ static void record_ip(u64 ip, int counter) samples--; } -static void process_event(u64 ip, int counter) +static void process_event(u64 ip, int counter, int user) { samples++; - if (ip < min_ip || ip > max_ip) { + if (user) { userspace_samples++; return; } @@ -509,9 +509,10 @@ static void mmap_read_counter(struct mmap_data *md) old += size; - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) { - if (event->header.type & PERF_SAMPLE_IP) - process_event(event->ip.ip, md->counter); + if (event->header.type == PERF_EVENT_SAMPLE) { + int user = + (event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK) == PERF_EVENT_MISC_USER; + process_event(event->ip.ip, md->counter, user); } } -- cgit v1.2.3 From 649c48a9e7fafcc72bfcc99471d9dea98d789d59 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Jun 2009 21:12:48 +0200 Subject: perf-report: Add modes for inherited stats and no-samples Now that we can collect per task statistics, add modes that make use of that facility. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f4f0240d2302..798a56d890e5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -41,6 +41,8 @@ static int force = 0; static int append_file = 0; static int call_graph = 0; static int verbose = 0; +static int inherit_stat = 0; +static int no_samples = 0; static long samples; static struct timeval last_read; @@ -393,6 +395,12 @@ static void create_counter(int counter, int cpu, pid_t pid) attr->sample_freq = freq; } + if (no_samples) + attr->sample_freq = 0; + + if (inherit_stat) + attr->inherit_stat = 1; + if (call_graph) attr->sample_type |= PERF_SAMPLE_CALLCHAIN; @@ -571,7 +579,7 @@ static int __cmd_record(int argc, const char **argv) } } - while (!done) { + for (;;) { int hits = samples; for (i = 0; i < nr_cpu; i++) { @@ -579,8 +587,11 @@ static int __cmd_record(int argc, const char **argv) mmap_read(&mmap_array[i][counter]); } - if (hits == samples) + if (hits == samples) { + if (done) + break; ret = poll(event_array, nr_poll, 100); + } } /* @@ -629,6 +640,10 @@ static const struct option options[] = { "do call-graph (stack chain/backtrace) recording"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), + OPT_BOOLEAN('s', "stat", &inherit_stat, + "per thread counts"), + OPT_BOOLEAN('n', "no-samples", &no_samples, + "don't sample"), OPT_END() }; -- cgit v1.2.3 From e9ea2fde7a07ae60a119171a2946ed2ae778271e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Jun 2009 22:46:04 +0200 Subject: perf-report: Add bare minimum PERF_EVENT_READ parsing Provide the basic infrastructure to provide per task stats. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ec5361c67bf5..681c2233f882 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -100,6 +100,13 @@ struct lost_event { u64 lost; }; +struct read_event { + struct perf_event_header header; + u32 pid,tid; + u64 value; + u64 format[3]; +}; + typedef union event_union { struct perf_event_header header; struct ip_event ip; @@ -108,6 +115,7 @@ typedef union event_union { struct fork_event fork; struct period_event period; struct lost_event lost; + struct read_event read; } event_t; static LIST_HEAD(dsos); @@ -1349,6 +1357,19 @@ static void trace_event(event_t *event) dprintf(".\n"); } +static int +process_read_event(event_t *event, unsigned long offset, unsigned long head) +{ + dprintf("%p [%p]: PERF_EVENT_READ: %d %d %Lu\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->read.pid, + event->read.tid, + event->read.value); + + return 0; +} + static int process_event(event_t *event, unsigned long offset, unsigned long head) { @@ -1373,6 +1394,9 @@ process_event(event_t *event, unsigned long offset, unsigned long head) case PERF_EVENT_LOST: return process_lost_event(event, offset, head); + case PERF_EVENT_READ: + return process_read_event(event, offset, head); + /* * We dont process them right now but they are fine: */ -- cgit v1.2.3 From 4418351f06d9ce73acc846158c20186965f920f3 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 25 Jun 2009 21:27:42 +0530 Subject: perf_counter tools: Add alias for 'l1d' and 'l1i' Add 'l1d' and 'l1i' aliases again as shortcuts - just dont make them the primary display alias. Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1245945462.9157.11.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 430f06083201..4d042f104cdc 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -71,8 +71,8 @@ static char *sw_event_names[] = { #define MAX_ALIASES 8 static char *hw_cache[][MAX_ALIASES] = { - { "L1-d$", "l1-d", "L1-data", }, - { "L1-i$", "l1-i", "L1-instruction", }, + { "L1-d$", "l1-d", "l1d", "L1-data", }, + { "L1-i$", "l1-i", "l1i", "L1-instruction", }, { "LLC", "L2" }, { "dTLB", "d-tlb", "Data-TLB", }, { "iTLB", "i-tlb", "Instruction-TLB", }, -- cgit v1.2.3 From 3928ddbe994cce1da1b6365b0db04d5765f254f4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 25 Jun 2009 22:21:27 +0200 Subject: perf record: Fix unhandled io return value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building latest perfcounter fails on the following error: builtin-record.c: In function ‘create_counter’: builtin-record.c:451: erreur: ignoring return value of ‘read’, declared with attribute warn_unused_result make: *** [builtin-record.o] Erreur 1 Just check if we successfully read the perf file descriptor. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <1245961287-5327-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 798a56d890e5..d18546f37d7c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -448,7 +448,10 @@ try_again: } } - read(fd[nr_cpu][counter], &read_data, sizeof(read_data)); + if (read(fd[nr_cpu][counter], &read_data, sizeof(read_data)) == -1) { + perror("Unable to read perf file descriptor\n"); + exit(-1); + } perf_header_attr__add_id(h_attr, read_data.id); -- cgit v1.2.3 From 8cb76d99d715741637b6d0884f389e17e9cb05d2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 26 Jun 2009 16:28:00 +0200 Subject: perf_counter tools: Prepare a small callchain framework We plan to display the callchains depending on some user-configurable parameters. To gather the callchains stats from the recorded stream in a fast way, this patch introduces an ad hoc radix tree adapted for callchains and also a rbtree to sort these callchains once we have gathered every events from the stream. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1246026481-8314-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 1 + tools/perf/builtin-report.c | 5 -- tools/perf/perf.h | 5 ++ tools/perf/util/callchain.c | 174 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/callchain.h | 33 +++++++++ 5 files changed, 213 insertions(+), 5 deletions(-) create mode 100644 tools/perf/util/callchain.c create mode 100644 tools/perf/util/callchain.h (limited to 'tools') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index d3887ed51a64..1c1296d8a64b 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -329,6 +329,7 @@ LIB_OBJS += util/symbol.o LIB_OBJS += util/color.o LIB_OBJS += util/pager.o LIB_OBJS += util/header.o +LIB_OBJS += util/callchain.o BUILTIN_OBJS += builtin-annotate.o BUILTIN_OBJS += builtin-help.o diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 681c2233f882..28d1cb2127e9 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -62,11 +62,6 @@ struct ip_event { unsigned char __more_data[]; }; -struct ip_callchain { - u64 nr; - u64 ips[0]; -}; - struct mmap_event { struct perf_event_header header; u32 pid, tid; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 16c84fd73c86..a49842b69a59 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -66,4 +66,9 @@ sys_perf_counter_open(struct perf_counter_attr *attr, #define MAX_COUNTERS 256 #define MAX_NR_CPUS 256 +struct ip_callchain { + u64 nr; + u64 ips[0]; +}; + #endif diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c new file mode 100644 index 000000000000..ad3c28578961 --- /dev/null +++ b/tools/perf/util/callchain.c @@ -0,0 +1,174 @@ +/* + * Copyright (C) 2009, Frederic Weisbecker + * + * Handle the callchains from the stream in an ad-hoc radix tree and then + * sort them in an rbtree. + * + */ + +#include +#include +#include +#include + +#include "callchain.h" + + +static void rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct callchain_node *rnode; + + while (*p) { + parent = *p; + rnode = rb_entry(parent, struct callchain_node, rb_node); + + if (rnode->hit < chain->hit) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&chain->rb_node, parent, p); + rb_insert_color(&chain->rb_node, root); +} + +/* + * Once we get every callchains from the stream, we can now + * sort them by hit + */ +void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node) +{ + struct callchain_node *child; + + list_for_each_entry(child, &node->children, brothers) + sort_chain_to_rbtree(rb_root, child); + + if (node->hit) + rb_insert_callchain(rb_root, node); +} + +static struct callchain_node *create_child(struct callchain_node *parent) +{ + struct callchain_node *new; + + new = malloc(sizeof(*new)); + if (!new) { + perror("not enough memory to create child for code path tree"); + return NULL; + } + new->parent = parent; + INIT_LIST_HEAD(&new->children); + INIT_LIST_HEAD(&new->val); + list_add_tail(&new->brothers, &parent->children); + + return new; +} + +static void +fill_node(struct callchain_node *node, struct ip_callchain *chain, int start) +{ + int i; + + for (i = start; i < chain->nr; i++) { + struct callchain_list *call; + + call = malloc(sizeof(*chain)); + if (!call) { + perror("not enough memory for the code path tree"); + return; + } + call->ip = chain->ips[i]; + list_add_tail(&call->list, &node->val); + } + node->val_nr = i - start; +} + +static void add_child(struct callchain_node *parent, struct ip_callchain *chain) +{ + struct callchain_node *new; + + new = create_child(parent); + fill_node(new, chain, parent->val_nr); + + new->hit = 1; +} + +static void +split_add_child(struct callchain_node *parent, struct ip_callchain *chain, + struct callchain_list *to_split, int idx) +{ + struct callchain_node *new; + + /* split */ + new = create_child(parent); + list_move_tail(&to_split->list, &new->val); + new->hit = parent->hit; + parent->hit = 0; + parent->val_nr = idx; + + /* create the new one */ + add_child(parent, chain); +} + +static int +__append_chain(struct callchain_node *root, struct ip_callchain *chain, + int start); + +static int +__append_chain_children(struct callchain_node *root, struct ip_callchain *chain) +{ + struct callchain_node *rnode; + + /* lookup in childrens */ + list_for_each_entry(rnode, &root->children, brothers) { + int ret = __append_chain(rnode, chain, root->val_nr); + if (!ret) + return 0; + } + return -1; +} + +static int +__append_chain(struct callchain_node *root, struct ip_callchain *chain, + int start) +{ + struct callchain_list *cnode; + int i = start; + bool found = false; + + /* lookup in the current node */ + list_for_each_entry(cnode, &root->val, list) { + if (cnode->ip != chain->ips[i++]) + break; + if (!found) + found = true; + if (i == chain->nr) + break; + } + + /* matches not, relay on the parent */ + if (!found) + return -1; + + /* we match only a part of the node. Split it and add the new chain */ + if (i < root->val_nr) { + split_add_child(root, chain, cnode, i); + return 0; + } + + /* we match 100% of the path, increment the hit */ + if (i == root->val_nr) { + root->hit++; + return 0; + } + + return __append_chain_children(root, chain); +} + +void append_chain(struct callchain_node *root, struct ip_callchain *chain) +{ + if (__append_chain_children(root, chain) == -1) + add_child(root, chain); +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h new file mode 100644 index 000000000000..fa1cd2f71fd3 --- /dev/null +++ b/tools/perf/util/callchain.h @@ -0,0 +1,33 @@ +#ifndef __PERF_CALLCHAIN_H +#define __PERF_CALLCHAIN_H + +#include "../perf.h" +#include "list.h" +#include "rbtree.h" + + +struct callchain_node { + struct callchain_node *parent; + struct list_head brothers; + struct list_head children; + struct list_head val; + struct rb_node rb_node; + int val_nr; + int hit; +}; + +struct callchain_list { + unsigned long ip; + struct list_head list; +}; + +static inline void callchain_init(struct callchain_node *node) +{ + INIT_LIST_HEAD(&node->brothers); + INIT_LIST_HEAD(&node->children); + INIT_LIST_HEAD(&node->val); +} + +void append_chain(struct callchain_node *root, struct ip_callchain *chain); +void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node); +#endif -- cgit v1.2.3 From f55c555226b1010b249730ec6b232e5470286950 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 26 Jun 2009 16:28:01 +0200 Subject: perf report: Print sorted callchains per histogram entries Use the newly created callchains radix tree to gather the chains stats from the recorded events and then print the callchains for all of them, sorted by hits, using the "-c" parameter with perf report. Example: 66.15% [k] atm_clip_exit 63.08% 0xffffffffffffff80 0xffffffff810196a8 0xffffffff810c14c8 0xffffffff8101a79c 0xffffffff810194f3 0xffffffff8106ab7f 0xffffffff8106abe5 0xffffffff8106acde 0xffffffff8100d94b 0xffffffff8153e7ea [...] 1.54% 0xffffffffffffff80 0xffffffff810196a8 0xffffffff810c14c8 0xffffffff8101a79c [...] Symbols are not yet resolved. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1246026481-8314-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 82 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 71 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 28d1cb2127e9..ed391db9e0f8 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -15,6 +15,7 @@ #include "util/rbtree.h" #include "util/symbol.h" #include "util/string.h" +#include "util/callchain.h" #include "perf.h" #include "util/header.h" @@ -52,6 +53,7 @@ static char *parent_pattern = default_parent_pattern; static regex_t parent_regex; static int exclude_other = 1; +static int callchain; static u64 sample_type; @@ -488,17 +490,19 @@ static size_t threads__fprintf(FILE *fp) static struct rb_root hist; struct hist_entry { - struct rb_node rb_node; - - struct thread *thread; - struct map *map; - struct dso *dso; - struct symbol *sym; - struct symbol *parent; - u64 ip; - char level; - - u64 count; + struct rb_node rb_node; + + struct thread *thread; + struct map *map; + struct dso *dso; + struct symbol *sym; + struct symbol *parent; + u64 ip; + char level; + struct callchain_node callchain; + struct rb_root sorted_chain; + + u64 count; }; /* @@ -768,6 +772,48 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) return cmp; } +static size_t +callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples) +{ + struct callchain_list *chain; + size_t ret = 0; + + if (!self) + return 0; + + ret += callchain__fprintf(fp, self->parent, total_samples); + + + list_for_each_entry(chain, &self->val, list) + ret += fprintf(fp, " %p\n", (void *)chain->ip); + + return ret; +} + +static size_t +hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, + u64 total_samples) +{ + struct rb_node *rb_node; + struct callchain_node *chain; + size_t ret = 0; + + rb_node = rb_first(&self->sorted_chain); + while (rb_node) { + double percent; + + chain = rb_entry(rb_node, struct callchain_node, rb_node); + percent = chain->hit * 100.0 / total_samples; + ret += fprintf(fp, " %6.2f%%\n", percent); + ret += callchain__fprintf(fp, chain, total_samples); + ret += fprintf(fp, "\n"); + rb_node = rb_next(rb_node); + } + + return ret; +} + + static size_t hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) { @@ -808,6 +854,9 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) ret += fprintf(fp, "\n"); + if (callchain) + hist_entry_callchain__fprintf(fp, self, total_samples); + return ret; } @@ -892,6 +941,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, .level = level, .count = count, .parent = NULL, + .sorted_chain = RB_ROOT }; int cmp; @@ -934,6 +984,8 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, if (!cmp) { he->count += count; + if (callchain) + append_chain(&he->callchain, chain); return 0; } @@ -947,6 +999,10 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, if (!he) return -ENOMEM; *he = entry; + if (callchain) { + callchain_init(&he->callchain); + append_chain(&he->callchain, chain); + } rb_link_node(&he->rb_node, parent, p); rb_insert_color(&he->rb_node, &hist); @@ -1023,6 +1079,9 @@ static void output__insert_entry(struct hist_entry *he) struct rb_node *parent = NULL; struct hist_entry *iter; + if (callchain) + sort_chain_to_rbtree(&he->sorted_chain, &he->callchain); + while (*p != NULL) { parent = *p; iter = rb_entry(parent, struct hist_entry, rb_node); @@ -1599,6 +1658,7 @@ static const struct option options[] = { "regex filter to identify parent, see: '--sort parent'"), OPT_BOOLEAN('x', "exclude-other", &exclude_other, "Only display entries with parent-match"), + OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"), OPT_END() }; -- cgit v1.2.3 From fde953c1c67986e1c381fa50d8207b1578b5cefa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 27 Jun 2009 06:06:39 +0200 Subject: perf_counter tools: Remove dead code Vince Weaver reported that there's a handful of #ifdef __MINGW32__ sections in the code. Remove them as they are in essence dead code - as unlike upstream Git, the perf tool is unlikely to be ported to Windows. Reported-by: Vince Weaver Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/util/help.c | 15 ------- tools/perf/util/pager.c | 5 +-- tools/perf/util/run-command.c | 95 +------------------------------------------ tools/perf/util/run-command.h | 5 --- tools/perf/util/util.h | 15 ------- 5 files changed, 3 insertions(+), 132 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/help.c b/tools/perf/util/help.c index 6653f7dd1d78..17a00e0df2c4 100644 --- a/tools/perf/util/help.c +++ b/tools/perf/util/help.c @@ -126,21 +126,6 @@ static int is_executable(const char *name) !S_ISREG(st.st_mode)) return 0; -#ifdef __MINGW32__ - /* cannot trust the executable bit, peek into the file instead */ - char buf[3] = { 0 }; - int n; - int fd = open(name, O_RDONLY); - st.st_mode &= ~S_IXUSR; - if (fd >= 0) { - n = read(fd, buf, 2); - if (n == 2) - /* DOS executables start with "MZ" */ - if (!strcmp(buf, "#!") || !strcmp(buf, "MZ")) - st.st_mode |= S_IXUSR; - close(fd); - } -#endif return st.st_mode & S_IXUSR; } diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c index a28bccae5458..1915de20dcac 100644 --- a/tools/perf/util/pager.c +++ b/tools/perf/util/pager.c @@ -9,7 +9,6 @@ static int spawned_pager; -#ifndef __MINGW32__ static void pager_preexec(void) { /* @@ -24,7 +23,6 @@ static void pager_preexec(void) setenv("LESS", "FRSX", 0); } -#endif static const char *pager_argv[] = { "sh", "-c", NULL, NULL }; static struct child_process pager_process; @@ -70,9 +68,8 @@ void setup_pager(void) pager_argv[2] = pager; pager_process.argv = pager_argv; pager_process.in = -1; -#ifndef __MINGW32__ pager_process.preexec_cb = pager_preexec; -#endif + if (start_command(&pager_process)) return; diff --git a/tools/perf/util/run-command.c b/tools/perf/util/run-command.c index b2f5e854f40a..a3935343091a 100644 --- a/tools/perf/util/run-command.c +++ b/tools/perf/util/run-command.c @@ -65,7 +65,6 @@ int start_command(struct child_process *cmd) cmd->err = fderr[0]; } -#ifndef __MINGW32__ fflush(NULL); cmd->pid = fork(); if (!cmd->pid) { @@ -118,71 +117,6 @@ int start_command(struct child_process *cmd) } exit(127); } -#else - int s0 = -1, s1 = -1, s2 = -1; /* backups of stdin, stdout, stderr */ - const char **sargv = cmd->argv; - char **env = environ; - - if (cmd->no_stdin) { - s0 = dup(0); - dup_devnull(0); - } else if (need_in) { - s0 = dup(0); - dup2(fdin[0], 0); - } else if (cmd->in) { - s0 = dup(0); - dup2(cmd->in, 0); - } - - if (cmd->no_stderr) { - s2 = dup(2); - dup_devnull(2); - } else if (need_err) { - s2 = dup(2); - dup2(fderr[1], 2); - } - - if (cmd->no_stdout) { - s1 = dup(1); - dup_devnull(1); - } else if (cmd->stdout_to_stderr) { - s1 = dup(1); - dup2(2, 1); - } else if (need_out) { - s1 = dup(1); - dup2(fdout[1], 1); - } else if (cmd->out > 1) { - s1 = dup(1); - dup2(cmd->out, 1); - } - - if (cmd->dir) - die("chdir in start_command() not implemented"); - if (cmd->env) { - env = copy_environ(); - for (; *cmd->env; cmd->env++) - env = env_setenv(env, *cmd->env); - } - - if (cmd->perf_cmd) { - cmd->argv = prepare_perf_cmd(cmd->argv); - } - - cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env); - - if (cmd->env) - free_environ(env); - if (cmd->perf_cmd) - free(cmd->argv); - - cmd->argv = sargv; - if (s0 >= 0) - dup2(s0, 0), close(s0); - if (s1 >= 0) - dup2(s1, 1), close(s1); - if (s2 >= 0) - dup2(s2, 2), close(s2); -#endif if (cmd->pid < 0) { int err = errno; @@ -288,14 +222,6 @@ int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const return run_command(&cmd); } -#ifdef __MINGW32__ -static __stdcall unsigned run_thread(void *data) -{ - struct async *async = data; - return async->proc(async->fd_for_proc, async->data); -} -#endif - int start_async(struct async *async) { int pipe_out[2]; @@ -304,7 +230,6 @@ int start_async(struct async *async) return error("cannot create pipe: %s", strerror(errno)); async->out = pipe_out[0]; -#ifndef __MINGW32__ /* Flush stdio before fork() to avoid cloning buffers */ fflush(NULL); @@ -319,33 +244,17 @@ int start_async(struct async *async) exit(!!async->proc(pipe_out[1], async->data)); } close(pipe_out[1]); -#else - async->fd_for_proc = pipe_out[1]; - async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL); - if (!async->tid) { - error("cannot create thread: %s", strerror(errno)); - close_pair(pipe_out); - return -1; - } -#endif + return 0; } int finish_async(struct async *async) { -#ifndef __MINGW32__ int ret = 0; if (wait_or_whine(async->pid)) ret = error("waitpid (async) failed"); -#else - DWORD ret = 0; - if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0) - ret = error("waiting for thread failed: %lu", GetLastError()); - else if (!GetExitCodeThread(async->tid, &ret)) - ret = error("cannot get thread exit code: %lu", GetLastError()); - CloseHandle(async->tid); -#endif + return ret; } diff --git a/tools/perf/util/run-command.h b/tools/perf/util/run-command.h index 328289f23669..cc1837deba88 100644 --- a/tools/perf/util/run-command.h +++ b/tools/perf/util/run-command.h @@ -79,12 +79,7 @@ struct async { int (*proc)(int fd, void *data); void *data; int out; /* caller reads from here and closes it */ -#ifndef __MINGW32__ pid_t pid; -#else - HANDLE tid; - int fd_for_proc; -#endif }; int start_async(struct async *async); diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index b8cfed776d81..b4be6071c105 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -67,7 +67,6 @@ #include #include #include -#ifndef __MINGW32__ #include #include #include @@ -81,20 +80,6 @@ #include #include #include -#if defined(__CYGWIN__) -#undef _XOPEN_SOURCE -#include -#define _XOPEN_SOURCE 600 -#include "compat/cygwin.h" -#else -#undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */ -#include -#define _ALL_SOURCE 1 -#endif -#else /* __MINGW32__ */ -/* pull in Windows compatibility stuff */ -#include "compat/mingw.h" -#endif /* __MINGW32__ */ #ifndef NO_ICONV #include -- cgit v1.2.3 From 0cfb7a13b8e4e0afd4b856156ab16a182de7505b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 27 Jun 2009 06:10:30 +0200 Subject: perf stat: Add -n/--null option to run without counters Allow a no-counters run. This can be useful to measure just elapsed wall-clock time - or to assess the raw overhead of perf stat itself, without running any counters. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 8420ec589506..cdcd058fac08 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -70,6 +70,7 @@ static int run_count = 1; static int inherit = 1; static int scale = 1; static int target_pid = -1; +static int null_run = 0; static int fd[MAX_NR_CPUS][MAX_COUNTERS]; @@ -461,6 +462,8 @@ static const struct option options[] = { "be more verbose (show counter open errors, etc)"), OPT_INTEGER('r', "repeat", &run_count, "repeat command and print average + stddev (max: 100)"), + OPT_BOOLEAN('n', "null", &null_run, + "null run - dont start any counters"), OPT_END() }; @@ -476,7 +479,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix) if (run_count <= 0 || run_count > MAX_RUN) usage_with_options(stat_usage, options); - if (!nr_counters) + if (!null_run && !nr_counters) nr_counters = 8; nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); -- cgit v1.2.3 From 566747e6298289c5cb02d4939cb3abf1c4fe7e5a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 27 Jun 2009 06:24:32 +0200 Subject: perf stat: Fix multi-run stats In multi-run (-r/--repeat) printouts, print out the noise of the wall-clock average as well. Also, fix a bug in printing out scaled counters: if it was not scaled then we should not update the average with -1. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index cdcd058fac08..52c176cc683e 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -353,8 +353,11 @@ static void calc_avg(void) event_res_avg[j]+1, event_res[i][j]+1); update_avg("counter/2", j, event_res_avg[j]+2, event_res[i][j]+2); - update_avg("scaled", j, - event_scaled_avg + j, event_scaled[i]+j); + if (event_scaled[i][j] != -1) + update_avg("scaled", j, + event_scaled_avg + j, event_scaled[i]+j); + else + event_scaled_avg[j] = -1; } } runtime_nsecs_avg /= run_count; @@ -420,9 +423,13 @@ static void print_stat(int argc, const char **argv) fprintf(stderr, "\n"); - fprintf(stderr, " %14.9f seconds time elapsed.\n", + fprintf(stderr, " %14.9f seconds time elapsed", (double)walltime_nsecs_avg/1e9); - fprintf(stderr, "\n"); + if (run_count > 1) { + fprintf(stderr, " ( +- %7.3f%% )", + 100.0*(double)walltime_nsecs_noise/(double)walltime_nsecs_avg); + } + fprintf(stderr, "\n\n"); } static volatile int signr = -1; -- cgit v1.2.3 From 6e750a8fc009fd0ae98704525d1d8e80d60e8cc9 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 27 Jun 2009 03:02:07 +0530 Subject: perf stat: Improve output Increase size for event name to handle bigger names like 'L1-d$-prefetch-misses' Changed scaled counters from percentage to a multiplicative factor because the latter is more expressive. Also aligned the scaling factor, otherwise sometimes it looks like: 384 iTLB-load-misses (4.74x scaled) 452029 branch-loads (8.00x scaled) 5892 branch-load-misses (20.39x scaled) 972315 iTLB-loads (3.24x scaled) Before: 150708 L1-d$-stores (scaled from 23.57%) 428804 L1-d$-prefetches (scaled from 23.47%) 314446 L1-d$-prefetch-misses (scaled from 23.42%) 252626137 L1-i$-loads (scaled from 23.24%) 5297550 dTLB-load-misses (scaled from 23.96%) 106992392 branch-loads (scaled from 23.67%) 5239561 branch-load-misses (scaled from 23.43%) After: 1731713 L1-d$-loads ( 14.25x scaled) 44241 L1-d$-prefetches ( 3.88x scaled) 21076 L1-d$-prefetch-misses ( 3.40x scaled) 5789421 L1-i$-loads ( 3.78x scaled) 29645 dTLB-load-misses ( 2.95x scaled) 461474 branch-loads ( 6.52x scaled) 7493 branch-load-misses ( 26.57x scaled) Reported-by: Ingo Molnar Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1246051927.2988.10.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 52c176cc683e..3840a70f05b7 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -32,6 +32,7 @@ * Wu Fengguang * Mike Galbraith * Paul Mackerras + * Jaswinder Singh Rajput * * Released under the GPL v2. (and only v2, not any later version) */ @@ -251,7 +252,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise) { double msecs = (double)count[0] / 1000000; - fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); + fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter)); if (attrs[counter].type == PERF_TYPE_SOFTWARE && attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { @@ -265,7 +266,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise) static void abs_printout(int counter, u64 *count, u64 *noise) { - fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); + fprintf(stderr, " %14Ld %-24s", count[0], event_name(counter)); if (runtime_cycles_avg && attrs[counter].type == PERF_TYPE_HARDWARE && @@ -295,7 +296,7 @@ static void print_counter(int counter) scaled = event_scaled_avg[counter]; if (scaled == -1) { - fprintf(stderr, " %14s %-20s\n", + fprintf(stderr, " %14s %-24s\n", "", event_name(counter)); return; } @@ -306,8 +307,7 @@ static void print_counter(int counter) abs_printout(counter, count, noise); if (scaled) - fprintf(stderr, " (scaled from %.2f%%)", - (double) count[2] / count[1] * 100); + fprintf(stderr, " (%7.2fx scaled)", (double)count[1]/count[2]); fprintf(stderr, "\n"); } @@ -421,7 +421,6 @@ static void print_stat(int argc, const char **argv) for (counter = 0; counter < nr_counters; counter++) print_counter(counter); - fprintf(stderr, "\n"); fprintf(stderr, " %14.9f seconds time elapsed", (double)walltime_nsecs_avg/1e9); -- cgit v1.2.3 From c3043569dc8fbe9228b76174f15d1a7152c48a20 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 27 Jun 2009 23:49:09 +0530 Subject: perf stat: Micro-optimize the code: memcpy is only required if no event is selected and !null_run Set attrs and nr_counters if no event is selected and !null_run. Setting of attrs should depend on number of counters, so we need to memcpy only for sizeof(default_attrs) Also set nr_counters as ARRAY_SIZE(default_attrs) in place of hardcoded value. Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1246126749.32198.16.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3840a70f05b7..3e5ea4e2e5fd 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -46,7 +46,7 @@ #include #include -static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { +static struct perf_counter_attr default_attrs[] = { { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, @@ -477,16 +477,17 @@ int cmd_stat(int argc, const char **argv, const char *prefix) { int status; - memcpy(attrs, default_attrs, sizeof(attrs)); - argc = parse_options(argc, argv, options, stat_usage, 0); if (!argc) usage_with_options(stat_usage, options); if (run_count <= 0 || run_count > MAX_RUN) usage_with_options(stat_usage, options); - if (!null_run && !nr_counters) - nr_counters = 8; + /* Set attrs and nr_counters if no event is selected and !null_run */ + if (!null_run && !nr_counters) { + memcpy(attrs, default_attrs, sizeof(default_attrs)); + nr_counters = ARRAY_SIZE(default_attrs); + } nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); assert(nr_cpus <= MAX_NR_CPUS); -- cgit v1.2.3 From 210ad39fb7ef0bc0494483f517f42524f16bb2a7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 29 Jun 2009 21:50:54 +0200 Subject: perf stat: Use percentages for scaling output Peter expressed a strong preference for percentage based display of scaled values - so revert to that from the recently introduced multiplication-factor unit. Reported-by: Peter Zijlstra Cc: Jaswinder Singh Rajput Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3e5ea4e2e5fd..c5a290727a92 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -307,7 +307,8 @@ static void print_counter(int counter) abs_printout(counter, count, noise); if (scaled) - fprintf(stderr, " (%7.2fx scaled)", (double)count[1]/count[2]); + fprintf(stderr, " (scaled from %.2f%%)", + (double) count[2] / count[1] * 100); fprintf(stderr, "\n"); } -- cgit v1.2.3 From 051ae7f7344f453616b6b10332d4d8e1d40ed823 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 29 Jun 2009 21:13:21 +1000 Subject: perf_counter tools: Reduce perf stat measurement overhead/skew Vince Weaver reported a 'perf stat' measurement overhead in the count of retired instructions, which can amount to a +6000 instructions inflated count in the reported count. At present, perf stat creates its counters on the perf process. Thus the counters count the fork and various other activity in both the parent and child, such as the resolver overhead for resolving PLT entries for any libc functions that haven't been called before, such as execvp. This reduces the overhead by creating the counters on the child process after the fork, using a couple of pipes to synchronize so that the child process waits until the parent has created the counters before doing the exec. To eliminate the PLT resolution overhead on calling execvp, this does a dummy execvp first which will always fail. With this, the overhead of executing a program goes down from over 4800 instructions to about 90 instructions on powerpc (32-bit). This was measured with a statically-linked program written in assembler which only does the 3 instructions needed to call _exit(0). Before: $ perf stat -e 0:1:u ./three Performance counter stats for './three': 4858 instructions 0.001274523 seconds time elapsed After: $ perf stat -e 0:1:u ./three Performance counter stats for './three': 92 instructions 0.000468153 seconds time elapsed Reported-by: Vince Weaver Signed-off-by: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <19016.41425.814043.870352@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 64 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c5a290727a92..201ef2367dcb 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -99,7 +99,7 @@ static u64 runtime_cycles_noise; #define ERR_PERF_OPEN \ "Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" -static void create_perf_stat_counter(int counter) +static void create_perf_stat_counter(int counter, int pid) { struct perf_counter_attr *attr = attrs + counter; @@ -119,7 +119,7 @@ static void create_perf_stat_counter(int counter) attr->inherit = inherit; attr->disabled = 1; - fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); + fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0); if (fd[0][counter] < 0 && verbose) fprintf(stderr, ERR_PERF_OPEN, counter, fd[0][counter], strerror(errno)); @@ -205,12 +205,58 @@ static int run_perf_stat(int argc, const char **argv) int status = 0; int counter; int pid; + int child_ready_pipe[2], go_pipe[2]; + char buf; if (!system_wide) nr_cpus = 1; + if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) { + perror("failed to create pipes"); + exit(1); + } + + if ((pid = fork()) < 0) + perror("failed to fork"); + + if (!pid) { + close(child_ready_pipe[0]); + close(go_pipe[1]); + fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); + + /* + * Do a dummy execvp to get the PLT entry resolved, + * so we avoid the resolver overhead on the real + * execvp call. + */ + execvp("", (char **)argv); + + /* + * Tell the parent we're ready to go + */ + close(child_ready_pipe[1]); + + /* + * Wait until the parent tells us to go. + */ + read(go_pipe[0], &buf, 1); + + execvp(argv[0], (char **)argv); + + perror(argv[0]); + exit(-1); + } + + /* + * Wait for the child to be ready to exec. + */ + close(child_ready_pipe[1]); + close(go_pipe[0]); + read(child_ready_pipe[0], &buf, 1); + close(child_ready_pipe[0]); + for (counter = 0; counter < nr_counters; counter++) - create_perf_stat_counter(counter); + create_perf_stat_counter(counter, pid); /* * Enable counters and exec the command: @@ -218,19 +264,9 @@ static int run_perf_stat(int argc, const char **argv) t0 = rdclock(); prctl(PR_TASK_PERF_COUNTERS_ENABLE); - if ((pid = fork()) < 0) - perror("failed to fork"); - - if (!pid) { - if (execvp(argv[0], (char **)argv)) { - perror(argv[0]); - exit(-1); - } - } - + close(go_pipe[1]); wait(&status); - prctl(PR_TASK_PERF_COUNTERS_DISABLE); t1 = rdclock(); walltime_nsecs[run_idx] = t1 - t0; -- cgit v1.2.3 From 57e7986ed142417498155ebcd5eaf617ac37136d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 30 Jun 2009 16:07:19 +1000 Subject: perf_counter: Provide a way to enable counters on exec This provides a way to mark a counter to be enabled on the next exec. This is useful for measuring the total activity of a program without including overhead from the process that launches it. This also changes the perf stat command to use this new facility. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <19017.43927.838745.689203@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 3 ++- kernel/perf_counter.c | 50 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/builtin-stat.c | 6 +++--- 3 files changed, 55 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 3078e23c91eb..5e970c7d3fd5 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -179,8 +179,9 @@ struct perf_counter_attr { comm : 1, /* include comm data */ freq : 1, /* use freq, not period */ inherit_stat : 1, /* per task counts */ + enable_on_exec : 1, /* next exec enables */ - __reserved_1 : 52; + __reserved_1 : 51; __u32 wakeup_events; /* wakeup every n events */ __u32 __reserved_2; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 66ab1e9d1294..d55a50da2347 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1428,6 +1428,53 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) perf_counter_task_sched_in(curr, cpu); } +/* + * Enable all of a task's counters that have been marked enable-on-exec. + * This expects task == current. + */ +static void perf_counter_enable_on_exec(struct task_struct *task) +{ + struct perf_counter_context *ctx; + struct perf_counter *counter; + unsigned long flags; + int enabled = 0; + + local_irq_save(flags); + ctx = task->perf_counter_ctxp; + if (!ctx || !ctx->nr_counters) + goto out; + + __perf_counter_task_sched_out(ctx); + + spin_lock(&ctx->lock); + + list_for_each_entry(counter, &ctx->counter_list, list_entry) { + if (!counter->attr.enable_on_exec) + continue; + counter->attr.enable_on_exec = 0; + if (counter->state >= PERF_COUNTER_STATE_INACTIVE) + continue; + counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->tstamp_enabled = + ctx->time - counter->total_time_enabled; + enabled = 1; + } + + /* + * Unclone this context if we enabled any counter. + */ + if (enabled && ctx->parent_ctx) { + put_ctx(ctx->parent_ctx); + ctx->parent_ctx = NULL; + } + + spin_unlock(&ctx->lock); + + perf_counter_task_sched_in(task, smp_processor_id()); + out: + local_irq_restore(flags); +} + /* * Cross CPU call to read the hardware counter */ @@ -2949,6 +2996,9 @@ void perf_counter_comm(struct task_struct *task) { struct perf_comm_event comm_event; + if (task->perf_counter_ctxp) + perf_counter_enable_on_exec(task); + if (!atomic_read(&nr_comm_counters)) return; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 201ef2367dcb..2e03524a1de0 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -116,8 +116,9 @@ static void create_perf_stat_counter(int counter, int pid) fd[cpu][counter], strerror(errno)); } } else { - attr->inherit = inherit; - attr->disabled = 1; + attr->inherit = inherit; + attr->disabled = 1; + attr->enable_on_exec = 1; fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0); if (fd[0][counter] < 0 && verbose) @@ -262,7 +263,6 @@ static int run_perf_stat(int argc, const char **argv) * Enable counters and exec the command: */ t0 = rdclock(); - prctl(PR_TASK_PERF_COUNTERS_ENABLE); close(go_pipe[1]); wait(&status); -- cgit v1.2.3 From f5812a7a336fb952d819e4427b9a2dce02368e82 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Jun 2009 11:43:17 -0300 Subject: perf_counter tools: Adjust only prelinked symbol's addresses I.e. we can't handle these two kinds of files in the same way: 1) prelinked system library: [acme@doppio pahole]$ readelf -s /usr/lib64/libdw-0.141.so | egrep 'FUNC.+GLOBAL.+dwfl_report_elf' 278: 00000030450105a0 261 FUNC GLOBAL DEFAULT 12 dwfl_report_elf@@ELFUTILS_0.122 2) not prelinked library with debug information from a -debuginfo package: [acme@doppio pahole]$ readelf -s /usr/lib/debug/usr/lib64/libdw-0.141.so.debug | egrep 'FUNC.+GLOBAL.+dwfl_report_elf' 629: 00000000000105a0 261 FUNC GLOBAL DEFAULT 12 dwfl_report_elf [acme@doppio pahole]$ Now the numbers I got for a pahole perf run are in line with the numbers I get from oprofile. Signed-off-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <20090630144317.GB12663@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 16 +++++++++++----- tools/perf/util/symbol.h | 3 ++- 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 9c659ef6aec2..78c2efde01b7 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -520,7 +520,9 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, nr_syms = shdr.sh_size / shdr.sh_entsize; memset(&sym, 0, sizeof(sym)); - + self->prelinked = elf_section_by_name(elf, &ehdr, &shdr, + ".gnu.prelink_undo", + NULL) != NULL; elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { struct symbol *f; u64 obj_start; @@ -535,11 +537,13 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, gelf_getshdr(sec, &shdr); obj_start = sym.st_value; - if (verbose >= 2) - printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n", - (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); + if (self->prelinked) { + if (verbose >= 2) + printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n", + (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); - sym.st_value -= shdr.sh_addr - shdr.sh_offset; + sym.st_value -= shdr.sh_addr - shdr.sh_offset; + } f = symbol__new(sym.st_value, sym.st_size, elf_sym__name(&sym, symstrs), @@ -573,6 +577,8 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose) if (!name) return -1; + self->prelinked = 0; + if (strncmp(self->name, "/tmp/perf-", 10) == 0) return dso__load_perf_map(self, filter, verbose); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 940b432db16e..2c48ace8203b 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -20,8 +20,9 @@ struct symbol { struct dso { struct list_head node; struct rb_root syms; - unsigned int sym_priv_size; struct symbol *(*find_symbol)(struct dso *, u64 ip); + unsigned int sym_priv_size; + unsigned char prelinked; char name[0]; }; -- cgit v1.2.3 From 25903407da21552419e0955705d6d8c8e601cb2e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Jun 2009 19:01:20 -0300 Subject: perf report: Add --dsos parameter So that we can filter by dso. Symbols in other dsos won't be accounted for. Signed-off-by: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1246399282-20934-2-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-report.txt | 4 + tools/perf/Makefile | 2 + tools/perf/builtin-report.c | 16 +++ tools/perf/util/strlist.c | 184 +++++++++++++++++++++++++++++++ tools/perf/util/strlist.h | 32 ++++++ 5 files changed, 238 insertions(+) create mode 100644 tools/perf/util/strlist.c create mode 100644 tools/perf/util/strlist.h (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 40c1db83a16d..13d85ca8c914 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -20,6 +20,10 @@ OPTIONS -i:: --input=:: Input file name. (default: perf.data) +-d:: +--dsos=:: + Only consider symbols in these dsos. CSV that understands + file://filename entries. SEE ALSO -------- diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 1c1296d8a64b..9c6d0ae3708e 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -301,6 +301,7 @@ LIB_H += util/util.h LIB_H += util/help.h LIB_H += util/strbuf.h LIB_H += util/string.h +LIB_H += util/strlist.h LIB_H += util/run-command.h LIB_H += util/sigchain.h LIB_H += util/symbol.h @@ -322,6 +323,7 @@ LIB_OBJS += util/run-command.o LIB_OBJS += util/quote.o LIB_OBJS += util/strbuf.o LIB_OBJS += util/string.o +LIB_OBJS += util/strlist.o LIB_OBJS += util/usage.o LIB_OBJS += util/wrapper.o LIB_OBJS += util/sigchain.o diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ed391db9e0f8..7c6b6e776718 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -16,6 +16,7 @@ #include "util/symbol.h" #include "util/string.h" #include "util/callchain.h" +#include "util/strlist.h" #include "perf.h" #include "util/header.h" @@ -32,6 +33,8 @@ static char *vmlinux = NULL; static char default_sort_order[] = "comm,dso"; static char *sort_order = default_sort_order; +static char *dso_list_str; +static struct strlist *dso_list; static int input; static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; @@ -1272,6 +1275,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (show & show_mask) { struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); + if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name)) + return 0; + if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { eprintf("problem incrementing symbol count, skipping event\n"); return -1; @@ -1659,6 +1665,8 @@ static const struct option options[] = { OPT_BOOLEAN('x', "exclude-other", &exclude_other, "Only display entries with parent-match"), OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"), + OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]", + "only consider symbols in these dsos"), OPT_END() }; @@ -1698,6 +1706,14 @@ int cmd_report(int argc, const char **argv, const char *prefix) if (argc) usage_with_options(report_usage, options); + if (dso_list_str) { + dso_list = strlist__new(true, dso_list_str); + if (!dso_list) { + fprintf(stderr, "problems parsing dso list\n"); + exit(129); + } + } + setup_pager(); return __cmd_report(); diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c new file mode 100644 index 000000000000..025a78edfffe --- /dev/null +++ b/tools/perf/util/strlist.c @@ -0,0 +1,184 @@ +/* + * (c) 2009 Arnaldo Carvalho de Melo + * + * Licensed under the GPLv2. + */ + +#include "strlist.h" +#include +#include +#include +#include + +static struct str_node *str_node__new(const char *s, bool dupstr) +{ + struct str_node *self = malloc(sizeof(*self)); + + if (self != NULL) { + if (dupstr) { + s = strdup(s); + if (s == NULL) + goto out_delete; + } + self->s = s; + } + + return self; + +out_delete: + free(self); + return NULL; +} + +static void str_node__delete(struct str_node *self, bool dupstr) +{ + if (dupstr) + free((void *)self->s); + free(self); +} + +int strlist__add(struct strlist *self, const char *new_entry) +{ + struct rb_node **p = &self->entries.rb_node; + struct rb_node *parent = NULL; + struct str_node *sn; + + while (*p != NULL) { + int rc; + + parent = *p; + sn = rb_entry(parent, struct str_node, rb_node); + rc = strcmp(sn->s, new_entry); + + if (rc > 0) + p = &(*p)->rb_left; + else if (rc < 0) + p = &(*p)->rb_right; + else + return -EEXIST; + } + + sn = str_node__new(new_entry, self->dupstr); + if (sn == NULL) + return -ENOMEM; + + rb_link_node(&sn->rb_node, parent, p); + rb_insert_color(&sn->rb_node, &self->entries); + + return 0; +} + +int strlist__load(struct strlist *self, const char *filename) +{ + char entry[1024]; + int err; + FILE *fp = fopen(filename, "r"); + + if (fp == NULL) + return errno; + + while (fgets(entry, sizeof(entry), fp) != NULL) { + const size_t len = strlen(entry); + + if (len == 0) + continue; + entry[len - 1] = '\0'; + + err = strlist__add(self, entry); + if (err != 0) + goto out; + } + + err = 0; +out: + fclose(fp); + return err; +} + +void strlist__remove(struct strlist *self, struct str_node *sn) +{ + rb_erase(&sn->rb_node, &self->entries); + str_node__delete(sn, self->dupstr); +} + +bool strlist__has_entry(struct strlist *self, const char *entry) +{ + struct rb_node **p = &self->entries.rb_node; + struct rb_node *parent = NULL; + + while (*p != NULL) { + struct str_node *sn; + int rc; + + parent = *p; + sn = rb_entry(parent, struct str_node, rb_node); + rc = strcmp(sn->s, entry); + + if (rc > 0) + p = &(*p)->rb_left; + else if (rc < 0) + p = &(*p)->rb_right; + else + return true; + } + + return false; +} + +static int strlist__parse_list_entry(struct strlist *self, const char *s) +{ + if (strncmp(s, "file://", 7) == 0) + return strlist__load(self, s + 7); + + return strlist__add(self, s); +} + +int strlist__parse_list(struct strlist *self, const char *s) +{ + char *sep; + int err; + + while ((sep = strchr(s, ',')) != NULL) { + *sep = '\0'; + err = strlist__parse_list_entry(self, s); + *sep = ','; + if (err != 0) + return err; + s = sep + 1; + } + + return *s ? strlist__parse_list_entry(self, s) : 0; +} + +struct strlist *strlist__new(bool dupstr, const char *slist) +{ + struct strlist *self = malloc(sizeof(*self)); + + if (self != NULL) { + self->entries = RB_ROOT; + self->dupstr = dupstr; + if (slist && strlist__parse_list(self, slist) != 0) + goto out_error; + } + + return self; +out_error: + free(self); + return NULL; +} + +void strlist__delete(struct strlist *self) +{ + if (self != NULL) { + struct str_node *pos; + struct rb_node *next = rb_first(&self->entries); + + while (next) { + pos = rb_entry(next, struct str_node, rb_node); + next = rb_next(&pos->rb_node); + strlist__remove(self, pos); + } + self->entries = RB_ROOT; + free(self); + } +} diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h new file mode 100644 index 000000000000..2fb117fb4b67 --- /dev/null +++ b/tools/perf/util/strlist.h @@ -0,0 +1,32 @@ +#ifndef STRLIST_H_ +#define STRLIST_H_ + +#include "rbtree.h" +#include + +struct str_node { + struct rb_node rb_node; + const char *s; +}; + +struct strlist { + struct rb_root entries; + bool dupstr; +}; + +struct strlist *strlist__new(bool dupstr, const char *slist); +void strlist__delete(struct strlist *self); + +void strlist__remove(struct strlist *self, struct str_node *sn); +int strlist__load(struct strlist *self, const char *filename); +int strlist__add(struct strlist *self, const char *str); + +bool strlist__has_entry(struct strlist *self, const char *entry); + +static inline bool strlist__empty(const struct strlist *self) +{ + return rb_first(&self->entries) == NULL; +} + +int strlist__parse_list(struct strlist *self, const char *s); +#endif /* STRLIST_H_ */ -- cgit v1.2.3 From cc8b88b15ab8e5ae162a46c4b6b286b555190dd1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Jun 2009 19:01:21 -0300 Subject: perf report: Add --comms parameter So that we can filter by comm. Symbols in other comms won't be accounted for. Signed-off-by: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1246399282-20934-3-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-report.txt | 4 ++++ tools/perf/builtin-report.c | 33 ++++++++++++++++++++++---------- 2 files changed, 27 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 13d85ca8c914..4c44ef1747b9 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -24,6 +24,10 @@ OPTIONS --dsos=:: Only consider symbols in these dsos. CSV that understands file://filename entries. +-C:: +--comms=:: + Only consider symbols in these comms. CSV that understands + file://filename entries. SEE ALSO -------- diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 7c6b6e776718..8143477b7ef7 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -33,8 +33,8 @@ static char *vmlinux = NULL; static char default_sort_order[] = "comm,dso"; static char *sort_order = default_sort_order; -static char *dso_list_str; -static struct strlist *dso_list; +static char *dso_list_str, *comm_list_str; +static struct strlist *dso_list, *comm_list; static int input; static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; @@ -240,7 +240,7 @@ static u64 vdso__map_ip(struct map *map, u64 ip) static inline int is_anon_memory(const char *filename) { - return strcmp(filename, "//anon") == 0; + return strcmp(filename, "//anon") == 0; } static struct map *map__new(struct mmap_event *event) @@ -1253,6 +1253,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) return -1; } + if (comm_list && !strlist__has_entry(comm_list, thread->comm)) + return 0; + if (event->header.misc & PERF_EVENT_MISC_KERNEL) { show = SHOW_KERNEL; level = 'k'; @@ -1667,6 +1670,8 @@ static const struct option options[] = { OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"), OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]", "only consider symbols in these dsos"), + OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]", + "only consider symbols in these comms"), OPT_END() }; @@ -1685,6 +1690,19 @@ static void setup_sorting(void) free(str); } +static void setup_list(struct strlist **list, const char *list_str, + const char *list_name) +{ + if (list_str) { + *list = strlist__new(true, list_str); + if (!*list) { + fprintf(stderr, "problems parsing %s list\n", + list_name); + exit(129); + } + } +} + int cmd_report(int argc, const char **argv, const char *prefix) { symbol__init(); @@ -1706,13 +1724,8 @@ int cmd_report(int argc, const char **argv, const char *prefix) if (argc) usage_with_options(report_usage, options); - if (dso_list_str) { - dso_list = strlist__new(true, dso_list_str); - if (!dso_list) { - fprintf(stderr, "problems parsing dso list\n"); - exit(129); - } - } + setup_list(&dso_list, dso_list_str, "dso"); + setup_list(&comm_list, comm_list_str, "comm"); setup_pager(); -- cgit v1.2.3 From 7bec7a9134c25cecb0d7029199b59f7b1bef35b8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Jun 2009 19:01:22 -0300 Subject: perf report: Add --symbols parameter So that we can filter by symbol name. The 'pfunct' utility in the 'dwarves' package can be used to create a file with the functions one wants. Example: [acme@doppio pahole]$ pfunct /usr/lib/debug/usr/lib64/libdw-0.141.so.debug | grep dwarf > /tmp/dwarf.symbols [acme@doppio pahole]$ wc -l /tmp/dwarf.symbols 93 /tmp/dwarf.symbols [acme@doppio pahole]$ head -3 /tmp/dwarf.symbols dwfl_addrdwarf dwfl_module_getdwarf dwfl_getdwarf [acme@doppio pahole]$ perf report --sort comm,dso,symbol --comms pahole --dsos /usr/lib64/libdw-0.141.so --symbols file:///tmp/dwarf.symbols 33.99% pahole /usr/lib64/libdw-0.141.so [.] dwarf_tag 29.07% pahole /usr/lib64/libdw-0.141.so [.] dwarf_decl_file 27.71% pahole /usr/lib64/libdw-0.141.so [.] dwarf_getsrclines 4.54% pahole /usr/lib64/libdw-0.141.so 0x00000000007400 3.93% pahole /usr/lib64/libdw-0.141.so [.] dwarf_decl_line 0.46% pahole /usr/lib64/libdw-0.141.so [.] dwarf_getlocation 0.18% pahole /usr/lib64/libdw-0.141.so [.] __libdwarf_next_prime 0.13% pahole /usr/lib64/libdw-0.141.so [.] dwarf_diecu [acme@doppio pahole]$ Signed-off-by: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1246399282-20934-4-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-report.txt | 4 ++++ tools/perf/builtin-report.c | 10 ++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 4c44ef1747b9..8aa3f8c88707 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -28,6 +28,10 @@ OPTIONS --comms=:: Only consider symbols in these comms. CSV that understands file://filename entries. +-S:: +--symbols=:: + Only consider these symbols. CSV that understands + file://filename entries. SEE ALSO -------- diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8143477b7ef7..135b7837e6bf 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -33,8 +33,8 @@ static char *vmlinux = NULL; static char default_sort_order[] = "comm,dso"; static char *sort_order = default_sort_order; -static char *dso_list_str, *comm_list_str; -static struct strlist *dso_list, *comm_list; +static char *dso_list_str, *comm_list_str, *sym_list_str; +static struct strlist *dso_list, *comm_list, *sym_list; static int input; static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; @@ -1281,6 +1281,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name)) return 0; + if (sym_list && sym && !strlist__has_entry(sym_list, sym->name)) + return 0; + if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { eprintf("problem incrementing symbol count, skipping event\n"); return -1; @@ -1672,6 +1675,8 @@ static const struct option options[] = { "only consider symbols in these dsos"), OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]", "only consider symbols in these comms"), + OPT_STRING('S', "symbols", &sym_list_str, "symbol[,symbol...]", + "only consider these symbols"), OPT_END() }; @@ -1726,6 +1731,7 @@ int cmd_report(int argc, const char **argv, const char *prefix) setup_list(&dso_list, dso_list_str, "dso"); setup_list(&comm_list, comm_list_str, "comm"); + setup_list(&sym_list, sym_list_str, "symbol"); setup_pager(); -- cgit v1.2.3