diff options
-rw-r--r-- | tools/lib/traceevent/Makefile | 2 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-trace.txt | 16 | ||||
-rw-r--r-- | tools/perf/Makefile | 6 | ||||
-rw-r--r-- | tools/perf/builtin-kvm.c | 1 | ||||
-rw-r--r-- | tools/perf/builtin-trace.c | 379 | ||||
-rw-r--r-- | tools/perf/util/evlist.c | 3 | ||||
-rw-r--r-- | tools/perf/util/evsel.c | 93 | ||||
-rw-r--r-- | tools/perf/util/machine.c | 50 | ||||
-rw-r--r-- | tools/perf/util/python.c | 20 | ||||
-rw-r--r-- | tools/perf/util/thread.c | 3 | ||||
-rw-r--r-- | tools/perf/util/thread.h | 8 |
11 files changed, 488 insertions, 93 deletions
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 0794acca46a3..ca6cb779876a 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -60,7 +60,7 @@ ifeq ($(BUILD_SRC),) ifneq ($(BUILD_OUTPUT),) define build_output - $(if $(VERBOSE:1=),@)$(MAKE) -C $(BUILD_OUTPUT) \ + $(if $(VERBOSE:1=),@)+$(MAKE) -C $(BUILD_OUTPUT) \ BUILD_SRC=$(CURDIR) -f $(CURDIR)/Makefile $1 endef diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 3b3552a8959e..fe19811faf90 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -23,29 +23,45 @@ analysis phases. OPTIONS ------- +-a:: --all-cpus:: System-wide collection from all CPUs. -e:: --expr:: List of events to show, currently only syscall names. + Prefixing with ! shows all syscalls but the ones specified. You may + need to escape it. + +-o:: +--output=:: + Output file name. -p:: --pid=:: Record events on existing process ID (comma separated list). +-t:: --tid=:: Record events on existing thread ID (comma separated list). +-u:: --uid=:: Record events in threads owned by uid. Name or number. +-v:: +--verbose=:: + Verbosity level. + +-i:: --no-inherit:: Child tasks do not inherit counters. +-m:: --mmap-pages=:: Number of mmap data pages. Must be a power of two. +-C:: --cpu:: Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2. diff --git a/tools/perf/Makefile b/tools/perf/Makefile index e0d3d9f96771..7d30a7ddcc88 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -766,17 +766,21 @@ check: $(OUTPUT)common-cmds.h install-bin: all $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)' + $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' +ifndef NO_LIBPERL $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' - $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl' $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' +endif +ifndef NO_LIBPYTHON $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace' $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace' $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python' $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' +endif $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d' $(INSTALL) bash_completion '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf' $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests' diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index fa2f3d79886a..6cd4de59be21 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1305,7 +1305,6 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv) const char * const record_args[] = { "record", "-R", - "-f", "-m", "1024", "-c", "1", }; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 120fdfb3d920..b72afc73f9a7 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1,6 +1,7 @@ #include <traceevent/event-parse.h> #include "builtin.h" #include "util/color.h" +#include "util/debug.h" #include "util/evlist.h" #include "util/machine.h" #include "util/thread.h" @@ -10,27 +11,166 @@ #include <libaudit.h> #include <stdlib.h> +#include <sys/mman.h> + +static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, unsigned long arg) +{ + return scnprintf(bf, size, "%#lx", arg); +} + +#define SCA_HEX syscall_arg__scnprintf_hex + +static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, unsigned long arg) +{ + int printed = 0, prot = arg; + + if (prot == PROT_NONE) + return scnprintf(bf, size, "NONE"); +#define P_MMAP_PROT(n) \ + if (prot & PROT_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + prot &= ~PROT_##n; \ + } + + P_MMAP_PROT(EXEC); + P_MMAP_PROT(READ); + P_MMAP_PROT(WRITE); +#ifdef PROT_SEM + P_MMAP_PROT(SEM); +#endif + P_MMAP_PROT(GROWSDOWN); + P_MMAP_PROT(GROWSUP); +#undef P_MMAP_PROT + + if (prot) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot); + + return printed; +} + +#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot + +static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, unsigned long arg) +{ + int printed = 0, flags = arg; + +#define P_MMAP_FLAG(n) \ + if (flags & MAP_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~MAP_##n; \ + } + + P_MMAP_FLAG(SHARED); + P_MMAP_FLAG(PRIVATE); + P_MMAP_FLAG(32BIT); + P_MMAP_FLAG(ANONYMOUS); + P_MMAP_FLAG(DENYWRITE); + P_MMAP_FLAG(EXECUTABLE); + P_MMAP_FLAG(FILE); + P_MMAP_FLAG(FIXED); + P_MMAP_FLAG(GROWSDOWN); + P_MMAP_FLAG(HUGETLB); + P_MMAP_FLAG(LOCKED); + P_MMAP_FLAG(NONBLOCK); + P_MMAP_FLAG(NORESERVE); + P_MMAP_FLAG(POPULATE); + P_MMAP_FLAG(STACK); +#ifdef MAP_UNINITIALIZED + P_MMAP_FLAG(UNINITIALIZED); +#endif +#undef P_MMAP_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags + +static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, unsigned long arg) +{ + int behavior = arg; + + switch (behavior) { +#define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n) + P_MADV_BHV(NORMAL); + P_MADV_BHV(RANDOM); + P_MADV_BHV(SEQUENTIAL); + P_MADV_BHV(WILLNEED); + P_MADV_BHV(DONTNEED); + P_MADV_BHV(REMOVE); + P_MADV_BHV(DONTFORK); + P_MADV_BHV(DOFORK); + P_MADV_BHV(HWPOISON); +#ifdef MADV_SOFT_OFFLINE + P_MADV_BHV(SOFT_OFFLINE); +#endif + P_MADV_BHV(MERGEABLE); + P_MADV_BHV(UNMERGEABLE); + P_MADV_BHV(HUGEPAGE); + P_MADV_BHV(NOHUGEPAGE); +#ifdef MADV_DONTDUMP + P_MADV_BHV(DONTDUMP); +#endif +#ifdef MADV_DODUMP + P_MADV_BHV(DODUMP); +#endif +#undef P_MADV_PHV + default: break; + } + + return scnprintf(bf, size, "%#x", behavior); +} + +#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior static struct syscall_fmt { const char *name; const char *alias; + size_t (*arg_scnprintf[6])(char *bf, size_t size, unsigned long arg); bool errmsg; bool timeout; + bool hexret; } syscall_fmts[] = { { .name = "access", .errmsg = true, }, { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, + { .name = "brk", .hexret = true, + .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, + { .name = "mmap", .hexret = true, }, { .name = "connect", .errmsg = true, }, { .name = "fstat", .errmsg = true, .alias = "newfstat", }, { .name = "fstatat", .errmsg = true, .alias = "newfstatat", }, { .name = "futex", .errmsg = true, }, + { .name = "ioctl", .errmsg = true, + .arg_scnprintf = { [2] = SCA_HEX, /* arg */ }, }, + { .name = "lstat", .errmsg = true, .alias = "newlstat", }, + { .name = "madvise", .errmsg = true, + .arg_scnprintf = { [0] = SCA_HEX, /* start */ + [2] = SCA_MADV_BHV, /* behavior */ }, }, + { .name = "mmap", .hexret = true, + .arg_scnprintf = { [0] = SCA_HEX, /* addr */ + [2] = SCA_MMAP_PROT, /* prot */ + [3] = SCA_MMAP_FLAGS, /* flags */ }, }, + { .name = "mprotect", .errmsg = true, + .arg_scnprintf = { [0] = SCA_HEX, /* start */ + [2] = SCA_MMAP_PROT, /* prot */ }, }, + { .name = "mremap", .hexret = true, + .arg_scnprintf = { [0] = SCA_HEX, /* addr */ + [4] = SCA_HEX, /* new_addr */ }, }, + { .name = "munmap", .errmsg = true, + .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "open", .errmsg = true, }, { .name = "poll", .errmsg = true, .timeout = true, }, { .name = "ppoll", .errmsg = true, .timeout = true, }, + { .name = "pread", .errmsg = true, .alias = "pread64", }, + { .name = "pwrite", .errmsg = true, .alias = "pwrite64", }, { .name = "read", .errmsg = true, }, { .name = "recvfrom", .errmsg = true, }, { .name = "select", .errmsg = true, .timeout = true, }, { .name = "socket", .errmsg = true, }, { .name = "stat", .errmsg = true, .alias = "newstat", }, + { .name = "uname", .errmsg = true, .alias = "newuname", }, }; static int syscall_fmt__cmp(const void *name, const void *fmtp) @@ -50,6 +190,7 @@ struct syscall { const char *name; bool filtered; struct syscall_fmt *fmt; + size_t (**arg_scnprintf)(char *bf, size_t size, unsigned long arg); }; static size_t fprintf_duration(unsigned long t, FILE *fp) @@ -63,7 +204,7 @@ static size_t fprintf_duration(unsigned long t, FILE *fp) printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration); else printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration); - return printed + fprintf(stdout, "): "); + return printed + fprintf(fp, "): "); } struct thread_trace { @@ -80,7 +221,7 @@ static struct thread_trace *thread_trace__new(void) return zalloc(sizeof(struct thread_trace)); } -static struct thread_trace *thread__trace(struct thread *thread) +static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) { struct thread_trace *ttrace; @@ -98,12 +239,13 @@ static struct thread_trace *thread__trace(struct thread *thread) return ttrace; fail: - color_fprintf(stdout, PERF_COLOR_RED, + color_fprintf(fp, PERF_COLOR_RED, "WARNING: not enough memory, dropping samples!\n"); return NULL; } struct trace { + struct perf_tool tool; int audit_machine; struct { int max; @@ -112,8 +254,10 @@ struct trace { struct perf_record_opts opts; struct machine host; u64 base_time; - struct strlist *ev_qualifier; + FILE *output; unsigned long nr_events; + struct strlist *ev_qualifier; + bool not_ev_qualifier; bool sched; bool multiple_threads; double duration_filter; @@ -151,13 +295,14 @@ static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thre return printed; } -static int trace__process_event(struct machine *machine, union perf_event *event) +static int trace__process_event(struct trace *trace, struct machine *machine, + union perf_event *event) { int ret = 0; switch (event->header.type) { case PERF_RECORD_LOST: - color_fprintf(stdout, PERF_COLOR_RED, + color_fprintf(trace->output, PERF_COLOR_RED, "LOST %" PRIu64 " events!\n", event->lost.lost); ret = machine__process_lost_event(machine, event); default: @@ -168,12 +313,13 @@ static int trace__process_event(struct machine *machine, union perf_event *event return ret; } -static int trace__tool_process(struct perf_tool *tool __maybe_unused, +static int trace__tool_process(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample __maybe_unused, struct machine *machine) { - return trace__process_event(machine, event); + struct trace *trace = container_of(tool, struct trace, tool); + return trace__process_event(trace, machine, event); } static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) @@ -187,11 +333,11 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) machine__create_kernel_maps(&trace->host); if (perf_target__has_task(&trace->opts.target)) { - err = perf_event__synthesize_thread_map(NULL, evlist->threads, + err = perf_event__synthesize_thread_map(&trace->tool, evlist->threads, trace__tool_process, &trace->host); } else { - err = perf_event__synthesize_threads(NULL, trace__tool_process, + err = perf_event__synthesize_threads(&trace->tool, trace__tool_process, &trace->host); } @@ -201,6 +347,26 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) return err; } +static int syscall__set_arg_fmts(struct syscall *sc) +{ + struct format_field *field; + int idx = 0; + + sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *)); + if (sc->arg_scnprintf == NULL) + return -1; + + for (field = sc->tp_format->format.fields->next; field; field = field->next) { + if (sc->fmt && sc->fmt->arg_scnprintf[idx]) + sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx]; + else if (field->flags & FIELD_IS_POINTER) + sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex; + ++idx; + } + + return 0; +} + static int trace__read_syscall_info(struct trace *trace, int id) { char tp_name[128]; @@ -230,13 +396,17 @@ static int trace__read_syscall_info(struct trace *trace, int id) sc = trace->syscalls.table + id; sc->name = name; - if (trace->ev_qualifier && !strlist__find(trace->ev_qualifier, name)) { - sc->filtered = true; - /* - * No need to do read tracepoint information since this will be - * filtered out. - */ - return 0; + if (trace->ev_qualifier) { + bool in = strlist__find(trace->ev_qualifier, name) != NULL; + + if (!(in ^ trace->not_ev_qualifier)) { + sc->filtered = true; + /* + * No need to do read tracepoint information since this will be + * filtered out. + */ + return 0; + } } sc->fmt = syscall_fmt__find(sc->name); @@ -249,7 +419,10 @@ static int trace__read_syscall_info(struct trace *trace, int id) sc->tp_format = event_format__new("syscalls", tp_name); } - return sc->tp_format != NULL ? 0 : -1; + if (sc->tp_format == NULL) + return -1; + + return syscall__set_arg_fmts(sc); } static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, @@ -263,8 +436,14 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, for (field = sc->tp_format->format.fields->next; field; field = field->next) { printed += scnprintf(bf + printed, size - printed, - "%s%s: %ld", printed ? ", " : "", - field->name, args[i++]); + "%s%s: ", printed ? ", " : "", field->name); + + if (sc->arg_scnprintf && sc->arg_scnprintf[i]) + printed += sc->arg_scnprintf[i](bf + printed, size - printed, args[i]); + else + printed += scnprintf(bf + printed, size - printed, + "%ld", args[i]); + ++i; } } else { while (i < 6) { @@ -288,7 +467,22 @@ static struct syscall *trace__syscall_info(struct trace *trace, int id = perf_evsel__intval(evsel, sample, "id"); if (id < 0) { - printf("Invalid syscall %d id, skipping...\n", id); + + /* + * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried + * before that, leaving at a higher verbosity level till that is + * explained. Reproduced with plain ftrace with: + * + * echo 1 > /t/events/raw_syscalls/sys_exit/enable + * grep "NR -1 " /t/trace_pipe + * + * After generating some load on the machine. + */ + if (verbose > 1) { + static u64 n; + fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n", + id, perf_evsel__name(evsel), ++n); + } return NULL; } @@ -302,10 +496,12 @@ static struct syscall *trace__syscall_info(struct trace *trace, return &trace->syscalls.table[id]; out_cant_read: - printf("Problems reading syscall %d", id); - if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL) - printf("(%s)", trace->syscalls.table[id].name); - puts(" information"); + if (verbose) { + fprintf(trace->output, "Problems reading syscall %d", id); + if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL) + fprintf(trace->output, "(%s)", trace->syscalls.table[id].name); + fputs(" information\n", trace->output); + } return NULL; } @@ -326,13 +522,13 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, return 0; thread = machine__findnew_thread(&trace->host, sample->tid); - ttrace = thread__trace(thread); + ttrace = thread__trace(thread, trace->output); if (ttrace == NULL) return -1; args = perf_evsel__rawptr(evsel, sample, "args"); if (args == NULL) { - printf("Problems reading syscall arguments\n"); + fprintf(trace->output, "Problems reading syscall arguments\n"); return -1; } @@ -352,8 +548,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) { if (!trace->duration_filter) { - trace__fprintf_entry_head(trace, thread, 1, sample->time, stdout); - printf("%-70s\n", ttrace->entry_str); + trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); + fprintf(trace->output, "%-70s\n", ttrace->entry_str); } } else ttrace->entry_pending = true; @@ -377,7 +573,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, return 0; thread = machine__findnew_thread(&trace->host, sample->tid); - ttrace = thread__trace(thread); + ttrace = thread__trace(thread, trace->output); if (ttrace == NULL) return -1; @@ -394,28 +590,33 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, } else if (trace->duration_filter) goto out; - trace__fprintf_entry_head(trace, thread, duration, sample->time, stdout); + trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output); if (ttrace->entry_pending) { - printf("%-70s", ttrace->entry_str); + fprintf(trace->output, "%-70s", ttrace->entry_str); } else { - printf(" ... ["); - color_fprintf(stdout, PERF_COLOR_YELLOW, "continued"); - printf("]: %s()", sc->name); + fprintf(trace->output, " ... ["); + color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued"); + fprintf(trace->output, "]: %s()", sc->name); } - if (ret < 0 && sc->fmt && sc->fmt->errmsg) { + if (sc->fmt == NULL) { +signed_print: + fprintf(trace->output, ") = %d", ret); + } else if (ret < 0 && sc->fmt->errmsg) { char bf[256]; const char *emsg = strerror_r(-ret, bf, sizeof(bf)), *e = audit_errno_to_name(-ret); - printf(") = -1 %s %s", e, emsg); - } else if (ret == 0 && sc->fmt && sc->fmt->timeout) - printf(") = 0 Timeout"); + fprintf(trace->output, ") = -1 %s %s", e, emsg); + } else if (ret == 0 && sc->fmt->timeout) + fprintf(trace->output, ") = 0 Timeout"); + else if (sc->fmt->hexret) + fprintf(trace->output, ") = %#x", ret); else - printf(") = %d", ret); + goto signed_print; - putchar('\n'); + fputc('\n', trace->output); out: ttrace->entry_pending = false; @@ -428,7 +629,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evs u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); double runtime_ms = (double)runtime / NSEC_PER_MSEC; struct thread *thread = machine__findnew_thread(&trace->host, sample->tid); - struct thread_trace *ttrace = thread__trace(thread); + struct thread_trace *ttrace = thread__trace(thread, trace->output); if (ttrace == NULL) goto out_dump; @@ -438,7 +639,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evs return 0; out_dump: - printf("%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n", + fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n", evsel->name, perf_evsel__strval(evsel, sample, "comm"), (pid_t)perf_evsel__intval(evsel, sample, "pid"), @@ -456,32 +657,32 @@ static int trace__run(struct trace *trace, int argc, const char **argv) const bool forks = argc > 0; if (evlist == NULL) { - printf("Not enough memory to run!\n"); + fprintf(trace->output, "Not enough memory to run!\n"); goto out; } if (perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_enter", trace__sys_enter) || perf_evlist__add_newtp(evlist, "raw_syscalls", "sys_exit", trace__sys_exit)) { - printf("Couldn't read the raw_syscalls tracepoints information!\n"); + fprintf(trace->output, "Couldn't read the raw_syscalls tracepoints information!\n"); goto out_delete_evlist; } if (trace->sched && perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime)) { - printf("Couldn't read the sched_stat_runtime tracepoint information!\n"); + fprintf(trace->output, "Couldn't read the sched_stat_runtime tracepoint information!\n"); goto out_delete_evlist; } err = perf_evlist__create_maps(evlist, &trace->opts.target); if (err < 0) { - printf("Problems parsing the target to trace, check your options!\n"); + fprintf(trace->output, "Problems parsing the target to trace, check your options!\n"); goto out_delete_evlist; } err = trace__symbols_init(trace, evlist); if (err < 0) { - printf("Problems initializing symbol libraries!\n"); + fprintf(trace->output, "Problems initializing symbol libraries!\n"); goto out_delete_maps; } @@ -494,20 +695,20 @@ static int trace__run(struct trace *trace, int argc, const char **argv) err = perf_evlist__prepare_workload(evlist, &trace->opts.target, argv, false, false); if (err < 0) { - printf("Couldn't run the workload!\n"); + fprintf(trace->output, "Couldn't run the workload!\n"); goto out_delete_maps; } } err = perf_evlist__open(evlist); if (err < 0) { - printf("Couldn't create the events: %s\n", strerror(errno)); + fprintf(trace->output, "Couldn't create the events: %s\n", strerror(errno)); goto out_delete_maps; } err = perf_evlist__mmap(evlist, UINT_MAX, false); if (err < 0) { - printf("Couldn't mmap the events: %s\n", strerror(errno)); + fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno)); goto out_close_evlist; } @@ -532,7 +733,7 @@ again: err = perf_evlist__parse_sample(evlist, event, &sample); if (err) { - printf("Can't parse sample, err = %d, skipping...\n", err); + fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err); continue; } @@ -540,18 +741,18 @@ again: trace->base_time = sample.time; if (type != PERF_RECORD_SAMPLE) { - trace__process_event(&trace->host, event); + trace__process_event(trace, &trace->host, event); continue; } evsel = perf_evlist__id2evsel(evlist, sample.id); if (evsel == NULL) { - printf("Unknown tp ID %" PRIu64 ", skipping...\n", sample.id); + fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id); continue; } if (sample.raw_data == NULL) { - printf("%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", + fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", perf_evsel__name(evsel), sample.tid, sample.cpu, sample.raw_size); continue; @@ -640,6 +841,23 @@ static int trace__set_duration(const struct option *opt, const char *str, return 0; } +static int trace__open_output(struct trace *trace, const char *filename) +{ + struct stat st; + + if (!stat(filename, &st) && st.st_size) { + char oldname[PATH_MAX]; + + scnprintf(oldname, sizeof(oldname), "%s.old", filename); + unlink(oldname); + rename(filename, oldname); + } + + trace->output = fopen(filename, "w"); + + return trace->output == NULL ? -errno : 0; +} + int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) { const char * const trace_usage[] = { @@ -662,29 +880,33 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) .no_delay = true, .mmap_pages = 1024, }, + .output = stdout, }; + const char *output_name = NULL; const char *ev_qualifier_str = NULL; const struct option trace_options[] = { OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of events to trace"), + OPT_STRING('o', "output", &output_name, "file", "output file name"), OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", "trace events on existing process id"), - OPT_STRING(0, "tid", &trace.opts.target.tid, "tid", + OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", "trace events on existing thread id"), - OPT_BOOLEAN(0, "all-cpus", &trace.opts.target.system_wide, + OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, "system-wide collection from all CPUs"), - OPT_STRING(0, "cpu", &trace.opts.target.cpu_list, "cpu", + OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", "list of cpus to monitor"), - OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit, + OPT_BOOLEAN('i', "no-inherit", &trace.opts.no_inherit, "child tasks do not inherit counters"), - OPT_UINTEGER(0, "mmap-pages", &trace.opts.mmap_pages, + OPT_UINTEGER('m', "mmap-pages", &trace.opts.mmap_pages, "number of mmap data pages"), - OPT_STRING(0, "uid", &trace.opts.target.uid_str, "user", + OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user", "user to profile"), OPT_CALLBACK(0, "duration", &trace, "float", "show only events with duration > N.M ms", trace__set_duration), OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), + OPT_INCR('v', "verbose", &verbose, "be more verbose"), OPT_END() }; int err; @@ -692,26 +914,41 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options(argc, argv, trace_options, trace_usage, 0); + if (output_name != NULL) { + err = trace__open_output(&trace, output_name); + if (err < 0) { + perror("failed to create output file"); + goto out; + } + } + if (ev_qualifier_str != NULL) { - trace.ev_qualifier = strlist__new(true, ev_qualifier_str); + const char *s = ev_qualifier_str; + + trace.not_ev_qualifier = *s == '!'; + if (trace.not_ev_qualifier) + ++s; + trace.ev_qualifier = strlist__new(true, s); if (trace.ev_qualifier == NULL) { - puts("Not enough memory to parse event qualifier"); - return -ENOMEM; + fputs("Not enough memory to parse event qualifier", + trace.output); + err = -ENOMEM; + goto out_close; } } err = perf_target__validate(&trace.opts.target); if (err) { perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf)); - printf("%s", bf); - return err; + fprintf(trace.output, "%s", bf); + goto out_close; } err = perf_target__parse_uid(&trace.opts.target); if (err) { perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf)); - printf("%s", bf); - return err; + fprintf(trace.output, "%s", bf); + goto out_close; } if (!argc && perf_target__none(&trace.opts.target)) @@ -720,7 +957,11 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) err = trace__run(&trace, argc, argv); if (trace.sched && !err) - trace__fprintf_thread_summary(&trace, stdout); + trace__fprintf_thread_summary(&trace, trace.output); +out_close: + if (output_name != NULL) + fclose(trace.output); +out: return err; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c7d111f74553..1f5105ac5c85 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -14,6 +14,7 @@ #include "target.h" #include "evlist.h" #include "evsel.h" +#include "debug.h" #include <unistd.h> #include "parse-events.h" @@ -486,6 +487,7 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot, int m int nr_cpus = cpu_map__nr(evlist->cpus); int nr_threads = thread_map__nr(evlist->threads); + pr_debug2("perf event ring buffer mmapped per cpu\n"); for (cpu = 0; cpu < nr_cpus; cpu++) { int output = -1; @@ -524,6 +526,7 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot, in int thread; int nr_threads = thread_map__nr(evlist->threads); + pr_debug2("perf event ring buffer mmapped per thread\n"); for (thread = 0; thread < nr_threads; thread++) { int output = -1; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 960394ea1e3a..47cbe1e58b73 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -13,6 +13,7 @@ #include <traceevent/event-parse.h> #include <linux/hw_breakpoint.h> #include <linux/perf_event.h> +#include <sys/resource.h> #include "asm/bug.h" #include "evsel.h" #include "evlist.h" @@ -21,6 +22,7 @@ #include "thread_map.h" #include "target.h" #include "perf_regs.h" +#include "debug.h" static struct { bool sample_id_all; @@ -861,12 +863,72 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread) return fd; } +#define __PRINT_ATTR(fmt, cast, field) \ + fprintf(fp, " %-19s "fmt"\n", #field, cast attr->field) + +#define PRINT_ATTR_U32(field) __PRINT_ATTR("%u" , , field) +#define PRINT_ATTR_X32(field) __PRINT_ATTR("%#x", , field) +#define PRINT_ATTR_U64(field) __PRINT_ATTR("%" PRIu64, (uint64_t), field) +#define PRINT_ATTR_X64(field) __PRINT_ATTR("%#"PRIx64, (uint64_t), field) + +#define PRINT_ATTR2N(name1, field1, name2, field2) \ + fprintf(fp, " %-19s %u %-19s %u\n", \ + name1, attr->field1, name2, attr->field2) + +#define PRINT_ATTR2(field1, field2) \ + PRINT_ATTR2N(#field1, field1, #field2, field2) + +static size_t perf_event_attr__fprintf(struct perf_event_attr *attr, FILE *fp) +{ + size_t ret = 0; + + ret += fprintf(fp, "%.60s\n", graph_dotted_line); + ret += fprintf(fp, "perf_event_attr:\n"); + + ret += PRINT_ATTR_U32(type); + ret += PRINT_ATTR_U32(size); + ret += PRINT_ATTR_X64(config); + ret += PRINT_ATTR_U64(sample_period); + ret += PRINT_ATTR_U64(sample_freq); + ret += PRINT_ATTR_X64(sample_type); + ret += PRINT_ATTR_X64(read_format); + + ret += PRINT_ATTR2(disabled, inherit); + ret += PRINT_ATTR2(pinned, exclusive); + ret += PRINT_ATTR2(exclude_user, exclude_kernel); + ret += PRINT_ATTR2(exclude_hv, exclude_idle); + ret += PRINT_ATTR2(mmap, comm); + ret += PRINT_ATTR2(freq, inherit_stat); + ret += PRINT_ATTR2(enable_on_exec, task); + ret += PRINT_ATTR2(watermark, precise_ip); + ret += PRINT_ATTR2(mmap_data, sample_id_all); + ret += PRINT_ATTR2(exclude_host, exclude_guest); + ret += PRINT_ATTR2N("excl.callchain_kern", exclude_callchain_kernel, + "excl.callchain_user", exclude_callchain_user); + + ret += PRINT_ATTR_U32(wakeup_events); + ret += PRINT_ATTR_U32(wakeup_watermark); + ret += PRINT_ATTR_X32(bp_type); + ret += PRINT_ATTR_X64(bp_addr); + ret += PRINT_ATTR_X64(config1); + ret += PRINT_ATTR_U64(bp_len); + ret += PRINT_ATTR_X64(config2); + ret += PRINT_ATTR_X64(branch_sample_type); + ret += PRINT_ATTR_X64(sample_regs_user); + ret += PRINT_ATTR_U32(sample_stack_user); + + ret += fprintf(fp, "%.60s\n", graph_dotted_line); + + return ret; +} + static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads) { int cpu, thread; unsigned long flags = 0; int pid = -1, err; + enum { NO_CHANGE, SET_TO_MAX, INCREASED_MAX } set_rlimit = NO_CHANGE; if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0) @@ -884,6 +946,9 @@ retry_sample_id: if (perf_missing_features.sample_id_all) evsel->attr.sample_id_all = 0; + if (verbose >= 2) + perf_event_attr__fprintf(&evsel->attr, stderr); + for (cpu = 0; cpu < cpus->nr; cpu++) { for (thread = 0; thread < threads->nr; thread++) { @@ -893,6 +958,9 @@ retry_sample_id: pid = threads->map[thread]; group_fd = get_group_fd(evsel, cpu, thread); +retry_open: + pr_debug2("perf_event_open: pid %d cpu %d group_fd %d flags %#lx\n", + pid, cpus->map[cpu], group_fd, flags); FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, pid, @@ -902,12 +970,37 @@ retry_sample_id: err = -errno; goto try_fallback; } + set_rlimit = NO_CHANGE; } } return 0; try_fallback: + /* + * perf stat needs between 5 and 22 fds per CPU. When we run out + * of them try to increase the limits. + */ + if (err == -EMFILE && set_rlimit < INCREASED_MAX) { + struct rlimit l; + int old_errno = errno; + + if (getrlimit(RLIMIT_NOFILE, &l) == 0) { + if (set_rlimit == NO_CHANGE) + l.rlim_cur = l.rlim_max; + else { + l.rlim_cur = l.rlim_max + 1000; + l.rlim_max = l.rlim_cur; + } + if (setrlimit(RLIMIT_NOFILE, &l) == 0) { + set_rlimit++; + errno = old_errno; + goto retry_open; + } + } + errno = old_errno; + } + if (err != -EINVAL || cpu > 0 || thread > 0) goto out_close; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 4514e7e9b659..59486c180626 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -253,7 +253,8 @@ void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size) return; } -static struct thread *__machine__findnew_thread(struct machine *machine, pid_t tid, +static struct thread *__machine__findnew_thread(struct machine *machine, + pid_t pid, pid_t tid, bool create) { struct rb_node **p = &machine->threads.rb_node; @@ -265,8 +266,11 @@ static struct thread *__machine__findnew_thread(struct machine *machine, pid_t t * so most of the time we dont have to look up * the full rbtree: */ - if (machine->last_match && machine->last_match->tid == tid) + if (machine->last_match && machine->last_match->tid == tid) { + if (pid && pid != machine->last_match->pid_) + machine->last_match->pid_ = pid; return machine->last_match; + } while (*p != NULL) { parent = *p; @@ -274,6 +278,8 @@ static struct thread *__machine__findnew_thread(struct machine *machine, pid_t t if (th->tid == tid) { machine->last_match = th; + if (pid && pid != th->pid_) + th->pid_ = pid; return th; } @@ -286,7 +292,7 @@ static struct thread *__machine__findnew_thread(struct machine *machine, pid_t t if (!create) return NULL; - th = thread__new(tid); + th = thread__new(pid, tid); if (th != NULL) { rb_link_node(&th->rb_node, parent, p); rb_insert_color(&th->rb_node, &machine->threads); @@ -298,12 +304,12 @@ static struct thread *__machine__findnew_thread(struct machine *machine, pid_t t struct thread *machine__findnew_thread(struct machine *machine, pid_t tid) { - return __machine__findnew_thread(machine, tid, true); + return __machine__findnew_thread(machine, 0, tid, true); } struct thread *machine__find_thread(struct machine *machine, pid_t tid) { - return __machine__findnew_thread(machine, tid, false); + return __machine__findnew_thread(machine, 0, tid, false); } int machine__process_comm_event(struct machine *machine, union perf_event *event) @@ -1031,11 +1037,27 @@ out_problem: return 0; } +static void machine__remove_thread(struct machine *machine, struct thread *th) +{ + machine->last_match = NULL; + rb_erase(&th->rb_node, &machine->threads); + /* + * We may have references to this thread, for instance in some hist_entry + * instances, so just move them to a separate list. + */ + list_add_tail(&th->node, &machine->dead_threads); +} + int machine__process_fork_event(struct machine *machine, union perf_event *event) { - struct thread *thread = machine__findnew_thread(machine, event->fork.tid); + struct thread *thread = machine__find_thread(machine, event->fork.tid); struct thread *parent = machine__findnew_thread(machine, event->fork.ptid); + /* if a thread currently exists for the thread id remove it */ + if (thread != NULL) + machine__remove_thread(machine, thread); + + thread = machine__findnew_thread(machine, event->fork.tid); if (dump_trace) perf_event__fprintf_task(event, stdout); @@ -1048,18 +1070,8 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event return 0; } -static void machine__remove_thread(struct machine *machine, struct thread *th) -{ - machine->last_match = NULL; - rb_erase(&th->rb_node, &machine->threads); - /* - * We may have references to this thread, for instance in some hist_entry - * instances, so just move them to a separate list. - */ - list_add_tail(&th->node, &machine->dead_threads); -} - -int machine__process_exit_event(struct machine *machine, union perf_event *event) +int machine__process_exit_event(struct machine *machine __maybe_unused, + union perf_event *event) { struct thread *thread = machine__find_thread(machine, event->fork.tid); @@ -1067,7 +1079,7 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event perf_event__fprintf_task(event, stdout); if (thread != NULL) - machine__remove_thread(machine, thread); + thread__exited(thread); return 0; } diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 925e0c3e6d91..381f4fda9654 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -8,6 +8,26 @@ #include "cpumap.h" #include "thread_map.h" +/* + * Support debug printing even though util/debug.c is not linked. That means + * implementing 'verbose' and 'eprintf'. + */ +int verbose; + +int eprintf(int level, const char *fmt, ...) +{ + va_list args; + int ret = 0; + + if (verbose >= level) { + va_start(args, fmt); + ret = vfprintf(stderr, fmt, args); + va_end(args); + } + + return ret; +} + /* Define PyVarObject_HEAD_INIT for python 2.5 */ #ifndef PyVarObject_HEAD_INIT # define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size, diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 6feeb88eb5b0..e3d4a550a703 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -7,12 +7,13 @@ #include "util.h" #include "debug.h" -struct thread *thread__new(pid_t tid) +struct thread *thread__new(pid_t pid, pid_t tid) { struct thread *self = zalloc(sizeof(*self)); if (self != NULL) { map_groups__init(&self->mg); + self->pid_ = pid; self->tid = tid; self->ppid = -1; self->comm = malloc(32); diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 13c62c909392..4ebbb40d46d4 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -12,10 +12,12 @@ struct thread { struct list_head node; }; struct map_groups mg; + pid_t pid_; /* Not all tools update this */ pid_t tid; pid_t ppid; char shortname[3]; bool comm_set; + bool dead; /* if set thread has exited */ char *comm; int comm_len; @@ -24,8 +26,12 @@ struct thread { struct machine; -struct thread *thread__new(pid_t tid); +struct thread *thread__new(pid_t pid, pid_t tid); void thread__delete(struct thread *self); +static inline void thread__exited(struct thread *thread) +{ + thread->dead = true; +} int thread__set_comm(struct thread *self, const char *comm); int thread__comm_len(struct thread *self); |